From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 11 Jan 2011 13:47:24 +0100
Subject: drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG

Converting the constants happens at compile time.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 9e5f5607eba3..d28202811672 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -334,9 +334,7 @@ enum drbd_timeout_flag {
 #define UUID_JUST_CREATED ((__u64)4)
 
 #define DRBD_MAGIC 0x83740267
-#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
 #define DRBD_MAGIC_BIG 0x835a
-#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
 
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
-- 
cgit v1.2.3-55-g7522


From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 20 Jul 2011 14:59:37 +0200
Subject: idr: idr_for_each_entry() macro

Inspired by the list_for_each_entry() macro
---
 include/linux/idr.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 255491cf522e..52a9da295296 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
 
 void __init idr_init_cache(void);
 
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id)				\
+	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+	     entry != NULL;                                             \
+	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
 #endif /* __IDR_H__ */
-- 
cgit v1.2.3-55-g7522


From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 19 Jan 2011 16:57:39 +0100
Subject: drbd: Use new header layout

The new header layout will only be used if the peer supports
it of course.

For the first packet and the handshake packet the old (h80)
layout is used for compatibility reasons.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  1 -
 drivers/block/drbd/drbd_main.c     | 82 +++++++++++++++++---------------------
 drivers/block/drbd/drbd_receiver.c |  7 +++-
 include/linux/drbd.h               |  2 +-
 4 files changed, 42 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index dc669dfe5b0d..4de43481bcb9 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -345,7 +345,6 @@ struct p_header95 {
 	u16	  magic;	/* use DRBD_MAGIC_BIG here */
 	u16	  command;
 	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
-	u8	  payload[0];
 } __packed;
 
 struct p_header {
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 55ce48e24b8e..f8cb15c84ed8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
 }
 #endif
 
+static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h,
+			     enum drbd_packets cmd, int size)
+{
+	h->magic   = cpu_to_be32(DRBD_MAGIC);
+	h->command = cpu_to_be16(cmd);
+	h->length  = cpu_to_be16(size);
+}
+
+static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h,
+			     enum drbd_packets cmd, int size)
+{
+	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
+	h->command = cpu_to_be16(cmd);
+	h->length  = cpu_to_be32(size);
+}
+
+static void prepare_header(struct drbd_conf *mdev, struct p_header *h,
+			   enum drbd_packets cmd, int size)
+{
+	if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET)
+		prepare_header95(mdev, &h->h95, cmd, size);
+	else
+		prepare_header80(mdev, &h->h80, cmd, size);
+}
+
 /* the appropriate socket mutex must be held already */
 int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
-			  enum drbd_packets cmd, struct p_header *hg,
+			  enum drbd_packets cmd, struct p_header *h,
 			  size_t size, unsigned msg_flags)
 {
-	struct p_header80 *h = (struct p_header80 *)hg;
 	int sent, ok;
 
 	if (!expect(h))
@@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 	if (!expect(size))
 		return false;
 
-	h->magic   = cpu_to_be32(DRBD_MAGIC);
-	h->command = cpu_to_be16(cmd);
-	h->length  = cpu_to_be16(size-sizeof(struct p_header80));
+	prepare_header(mdev, h, cmd, size - sizeof(struct p_header));
 
 	sent = drbd_send(mdev, sock, h, size, msg_flags);
 
@@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
 int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
 		   size_t size)
 {
-	struct p_header80 h;
+	struct p_header h;
 	int ok;
 
-	h.magic   = cpu_to_be32(DRBD_MAGIC);
-	h.command = cpu_to_be16(cmd);
-	h.length  = cpu_to_be16(size);
+	prepare_header(mdev, &h, cmd, size);
 
 	if (!drbd_get_data_sock(mdev))
 		return 0;
@@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev,
 	int ok;
 	struct p_block_req p;
 
+	prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size);
 	p.sector   = cpu_to_be64(sector);
 	p.block_id = ID_SYNCER /* unused */;
 	p.blksize  = cpu_to_be32(size);
 
-	p.head.h80.magic   = cpu_to_be32(DRBD_MAGIC);
-	p.head.h80.command = cpu_to_be16(cmd);
-	p.head.h80.length  = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
-
 	mutex_lock(&mdev->tconn->data.mutex);
 
 	ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0));
@@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
 		crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
 
-	if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) {
-		p.head.h80.magic   = cpu_to_be32(DRBD_MAGIC);
-		p.head.h80.command = cpu_to_be16(P_DATA);
-		p.head.h80.length  =
-			cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
-	} else {
-		p.head.h95.magic   = cpu_to_be16(DRBD_MAGIC_BIG);
-		p.head.h95.command = cpu_to_be16(P_DATA);
-		p.head.h95.length  =
-			cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
-	}
-
+	prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
 	p.sector   = cpu_to_be64(req->i.sector);
 	p.block_id = (unsigned long)req;
-	p.seq_num  = cpu_to_be32(req->seq_num =
-				 atomic_add_return(1, &mdev->packet_seq));
+	p.seq_num  = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq));
 
 	dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
 
@@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
 		crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
 
-	if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) {
-		p.head.h80.magic   = cpu_to_be32(DRBD_MAGIC);
-		p.head.h80.command = cpu_to_be16(cmd);
-		p.head.h80.length  =
-			cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size);
-	} else {
-		p.head.h95.magic   = cpu_to_be16(DRBD_MAGIC_BIG);
-		p.head.h95.command = cpu_to_be16(cmd);
-		p.head.h95.length  =
-			cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size);
-	}
-
+	prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size);
 	p.sector   = cpu_to_be64(e->i.sector);
 	p.block_id = e->block_id;
 	/* p.seq_num  = 0;    No sequence numbers here.. */
@@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker);
 	drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender);
 
-	mdev->tconn->agreed_pro_version = PRO_VERSION_MAX;
+	/* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */
 	mdev->write_ordering = WO_bdev_flush;
 	mdev->resync_wenr = LC_FREE;
 	mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
@@ -3506,12 +3500,8 @@ int __init drbd_init(void)
 {
 	int err;
 
-	if (sizeof(struct p_handshake) != 80) {
-		printk(KERN_ERR
-		       "drbd: never change the size or layout "
-		       "of the HandShake packet.\n");
-		return -EINVAL;
-	}
+	BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
+	BUILD_BUG_ON(sizeof(struct p_handshake) != 80);
 
 	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
 		printk(KERN_ERR
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 9393fe482efc..8f5a241fe20a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev)
 		return -2;
 
 	clear_bit(DISCARD_CONCURRENT, &mdev->flags);
+	mdev->tconn->agreed_pro_version = 99;
+	/* agreed_pro_version must be smaller than 100 so we send the old
+	   header (h80) in the first packet and in the handshake packet. */
 
 	sock  = NULL;
 	msock = NULL;
@@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
 		return false;
 	}
 
-	if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) {
+	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
 		*cmd = be16_to_cpu(h->h80.command);
 		*packet_size = be16_to_cpu(h->h80.length);
 	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
 		*cmd = be16_to_cpu(h->h95.command);
-		*packet_size = be32_to_cpu(h->h95.length);
+		*packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff;
 	} else {
 		dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
 		    be32_to_cpu(h->h80.magic),
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index d28202811672..35fc08a0a552 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.11"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 100
 
 
 enum drbd_io_error_p {
-- 
cgit v1.2.3-55-g7522


From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 21 Feb 2011 13:20:55 +0100
Subject: drbd: use clear_bit_unlock() where appropriate

Some open-coded clear_bit(); smp_mb__after_clear_bit();
should in fact have been smp_mb__before_clear_bit(); clear_bit();

Instead, use clear_bit_unlock() to annotate the intention,
and have it do the right thing.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_bitmap.c |  3 +--
 drivers/block/drbd/drbd_main.c   |  3 +--
 include/linux/lru_cache.h        |  3 +--
 lib/lru_cache.c                  | 10 ++++------
 4 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e8d652f197c3..4be737055718 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	void *addr = &page_private(b->bm_pages[page_nr]);
-	clear_bit(BM_PAGE_IO_LOCK, addr);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
 	wake_up(&mdev->bitmap->bm_io_wait);
 }
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 592f0c949fd0..c77e51a40926 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
 		put_ldev(mdev);
 	}
 
-	clear_bit(BITMAP_IO, &mdev->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BITMAP_IO, &mdev->flags);
 	wake_up(&mdev->misc_wait);
 
 	if (work->done)
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 7a71ffad037c..4cceafb0732d 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc)
  */
 static inline void lc_unlock(struct lru_cache *lc)
 {
-	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(__LC_DIRTY, &lc->flags);
 }
 
 static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index a07e7268d7ed..9f353f7f41ca 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
 } while (0)
 
 #define RETURN(x...)     do { \
-	clear_bit(__LC_PARANOIA, &lc->flags); \
-	smp_mb__after_clear_bit(); return x ; } while (0)
+	clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
+	return x ; } while (0)
 
 /* BUG() if e is not one of the elements tracked by lc */
 #define PARANOIA_LC_ELEMENT(lc, e) do {	\
@@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e)
 	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
 	lc->changing_element = NULL;
 	lc->new_number = LC_FREE;
-	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(__LC_DIRTY, &lc->flags);
 	RETURN();
 }
 
@@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
 		/* move it to the front of LRU. */
 		list_move(&e->list, &lc->lru);
 		lc->used--;
-		clear_bit(__LC_STARVING, &lc->flags);
-		smp_mb__after_clear_bit();
+		clear_bit_unlock(__LC_STARVING, &lc->flags);
 	}
 	RETURN(e->refcnt);
 }
-- 
cgit v1.2.3-55-g7522


From 46a15bc3ec425b546d140581c28192ab7877ddc4 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 21 Feb 2011 13:21:01 +0100
Subject: lru_cache: allow multiple changes per transaction

Allow multiple changes to the active set of elements in lru_cache.
The only current user of lru_cache, drbd, is driving this generalisation.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c |  50 ++------
 drivers/block/drbd/drbd_nl.c     |   4 +-
 include/linux/lru_cache.h        |  68 +++++++----
 lib/lru_cache.c                  | 243 +++++++++++++++++++++++++++------------
 4 files changed, 225 insertions(+), 140 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1ce3de6eed1b..44097c87fed7 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 {
 	struct lc_element *al_ext;
 	struct lc_element *tmp;
-	unsigned long     al_flags = 0;
 	int wake;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 			return NULL;
 		}
 	}
-	al_ext   = lc_get(mdev->act_log, enr);
-	al_flags = mdev->act_log->flags;
+	al_ext = lc_get(mdev->act_log, enr);
 	spin_unlock_irq(&mdev->al_lock);
-
-	/*
-	if (!al_ext) {
-		if (al_flags & LC_STARVING)
-			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
-		if (al_flags & LC_DIRTY)
-			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
-	}
-	*/
-
 	return al_ext;
 }
 
@@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
 		mdev->al_writ_cnt++;
 
 		spin_lock_irq(&mdev->al_lock);
-		lc_changed(mdev->act_log, al_ext);
+		lc_committed(mdev->act_log);
 		spin_unlock_irq(&mdev->al_lock);
 		wake_up(&mdev->al_wait);
 	}
@@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
 	struct lc_element *al_ext;
 	int i;
 
-	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));
 
 	for (i = 0; i < mdev->act_log->nr_elements; i++) {
 		al_ext = lc_element_by_index(mdev->act_log, i);
@@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
 			}
 			ext->rs_left = rs_left;
 			ext->rs_failed = success ? 0 : count;
-			lc_changed(mdev->resync, &ext->lce);
+			/* we don't keep a persistent log of the resync lru,
+			 * we can commit any change right away. */
+			lc_committed(mdev->resync);
 		}
 		lc_put(mdev->resync, &ext->lce);
 		/* no race, we are within the al_lock! */
@@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
 			wakeup = 1;
 		}
 		if (bm_ext->lce.refcnt == 1)
@@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 		if (rs_flags & LC_STARVING)
 			dev_warn(DEV, "Have to wait for element"
 			     " (resync LRU too small?)\n");
-		BUG_ON(rs_flags & LC_DIRTY);
+		BUG_ON(rs_flags & LC_LOCKED);
 	}
 
 	return bm_ext;
@@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 
 static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
 {
-	struct lc_element *al_ext;
-	int rv = 0;
+	int rv;
 
 	spin_lock_irq(&mdev->al_lock);
-	if (unlikely(enr == mdev->act_log->new_number))
-		rv = 1;
-	else {
-		al_ext = lc_find(mdev->act_log, enr);
-		if (al_ext) {
-			if (al_ext->refcnt)
-				rv = 1;
-		}
-	}
+	rv = lc_is_used(mdev->act_log, enr);
 	spin_unlock_irq(&mdev->al_lock);
 
-	/*
-	if (unlikely(rv)) {
-		dev_info(DEV, "Delaying sync read until app's write is done\n");
-	}
-	*/
 	return rv;
 }
 
@@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 			if (rs_flags & LC_STARVING)
 				dev_warn(DEV, "Have to wait for element"
 				     " (resync LRU too small?)\n");
-			BUG_ON(rs_flags & LC_DIRTY);
+			BUG_ON(rs_flags & LC_LOCKED);
 			goto try_again;
 		}
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
 			wake_up(&mdev->al_wait);
 			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
 		}
@@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 	}
 check_al:
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
-		if (unlikely(al_enr+i == mdev->act_log->new_number))
-			goto try_again;
 		if (lc_is_used(mdev->act_log, al_enr+i))
 			goto try_again;
 	}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ae8f42e38e4f..0a92f5226c2a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 
 	in_use = 0;
 	t = mdev->act_log;
-	n = lc_create("act_log", drbd_al_ext_cache,
+	n = lc_create("act_log", drbd_al_ext_cache, 1,
 		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
@@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	}
 
 	resync_lru = lc_create("resync", drbd_bm_ext_cache,
-			61, sizeof(struct bm_extent),
+			1, 61, sizeof(struct bm_extent),
 			offsetof(struct bm_extent, lce));
 	if (!resync_lru) {
 		retcode = ERR_NOMEM;
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 4cceafb0732d..cbafae40c649 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -166,9 +166,11 @@ struct lc_element {
 	/* if we want to track a larger set of objects,
 	 * it needs to become arch independend u64 */
 	unsigned lc_number;
-
 	/* special label when on free list */
 #define LC_FREE (~0U)
+
+	/* for pending changes */
+	unsigned lc_new_number;
 };
 
 struct lru_cache {
@@ -176,6 +178,7 @@ struct lru_cache {
 	struct list_head lru;
 	struct list_head free;
 	struct list_head in_use;
+	struct list_head to_be_changed;
 
 	/* the pre-created kmem cache to allocate the objects from */
 	struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@ struct lru_cache {
 	size_t element_off;
 
 	/* number of elements (indices) */
-	unsigned int  nr_elements;
+	unsigned int nr_elements;
 	/* Arbitrary limit on maximum tracked objects. Practical limit is much
 	 * lower due to allocation failures, probably. For typical use cases,
 	 * nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@ struct lru_cache {
 	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE	(1<<24)
 
+	/* allow to accumulate a few (index:label) changes,
+	 * but no more than max_pending_changes */
+	unsigned int max_pending_changes;
+	/* number of elements currently on to_be_changed list */
+	unsigned int pending_changes;
+
 	/* statistics */
-	unsigned used; /* number of lelements currently on in_use list */
-	unsigned long hits, misses, starving, dirty, changed;
+	unsigned used; /* number of elements currently on in_use list */
+	unsigned long hits, misses, starving, locked, changed;
 
 	/* see below: flag-bits for lru_cache */
 	unsigned long flags;
 
-	/* when changing the label of an index element */
-	unsigned int  new_number;
-
-	/* for paranoia when changing the label of an index element */
-	struct lc_element *changing_element;
 
 	void  *lc_private;
 	const char *name;
@@ -221,10 +225,15 @@ enum {
 	/* debugging aid, to catch concurrent access early.
 	 * user needs to guarantee exclusive access by proper locking! */
 	__LC_PARANOIA,
-	/* if we need to change the set, but currently there is a changing
-	 * transaction pending, we are "dirty", and must deferr further
-	 * changing requests */
+
+	/* annotate that the set is "dirty", possibly accumulating further
+	 * changes, until a transaction is finally triggered */
 	__LC_DIRTY,
+
+	/* Locked, no further changes allowed.
+	 * Also used to serialize changing transactions. */
+	__LC_LOCKED,
+
 	/* if we need to change the set, but currently there is no free nor
 	 * unused element available, we are "starving", and must not give out
 	 * further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@ enum {
 };
 #define LC_PARANOIA (1<<__LC_PARANOIA)
 #define LC_DIRTY    (1<<__LC_DIRTY)
+#define LC_LOCKED   (1<<__LC_LOCKED)
 #define LC_STARVING (1<<__LC_STARVING)
 
 extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off);
 extern void lc_reset(struct lru_cache *lc);
 extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
 extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+extern void lc_committed(struct lru_cache *lc);
 
 struct seq_file;
 extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
 				void (*detail) (struct seq_file *, struct lc_element *));
 
 /**
- * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
  * @lc: the lru cache to operate on
  *
- * Note that the reference counts and order on the active and lru lists may
- * still change.  Returns true if we acquired the lock.
+ * Allows (expects) the set to be "dirty".  Note that the reference counts and
+ * order on the active and lru lists may still change.  Used to serialize
+ * changing transactions.  Returns true if we aquired the lock.
  */
-static inline int lc_try_lock(struct lru_cache *lc)
+static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
 {
-	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+	return !test_and_set_bit(__LC_LOCKED, &lc->flags);
 }
 
+/**
+ * lc_try_lock - variant to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Note that the reference counts and order on the active and lru lists may
+ * still change.  Only works on a "clean" set.  Returns true if we aquired the
+ * lock, which means there are no pending changes, and any further attempt to
+ * change the set will not succeed until the next lc_unlock().
+ */
+extern int lc_try_lock(struct lru_cache *lc);
+
 /**
  * lc_unlock - unlock @lc, allow lc_get() to change the set again
  * @lc: the lru cache to operate on
  */
 static inline void lc_unlock(struct lru_cache *lc)
 {
-	clear_bit_unlock(__LC_DIRTY, &lc->flags);
+	clear_bit(__LC_DIRTY, &lc->flags);
+	clear_bit_unlock(__LC_LOCKED, &lc->flags);
 }
 
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
-	struct lc_element *e = lc_find(lc, enr);
-	return e && e->refcnt;
-}
+extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
 
 #define lc_entry(ptr, type, member) \
 	container_of(ptr, type, member)
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 17621684758a..d71d89498943 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
 	BUG_ON(i >= lc_->nr_elements);	\
 	BUG_ON(lc_->lc_element[i] != e_); } while (0)
 
+
+/* We need to atomically
+ *  - try to grab the lock (set LC_LOCKED)
+ *  - only if there is no pending transaction
+ *    (neither LC_DIRTY nor LC_STARVING is set)
+ * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
+ * it is not sufficient to just say
+ *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
+ */
+int lc_try_lock(struct lru_cache *lc)
+{
+	unsigned long val;
+	do {
+		val = cmpxchg(&lc->flags, 0, LC_LOCKED);
+	} while (unlikely (val == LC_PARANOIA));
+	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
+	return 0 == val;
+#if 0
+	/* Alternative approach, spin in case someone enters or leaves a
+	 * PARANOIA_ENTRY()/RETURN() section. */
+	unsigned long old, new, val;
+	do {
+		old = lc->flags & LC_PARANOIA;
+		new = old | LC_LOCKED;
+		val = cmpxchg(&lc->flags, old, new);
+	} while (unlikely (val == (old ^ LC_PARANOIA)));
+	return old == val;
+#endif
+}
+
 /**
  * lc_create - prepares to track objects in an active set
  * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @max_pending_changes: maximum changes to accumulate until a transaction is required
  * @e_count: number of elements allowed to be active simultaneously
  * @e_size: size of the tracked objects
  * @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
  * or NULL on (allocation) failure.
  */
 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off)
 {
 	struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 
 	lc->name = name;
 	lc->element_size = e_size;
 	lc->element_off = e_off;
 	lc->nr_elements = e_count;
-	lc->new_number = LC_FREE;
+	lc->max_pending_changes = max_pending_changes;
 	lc->lc_cache = cache;
 	lc->lc_element = element;
 	lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 		e = p + e_off;
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 		element[i] = e;
 	}
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 	lc->used = 0;
 	lc->hits = 0;
 	lc->misses = 0;
 	lc->starving = 0;
-	lc->dirty = 0;
+	lc->locked = 0;
 	lc->changed = 0;
+	lc->pending_changes = 0;
 	lc->flags = 0;
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
 	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
 
 	for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
 		/* re-init it */
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 	}
 }
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
 	/* NOTE:
 	 * total calls to lc_get are
 	 * (starving + hits + misses)
-	 * misses include "dirty" count (update from an other thread in
+	 * misses include "locked" count (update from an other thread in
 	 * progress) and "changed", when this in fact lead to an successful
 	 * update of the cache.
 	 */
 	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
 		lc->name, lc->used, lc->nr_elements,
-		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
 }
 
 
-/**
- * lc_find - find element by label, if present in the hash table
- * @lc: The lru_cache object
- * @enr: element number
- *
- * Returns the pointer to an element, if the element with the requested
- * "label" or element number is present in the hash table,
- * or NULL if not found. Does not change the refcnt.
- */
-struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
+		bool include_changing)
 {
 	struct hlist_node *n;
 	struct lc_element *e;
@@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 	BUG_ON(!lc);
 	BUG_ON(!lc->nr_elements);
 	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
-		if (e->lc_number == enr)
+		/* "about to be changed" elements, pending transaction commit,
+		 * are hashed by their "new number". "Normal" elements have
+		 * lc_number == lc_new_number. */
+		if (e->lc_new_number != enr)
+			continue;
+		if (e->lc_new_number == e->lc_number || include_changing)
 			return e;
+		break;
 	}
 	return NULL;
 }
 
-/* returned element will be "recycled" immediately */
-static struct lc_element *lc_evict(struct lru_cache *lc)
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ * Ignores elements that are "about to be used", i.e. not yet in the active
+ * set, but still pending transaction commit.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 {
-	struct list_head  *n;
-	struct lc_element *e;
-
-	if (list_empty(&lc->lru))
-		return NULL;
-
-	n = lc->lru.prev;
-	e = list_entry(n, struct lc_element, list);
-
-	PARANOIA_LC_ELEMENT(lc, e);
+	return __lc_find(lc, enr, 0);
+}
 
-	list_del(&e->list);
-	hlist_del(&e->colision);
-	return e;
+/**
+ * lc_is_used - find element by label
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns true, if the element with the requested "label" or element number is
+ * present in the hash table, and is used (refcnt > 0).
+ * Also finds elements that are not _currently_ used but only "about to be
+ * used", i.e. on the "to_be_changed" list, pending transaction commit.
+ */
+bool lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e = __lc_find(lc, enr, 1);
+	return e && e->refcnt;
 }
 
 /**
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt);
 
-	e->lc_number = LC_FREE;
+	e->lc_number = e->lc_new_number = LC_FREE;
 	hlist_del_init(&e->colision);
 	list_move(&e->list, &lc->free);
 	RETURN();
 }
 
-static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
 {
 	struct list_head *n;
+	struct lc_element *e;
+
+	if (!list_empty(&lc->free))
+		n = lc->free.next;
+	else if (!list_empty(&lc->lru))
+		n = lc->lru.prev;
+	else
+		return NULL;
+
+	e = list_entry(n, struct lc_element, list);
+	PARANOIA_LC_ELEMENT(lc, e);
 
-	if (list_empty(&lc->free))
-		return lc_evict(lc);
+	e->lc_new_number = new_number;
+	if (!hlist_unhashed(&e->colision))
+		__hlist_del(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
+	list_move(&e->list, &lc->to_be_changed);
 
-	n = lc->free.next;
-	list_del(n);
-	return list_entry(n, struct lc_element, list);
+	return e;
 }
 
 static int lc_unused_element_available(struct lru_cache *lc)
@@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
 		RETURN(NULL);
 	}
 
-	e = lc_find(lc, enr);
-	if (e) {
+	e = __lc_find(lc, enr, 1);
+	/* if lc_new_number != lc_number,
+	 * this enr is currently being pulled in already,
+	 * and will be available once the pending transaction
+	 * has been committed. */
+	if (e && e->lc_new_number == e->lc_number) {
 		++lc->hits;
 		if (e->refcnt++ == 0)
 			lc->used++;
@@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
 	if (!may_change)
 		RETURN(NULL);
 
+	/* It has been found above, but on the "to_be_changed" list, not yet
+	 * committed.  Don't pull it in twice, wait for the transaction, then
+	 * try again */
+	if (e)
+		RETURN(NULL);
+
+	/* To avoid races with lc_try_lock(), first, mark us dirty
+	 * (using test_and_set_bit, as it implies memory barriers), ... */
+	test_and_set_bit(__LC_DIRTY, &lc->flags);
+
+	/* ... only then check if it is locked anyways. If lc_unlock clears
+	 * the dirty bit again, that's not a problem, we will come here again.
+	 */
+	if (test_bit(__LC_LOCKED, &lc->flags)) {
+		++lc->locked;
+		RETURN(NULL);
+	}
+
 	/* In case there is nothing available and we can not kick out
 	 * the LRU element, we have to wait ...
 	 */
@@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
 		RETURN(NULL);
 	}
 
-	/* it was not present in the active set.
-	 * we are going to recycle an unused (or even "free") element.
-	 * user may need to commit a transaction to record that change.
-	 * we serialize on flags & LC_DIRTY */
-	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
-		++lc->dirty;
+	/* It was not present in the active set.  We are going to recycle an
+	 * unused (or even "free") element, but we won't accumulate more than
+	 * max_pending_changes changes.  */
+	if (lc->pending_changes >= lc->max_pending_changes)
 		RETURN(NULL);
-	}
 
-	e = lc_get_unused_element(lc);
+	e = lc_prepare_for_change(lc, enr);
 	BUG_ON(!e);
 
 	clear_bit(__LC_STARVING, &lc->flags);
 	BUG_ON(++e->refcnt != 1);
 	lc->used++;
-
-	lc->changing_element = e;
-	lc->new_number = enr;
+	lc->pending_changes++;
 
 	RETURN(e);
 }
@@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
  *  pointer to an UNUSED element with some different element number,
  *          where that different number may also be %LC_FREE.
  *
- *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
- *          and the returned element pointer is removed from the lru list and
- *          hash collision chains.  The user now should do whatever housekeeping
- *          is necessary.
- *          Then he must call lc_changed(lc,element_pointer), to finish
- *          the change.
+ *          In this case, the cache is marked %LC_DIRTY,
+ *          so lc_try_lock() will no longer succeed.
+ *          The returned element pointer is moved to the "to_be_changed" list,
+ *          and registered with the new element number on the hash collision chains,
+ *          so it is possible to pick it up from lc_is_used().
+ *          Up to "max_pending_changes" (see lc_create()) can be accumulated.
+ *          The user now should do whatever housekeeping is necessary,
+ *          typically serialize on lc_try_lock_for_transaction(), then call
+ *          lc_committed(lc) and lc_unlock(), to finish the change.
  *
  * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
  *       any cache set change.
@@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
 }
 
 /**
- * lc_changed - tell @lc that the change has been recorded
+ * lc_committed - tell @lc that pending changes have been recorded
  * @lc: the lru cache to operate on
- * @e: the element pending label change
+ *
+ * User is expected to serialize on explicit lc_try_lock_for_transaction()
+ * before the transaction is started, and later needs to lc_unlock() explicitly
+ * as well.
  */
-void lc_changed(struct lru_cache *lc, struct lc_element *e)
+void lc_committed(struct lru_cache *lc)
 {
+	struct lc_element *e, *tmp;
+
 	PARANOIA_ENTRY();
-	BUG_ON(e != lc->changing_element);
-	PARANOIA_LC_ELEMENT(lc, e);
-	++lc->changed;
-	e->lc_number = lc->new_number;
-	list_add(&e->list, &lc->in_use);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
-	clear_bit_unlock(__LC_DIRTY, &lc->flags);
+	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
+		/* count number of changes, not number of transactions */
+		++lc->changed;
+		e->lc_number = e->lc_new_number;
+		list_move(&e->list, &lc->in_use);
+	}
+	lc->pending_changes = 0;
 	RETURN();
 }
 
@@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
 	PARANOIA_ENTRY();
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt == 0);
-	BUG_ON(e == lc->changing_element);
+	BUG_ON(e->lc_number != e->lc_new_number);
 	if (--e->refcnt == 0) {
 		/* move it to the front of LRU. */
 		list_move(&e->list, &lc->lru);
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
 void lc_set(struct lru_cache *lc, unsigned int enr, int index)
 {
 	struct lc_element *e;
+	struct list_head *lh;
 
 	if (index < 0 || index >= lc->nr_elements)
 		return;
 
 	e = lc_element_by_index(lc, index);
-	e->lc_number = enr;
+	BUG_ON(e->lc_number != e->lc_new_number);
+	BUG_ON(e->refcnt != 0);
 
+	e->lc_number = e->lc_new_number = enr;
 	hlist_del_init(&e->colision);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
-	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+	if (enr == LC_FREE)
+		lh = &lc->free;
+	else {
+		hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+		lh = &lc->lru;
+	}
+	list_move(&e->list, lh);
 }
 
 /**
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
 EXPORT_SYMBOL(lc_find);
 EXPORT_SYMBOL(lc_get);
 EXPORT_SYMBOL(lc_put);
-EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_committed);
 EXPORT_SYMBOL(lc_element_by_index);
 EXPORT_SYMBOL(lc_index_of);
 EXPORT_SYMBOL(lc_seq_printf_stats);
 EXPORT_SYMBOL(lc_seq_dump_details);
+EXPORT_SYMBOL(lc_try_lock);
+EXPORT_SYMBOL(lc_is_used);
-- 
cgit v1.2.3-55-g7522


From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 21 Feb 2011 13:21:03 +0100
Subject: drbd: new on-disk activity log transaction format

Use a new on-disk transaction format for the activity log, which allows
for multiple changes to the active set per transaction.

Using 4k transaction blocks, we can now get rid of the work-around code
to deal with devices not supporting 512 byte logical block size.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c | 409 ++++++++++++++++++++++++---------------
 drivers/block/drbd/drbd_int.h    |  44 +++--
 drivers/block/drbd/drbd_main.c   |   4 -
 drivers/block/drbd/drbd_nl.c     |  42 +---
 include/linux/drbd.h             |   4 +
 include/linux/drbd_limits.h      |   8 +-
 6 files changed, 302 insertions(+), 209 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 44097c87fed7..ea3895de4e6d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -24,21 +24,67 @@
  */
 
 #include <linux/slab.h>
+#include <linux/crc32c.h>
 #include <linux/drbd.h>
+#include <linux/drbd_limits.h>
+#include <linux/dynamic_debug.h>
 #include "drbd_int.h"
 #include "drbd_wrappers.h"
 
-/* We maintain a trivial checksum in our on disk activity log.
- * With that we can ensure correct operation even when the storage
- * device might do a partial (last) sector write while losing power.
- */
-struct __packed al_transaction {
-	u32       magic;
-	u32       tr_number;
-	struct __packed {
-		u32 pos;
-		u32 extent; } updates[1 + AL_EXTENTS_PT];
-	u32       xor_sum;
+/* all fields on disc in big endian */
+struct __packed al_transaction_on_disk {
+	/* don't we all like magic */
+	__be32	magic;
+
+	/* to identify the most recent transaction block
+	 * in the on disk ring buffer */
+	__be32	tr_number;
+
+	/* checksum on the full 4k block, with this field set to 0. */
+	__be32	crc32c;
+
+	/* type of transaction, special transaction types like:
+	 * purge-all, set-all-idle, set-all-active, ... to-be-defined */
+	__be16	transaction_type;
+
+	/* we currently allow only a few thousand extents,
+	 * so 16bit will be enough for the slot number. */
+
+	/* how many updates in this transaction */
+	__be16	n_updates;
+
+	/* maximum slot number, "al-extents" in drbd.conf speak.
+	 * Having this in each transaction should make reconfiguration
+	 * of that parameter easier. */
+	__be16	context_size;
+
+	/* slot number the context starts with */
+	__be16	context_start_slot_nr;
+
+	/* Some reserved bytes.  Expected usage is a 64bit counter of
+	 * sectors-written since device creation, and other data generation tag
+	 * supporting usage */
+	__be32	__reserved[4];
+
+	/* --- 36 byte used --- */
+
+	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
+	 * in one transaction, then use the remaining byte in the 4k block for
+	 * context information.  "Flexible" number of updates per transaction
+	 * does not help, as we have to account for the case when all update
+	 * slots are used anyways, so it would only complicate code without
+	 * additional benefit.
+	 */
+	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* but the extent number is 32bit, which at an extent size of 4 MiB
+	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
+	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* --- 420 bytes used (36 + 64*6) --- */
+
+	/* 4096 - 420 = 3676 = 919 * 4 */
+	__be32	context[AL_CONTEXT_PER_TRANSACTION];
 };
 
 struct update_odbm_work {
@@ -48,11 +94,8 @@ struct update_odbm_work {
 
 struct update_al_work {
 	struct drbd_work w;
-	struct lc_element *al_ext;
 	struct completion event;
-	unsigned int enr;
-	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
-	unsigned int old_enr;
+	int err;
 };
 
 struct drbd_atodb_wait {
@@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 			 sector_t sector, int rw)
 {
-	int logical_block_size, mask, ok;
-	int offset = 0;
+	int ok;
 	struct page *iop = mdev->md_io_page;
 
 	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
 
 	BUG_ON(!bdev->md_bdev);
 
-	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
-	if (logical_block_size == 0)
-		logical_block_size = MD_SECTOR_SIZE;
-
-	/* in case logical_block_size != 512 [ s390 only? ] */
-	if (logical_block_size != MD_SECTOR_SIZE) {
-		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
-		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
-		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
-		offset = sector & mask;
-		sector = sector & ~mask;
-		iop = mdev->md_io_tmpp;
-
-		if (rw & WRITE) {
-			/* these are GFP_KERNEL pages, pre-allocated
-			 * on device initialization */
-			void *p = page_address(mdev->md_io_page);
-			void *hp = page_address(mdev->md_io_tmpp);
-
-			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
-					READ, logical_block_size);
-
-			if (unlikely(!ok)) {
-				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
-				    "READ [logical_block_size!=512]) failed!\n",
-				    (unsigned long long)sector);
-				return 0;
-			}
-
-			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
-		}
-	}
+	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
+	     current->comm, current->pid, __func__,
+	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
 	if (sector < drbd_md_first_sector(bdev) ||
-	    sector > drbd_md_last_sector(bdev))
+	    sector + 7 > drbd_md_last_sector(bdev))
 		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
 		     current->comm, current->pid, __func__,
 		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
-	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
+	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
 	if (unlikely(!ok)) {
 		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
 		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 		return 0;
 	}
 
-	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
-		void *p = page_address(mdev->md_io_page);
-		void *hp = page_address(mdev->md_io_tmpp);
-
-		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
-	}
-
 	return ok;
 }
 
@@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
 		 * current->bio_tail list now.
 		 * we have to delegate updates to the activity log
 		 * to the worker thread. */
-		init_completion(&al_work.event);
-		al_work.al_ext = al_ext;
-		al_work.enr = enr;
-		al_work.old_enr = al_ext->lc_number;
-		al_work.w.cb = w_al_write_transaction;
-		al_work.w.mdev = mdev;
-		drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w);
-		wait_for_completion(&al_work.event);
 
-		mdev->al_writ_cnt++;
+		/* Serialize multiple transactions.
+		 * This uses test_and_set_bit, memory barrier is implicit.
+		 * Optimization potential:
+		 * first check for transaction number > old transaction number,
+		 * so not all waiters have to lock/unlock.  */
+		wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log));
 
-		spin_lock_irq(&mdev->al_lock);
-		lc_committed(mdev->act_log);
-		spin_unlock_irq(&mdev->al_lock);
+		/* Double check: it may have been committed by someone else,
+		 * while we have been waiting for the lock. */
+		if (al_ext->lc_number != enr) {
+			init_completion(&al_work.event);
+			al_work.w.cb = w_al_write_transaction;
+			al_work.w.mdev = mdev;
+			drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w);
+			wait_for_completion(&al_work.event);
+
+			mdev->al_writ_cnt++;
+
+			spin_lock_irq(&mdev->al_lock);
+			/* FIXME
+			if (al_work.err)
+				we need an "lc_cancel" here;
+			*/
+			lc_committed(mdev->act_log);
+			spin_unlock_irq(&mdev->al_lock);
+		}
+		lc_unlock(mdev->act_log);
 		wake_up(&mdev->al_wait);
 	}
 }
@@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 {
 	struct update_al_work *aw = container_of(w, struct update_al_work, w);
 	struct drbd_conf *mdev = w->mdev;
-	struct lc_element *updated = aw->al_ext;
-	const unsigned int new_enr = aw->enr;
-	const unsigned int evicted = aw->old_enr;
-	struct al_transaction *buffer;
+	struct al_transaction_on_disk *buffer;
+	struct lc_element *e;
 	sector_t sector;
-	int i, n, mx;
-	unsigned int extent_nr;
-	u32 xor_sum = 0;
+	int i, mx;
+	unsigned extent_nr;
+	unsigned crc = 0;
 
 	if (!get_ldev(mdev)) {
-		dev_err(DEV,
-			"disk is %s, cannot start al transaction (-%d +%d)\n",
-			drbd_disk_str(mdev->state.disk), evicted, new_enr);
+		dev_err(DEV, "disk is %s, cannot start al transaction\n",
+			drbd_disk_str(mdev->state.disk));
+		aw->err = -EIO;
 		complete(&((struct update_al_work *)w)->event);
 		return 1;
 	}
-	/* do we have to do a bitmap write, first?
-	 * TODO reduce maximum latency:
-	 * submit both bios, then wait for both,
-	 * instead of doing two synchronous sector writes.
-	 * For now, we must not write the transaction,
-	 * if we cannot write out the bitmap of the evicted extent. */
-	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
-		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
 
 	/* The bitmap write may have failed, causing a state change. */
 	if (mdev->state.disk < D_INCONSISTENT) {
 		dev_err(DEV,
-			"disk is %s, cannot write al transaction (-%d +%d)\n",
-			drbd_disk_str(mdev->state.disk), evicted, new_enr);
+			"disk is %s, cannot write al transaction\n",
+			drbd_disk_str(mdev->state.disk));
+		aw->err = -EIO;
 		complete(&((struct update_al_work *)w)->event);
 		put_ldev(mdev);
 		return 1;
 	}
 
 	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
-	buffer = (struct al_transaction *)page_address(mdev->md_io_page);
+	buffer = page_address(mdev->md_io_page);
 
-	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
+	memset(buffer, 0, sizeof(*buffer));
+	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
 
-	n = lc_index_of(mdev->act_log, updated);
+	i = 0;
+
+	/* Even though no one can start to change this list
+	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
+	 * lc_try_lock_for_transaction() --, someone may still
+	 * be in the process of changing it. */
+	spin_lock_irq(&mdev->al_lock);
+	list_for_each_entry(e, &mdev->act_log->to_be_changed, list) {
+		if (i == AL_UPDATES_PER_TRANSACTION) {
+			i++;
+			break;
+		}
+		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
+		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
+		if (e->lc_number != LC_FREE)
+			drbd_bm_mark_for_writeout(mdev,
+					al_extent_to_bm_page(e->lc_number));
+		i++;
+	}
+	spin_unlock_irq(&mdev->al_lock);
+	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
 
-	buffer->updates[0].pos = cpu_to_be32(n);
-	buffer->updates[0].extent = cpu_to_be32(new_enr);
+	buffer->n_updates = cpu_to_be16(i);
+	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
+		buffer->update_slot_nr[i] = cpu_to_be16(-1);
+		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
+	}
 
-	xor_sum ^= new_enr;
+	buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements);
+	buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle);
 
-	mx = min_t(int, AL_EXTENTS_PT,
+	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
 		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
 	for (i = 0; i < mx; i++) {
 		unsigned idx = mdev->al_tr_cycle + i;
 		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
-		buffer->updates[i+1].pos = cpu_to_be32(idx);
-		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
-		xor_sum ^= extent_nr;
+		buffer->context[i] = cpu_to_be32(extent_nr);
 	}
-	for (; i < AL_EXTENTS_PT; i++) {
-		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
-		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
-		xor_sum ^= LC_FREE;
-	}
-	mdev->al_tr_cycle += AL_EXTENTS_PT;
+	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
+		buffer->context[i] = cpu_to_be32(LC_FREE);
+
+	mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
 	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
 		mdev->al_tr_cycle = 0;
 
-	buffer->xor_sum = cpu_to_be32(xor_sum);
-
 	sector =  mdev->ldev->md.md_offset
-		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
-
-	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
-		drbd_chk_io_error(mdev, 1, true);
+		+ mdev->ldev->md.al_offset
+		+ mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);
 
-	if (++mdev->al_tr_pos >
-	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
-		mdev->al_tr_pos = 0;
+	crc = crc32c(0, buffer, 4096);
+	buffer->crc32c = cpu_to_be32(crc);
 
-	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
-	mdev->al_tr_number++;
+	if (drbd_bm_write_hinted(mdev))
+		aw->err = -EIO;
+		/* drbd_chk_io_error done already */
+	else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+		aw->err = -EIO;
+		drbd_chk_io_error(mdev, 1, true);
+	} else {
+		/* advance ringbuffer position and transaction counter */
+		mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
+		mdev->al_tr_number++;
+	}
 
 	mutex_unlock(&mdev->md_io_mutex);
-
 	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
 
 	return 1;
 }
 
+/* FIXME
+ * reading of the activity log,
+ * and potentially dirtying of the affected bitmap regions,
+ * should be done from userland only.
+ * DRBD would simply always attach with an empty activity log,
+ * and refuse to attach to something that looks like a crashed primary.
+ */
+
 /**
  * drbd_al_read_tr() - Read a single transaction from the on disk activity log
  * @mdev:	DRBD device.
@@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused)
  */
 static int drbd_al_read_tr(struct drbd_conf *mdev,
 			   struct drbd_backing_dev *bdev,
-			   struct al_transaction *b,
 			   int index)
 {
+	struct al_transaction_on_disk *b = page_address(mdev->md_io_page);
 	sector_t sector;
-	int rv, i;
-	u32 xor_sum = 0;
+	u32 crc;
 
-	sector = bdev->md.md_offset + bdev->md.al_offset + index;
+	sector =  bdev->md.md_offset
+		+ bdev->md.al_offset
+		+ index * (MD_BLOCK_SIZE>>9);
 
 	/* Dont process error normally,
 	 * as this is done before disk is attached! */
 	if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
 		return -1;
 
-	rv = (b->magic == cpu_to_be32(DRBD_MAGIC));
+	if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC)))
+		return 0;
+
+	if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION))
+		return 0;
 
-	for (i = 0; i < AL_EXTENTS_PT + 1; i++)
-		xor_sum ^= be32_to_cpu(b->updates[i].extent);
-	rv &= (xor_sum == be32_to_cpu(b->xor_sum));
+	if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX))
+		return 0;
 
-	return rv;
+	if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX))
+		return 0;
+
+	crc = be32_to_cpu(b->crc32c);
+	b->crc32c = 0;
+	if (!expect(crc == crc32c(0, b, 4096)))
+		return 0;
+
+	return 1;
 }
 
 /**
@@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev,
  */
 int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
-	struct al_transaction *buffer;
+	struct al_transaction_on_disk *b;
 	int i;
 	int rv;
 	int mx;
@@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	u32 to_tnr = 0;
 	u32 cnr;
 
-	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
+	/* Note that this is expected to be called with a newly created,
+	 * clean and all unused activity log of the "expected size".
+	 */
 
 	/* lock out all other meta data io for now,
 	 * and make sure the page is mapped.
 	 */
 	mutex_lock(&mdev->md_io_mutex);
-	buffer = page_address(mdev->md_io_page);
+	b = page_address(mdev->md_io_page);
+
+	/* Always use the full ringbuffer space for now.
+	 * possible optimization: read in all of it,
+	 * then scan the in-memory pages. */
+
+	mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* Find the valid transaction in the log */
-	for (i = 0; i <= mx; i++) {
-		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+	for (i = 0; i < mx; i++) {
+		rv = drbd_al_read_tr(mdev, bdev, i);
+		/* invalid data in that block */
 		if (rv == 0)
 			continue;
+
+		/* IO error */
 		if (rv == -1) {
 			mutex_unlock(&mdev->md_io_mutex);
 			return 0;
 		}
-		cnr = be32_to_cpu(buffer->tr_number);
 
+		cnr = be32_to_cpu(b->tr_number);
 		if (++found_valid == 1) {
 			from = i;
 			to = i;
@@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 			to_tnr = cnr;
 			continue;
 		}
+
+		D_ASSERT(cnr != to_tnr);
+		D_ASSERT(cnr != from_tnr);
 		if ((int)cnr - (int)from_tnr < 0) {
-			D_ASSERT(from_tnr - cnr + i - from == mx+1);
+			D_ASSERT(from_tnr - cnr + i - from == mx);
 			from = i;
 			from_tnr = cnr;
 		}
@@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
 	i = from;
 	while (1) {
-		int j, pos;
-		unsigned int extent_nr;
-		unsigned int trn;
+		struct lc_element *e;
+		unsigned j, n, slot, extent_nr;
 
-		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+		rv = drbd_al_read_tr(mdev, bdev, i);
 		if (!expect(rv != 0))
 			goto cancel;
 		if (rv == -1) {
@@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 			return 0;
 		}
 
-		trn = be32_to_cpu(buffer->tr_number);
+		/* deal with different transaction types.
+		 * not yet implemented */
+		if (!expect(b->transaction_type == 0))
+			goto cancel;
 
-		spin_lock_irq(&mdev->al_lock);
+		/* on the fly re-create/resize activity log?
+		 * will be a special transaction type flag. */
+		if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements))
+			goto cancel;
+		if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements))
+			goto cancel;
 
-		/* This loop runs backwards because in the cyclic
-		   elements there might be an old version of the
-		   updated element (in slot 0). So the element in slot 0
-		   can overwrite old versions. */
-		for (j = AL_EXTENTS_PT; j >= 0; j--) {
-			pos = be32_to_cpu(buffer->updates[j].pos);
-			extent_nr = be32_to_cpu(buffer->updates[j].extent);
+		/* We are the only user of the activity log right now,
+		 * don't actually need to take that lock. */
+		spin_lock_irq(&mdev->al_lock);
 
-			if (extent_nr == LC_FREE)
-				continue;
+		/* first, apply the context, ... */
+		for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr);
+		     j < AL_CONTEXT_PER_TRANSACTION &&
+		     slot < mdev->act_log->nr_elements; j++, slot++) {
+			extent_nr = be32_to_cpu(b->context[j]);
+			e = lc_element_by_index(mdev->act_log, slot);
+			if (e->lc_number != extent_nr) {
+				if (extent_nr != LC_FREE)
+					active_extents++;
+				else
+					active_extents--;
+			}
+			lc_set(mdev->act_log, extent_nr, slot);
+		}
 
-			lc_set(mdev->act_log, extent_nr, pos);
-			active_extents++;
+		/* ... then apply the updates,
+		 * which override the context information.
+		 * drbd_al_read_tr already did the rangecheck
+		 * on n <= AL_UPDATES_PER_TRANSACTION */
+		n = be16_to_cpu(b->n_updates);
+		for (j = 0; j < n; j++) {
+			slot = be16_to_cpu(b->update_slot_nr[j]);
+			extent_nr = be32_to_cpu(b->update_extent_nr[j]);
+			if (!expect(slot < mdev->act_log->nr_elements))
+				break;
+			e = lc_element_by_index(mdev->act_log, slot);
+			if (e->lc_number != extent_nr) {
+				if (extent_nr != LC_FREE)
+					active_extents++;
+				else
+					active_extents--;
+			}
+			lc_set(mdev->act_log, extent_nr, slot);
 		}
 		spin_unlock_irq(&mdev->al_lock);
 
@@ -514,15 +614,12 @@ cancel:
 		if (i == to)
 			break;
 		i++;
-		if (i > mx)
+		if (i >= mx)
 			i = 0;
 	}
 
 	mdev->al_tr_number = to_tnr+1;
-	mdev->al_tr_pos = to;
-	if (++mdev->al_tr_pos >
-	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
-		mdev->al_tr_pos = 0;
+	mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* ok, we are done with it */
 	mutex_unlock(&mdev->md_io_mutex);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index edfdeb62c18f..3213808a898a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1069,7 +1069,6 @@ struct drbd_conf {
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
 	struct mutex md_io_mutex;	/* protects the md_io_buffer */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
@@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev);
    * either at the end of the backing device
    * or on a separate meta data device. */
 
-#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
 /* The following numbers are sectors */
-#define MD_AL_OFFSET 8	    /* 8 Sectors after start of meta area */
-#define MD_AL_MAX_SIZE 64   /* = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage */
-/* Allows up to about 3.8TB */
-#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
-
-/* Since the smalles IO unit is usually 512 byte */
-#define MD_SECTOR_SHIFT	 9
-#define MD_SECTOR_SIZE	 (1<<MD_SECTOR_SHIFT)
-
-/* activity log */
-#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */
-#define AL_EXTENT_SHIFT 22		 /* One extent represents 4M Storage */
+/* Allows up to about 3.8TB, so if you want more,
+ * you need to use the "flexible" meta data format. */
+#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
+#define MD_AL_OFFSET	8    /* 8 Sectors after start of meta area */
+#define MD_AL_SECTORS	64   /* = 32 kB on disk activity log ring buffer */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS)
+
+/* we do all meta data IO in 4k blocks */
+#define MD_BLOCK_SHIFT	12
+#define MD_BLOCK_SIZE	(1<<MD_BLOCK_SHIFT)
+
+/* One activity log extent represents 4M of storage */
+#define AL_EXTENT_SHIFT 22
 #define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
 
+/* We could make these currently hardcoded constants configurable
+ * variables at create-md time (or even re-configurable at runtime?).
+ * Which will require some more changes to the DRBD "super block"
+ * and attach code.
+ *
+ * updates per transaction:
+ *   This many changes to the active set can be logged with one transaction.
+ *   This number is arbitrary.
+ * context per transaction:
+ *   This many context extent numbers are logged with each transaction.
+ *   This number is resulting from the transaction block size (4k), the layout
+ *   of the transaction header, and the number of updates per transaction.
+ *   See drbd_actlog.c:struct al_transaction_on_disk
+ * */
+#define AL_UPDATES_PER_TRANSACTION	 64	// arbitrary
+#define AL_CONTEXT_PER_TRANSACTION	919	// (4096 - 36 - 6*64)/4
+
 #if BITS_PER_LONG == 32
 #define LN2_BPL 5
 #define cpu_to_lel(A) cpu_to_le32(A)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c77e51a40926..efedfbc06198 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2841,10 +2841,6 @@ void drbd_ldev_destroy(struct drbd_conf *mdev)
 		drbd_free_bc(mdev->ldev);
 		mdev->ldev = NULL;);
 
-	if (mdev->md_io_tmpp) {
-		__free_page(mdev->md_io_tmpp);
-		mdev->md_io_tmpp = NULL;
-	}
 	clear_bit(GO_DISKLESS, &mdev->flags);
 }
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 0a92f5226c2a..90d731723205 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 	case DRBD_MD_INDEX_FLEX_INT:
 		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
 		/* al size is still fixed */
-		bdev->md.al_offset = -MD_AL_MAX_SIZE;
+		bdev->md.al_offset = -MD_AL_SECTORS;
 		/* we need (slightly less than) ~ this much bitmap sectors: */
 		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
@@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 	unsigned int in_use;
 	int i;
 
-	if (!expect(mdev->sync_conf.al_extents >= 7))
-		mdev->sync_conf.al_extents = 127;
+	if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN))
+		mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN;
 
 	if (mdev->act_log &&
 	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
@@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 
 	in_use = 0;
 	t = mdev->act_log;
-	n = lc_create("act_log", drbd_al_ext_cache, 1,
+	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
 		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
@@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	union drbd_state ns, os;
 	enum drbd_state_rv rv;
 	int cp_discovered = 0;
-	int logical_block_size;
 
 	drbd_reconfig_start(mdev);
 
@@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	drbd_md_set_sector_offsets(mdev, nbc);
 
-	/* allocate a second IO page if logical_block_size != 512 */
-	logical_block_size = bdev_logical_block_size(nbc->md_bdev);
-	if (logical_block_size == 0)
-		logical_block_size = MD_SECTOR_SIZE;
-
-	if (logical_block_size != MD_SECTOR_SIZE) {
-		if (!mdev->md_io_tmpp) {
-			struct page *page = alloc_page(GFP_NOIO);
-			if (!page)
-				goto force_diskless_dec;
-
-			dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
-			     logical_block_size, MD_SECTOR_SIZE);
-			dev_warn(DEV, "Workaround engaged (has performance impact).\n");
-
-			mdev->md_io_tmpp = page;
-		}
-	}
-
 	if (!mdev->bitmap) {
 		if (drbd_bm_init(mdev)) {
 			retcode = ERR_NOMEM;
@@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 
 	if (!expect(sc.rate >= 1))
 		sc.rate = 1;
-	if (!expect(sc.al_extents >= 7))
-		sc.al_extents = 127; /* arbitrary minimum */
-#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
-	if (sc.al_extents > AL_MAX) {
-		dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
-		sc.al_extents = AL_MAX;
-	}
-#undef AL_MAX
+
+	/* clip to allowed range */
+	if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN))
+		sc.al_extents = DRBD_AL_EXTENTS_MIN;
+	if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX))
+		sc.al_extents = DRBD_AL_EXTENTS_MAX;
 
 	/* to avoid spurious errors when configuring minors before configuring
 	 * the minors they depend on: if necessary, first create the minor we
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 35fc08a0a552..70a688b92c1b 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -336,6 +336,10 @@ enum drbd_timeout_flag {
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
 
+/* how I came up with this magic?
+ * base64 decode "actlog==" ;) */
+#define DRBD_AL_MAGIC 0x69cb65a2
+
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
 #define DRBD_MD_INDEX_FLEX_EXT -2
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 447c36752385..75f05af33725 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -102,10 +102,12 @@
 #define DRBD_RATE_DEF 250  /* kb/second */
 
   /* less than 7 would hit performance unnecessarily.
-   * 3833 is the largest prime that still does fit
-   * into 64 sectors of activity log */
+   * 919 slots context information per transaction,
+   * 32k activity log, 4k transaction size,
+   * one transaction in flight:
+   * 919 * 7 = 6433 */
 #define DRBD_AL_EXTENTS_MIN  7
-#define DRBD_AL_EXTENTS_MAX  3833
+#define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  127
 
 #define DRBD_AFTER_MIN  -1
-- 
cgit v1.2.3-55-g7522


From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 21 Feb 2011 15:38:08 +0100
Subject: drbd: Preparing the connector interface to operator on connections

Up to now it only operated on minor numbers. Now it can work also
on named connections.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h  |  1 +
 drivers/block/drbd/drbd_main.c | 15 +++++++
 drivers/block/drbd/drbd_nl.c   | 96 +++++++++++++++++++++++++++---------------
 include/linux/drbd.h           | 19 +++++++--
 4 files changed, 94 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 48367e53a7a5..033af1995867 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev);
 
 struct drbd_tconn *drbd_new_tconn(char *name);
 extern void drbd_free_tconn(struct drbd_tconn *tconn);
+struct drbd_tconn *conn_by_name(const char *name);
 
 extern int proc_details;
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index cbec5ff2cc74..4761426f9ad7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq)
 	INIT_LIST_HEAD(&wq->q);
 }
 
+struct drbd_tconn *conn_by_name(const char *name)
+{
+	struct drbd_tconn *tconn;
+
+	write_lock_irq(&global_state_lock);
+	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
+		if (!strcmp(tconn->name, name))
+			goto found;
+	}
+	tconn = NULL;
+found:
+	write_unlock_irq(&global_state_lock);
+	return tconn;
+}
+
 struct drbd_tconn *drbd_new_tconn(char *name)
 {
 	struct drbd_tconn *tconn;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index b141f891f643..27a43d138f6b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2184,42 +2184,57 @@ out:
 	return 0;
 }
 
+enum cn_handler_type {
+	CHT_MINOR,
+	CHT_CONN,
+	CHT_CTOR,
+	/* CHT_RES, later */
+};
+
 struct cn_handler_struct {
-	int (*function)(struct drbd_conf *,
-			 struct drbd_nl_cfg_req *,
-			 struct drbd_nl_cfg_reply *);
+	enum cn_handler_type type;
+	union {
+		int (*minor_based)(struct drbd_conf *,
+				   struct drbd_nl_cfg_req *,
+				   struct drbd_nl_cfg_reply *);
+		int (*conn_based)(struct drbd_tconn *,
+				  struct drbd_nl_cfg_req *,
+				  struct drbd_nl_cfg_reply *);
+		int (*constructor)(struct drbd_nl_cfg_req *,
+				   struct drbd_nl_cfg_reply *);
+	};
 	int reply_body_size;
 };
 
 static struct cn_handler_struct cnd_table[] = {
-	[ P_primary ]		= { &drbd_nl_primary,		0 },
-	[ P_secondary ]		= { &drbd_nl_secondary,		0 },
-	[ P_disk_conf ]		= { &drbd_nl_disk_conf,		0 },
-	[ P_detach ]		= { &drbd_nl_detach,		0 },
-	[ P_net_conf ]		= { &drbd_nl_net_conf,		0 },
-	[ P_disconnect ]	= { &drbd_nl_disconnect,	0 },
-	[ P_resize ]		= { &drbd_nl_resize,		0 },
-	[ P_syncer_conf ]	= { &drbd_nl_syncer_conf,	0 },
-	[ P_invalidate ]	= { &drbd_nl_invalidate,	0 },
-	[ P_invalidate_peer ]	= { &drbd_nl_invalidate_peer,	0 },
-	[ P_pause_sync ]	= { &drbd_nl_pause_sync,	0 },
-	[ P_resume_sync ]	= { &drbd_nl_resume_sync,	0 },
-	[ P_suspend_io ]	= { &drbd_nl_suspend_io,	0 },
-	[ P_resume_io ]		= { &drbd_nl_resume_io,		0 },
-	[ P_outdate ]		= { &drbd_nl_outdate,		0 },
-	[ P_get_config ]	= { &drbd_nl_get_config,
+	[ P_primary ]		= { CHT_MINOR, { &drbd_nl_primary },	0 },
+	[ P_secondary ]		= { CHT_MINOR, { &drbd_nl_secondary },	0 },
+	[ P_disk_conf ]		= { CHT_MINOR, { &drbd_nl_disk_conf },	0 },
+	[ P_detach ]		= { CHT_MINOR, { &drbd_nl_detach },	0 },
+	[ P_net_conf ]		= { CHT_MINOR, { &drbd_nl_net_conf },	0 },
+	[ P_disconnect ]	= { CHT_MINOR, { &drbd_nl_disconnect },	0 },
+	[ P_resize ]		= { CHT_MINOR, { &drbd_nl_resize },	0 },
+	[ P_syncer_conf ]	= { CHT_MINOR, { &drbd_nl_syncer_conf },0 },
+	[ P_invalidate ]	= { CHT_MINOR, { &drbd_nl_invalidate },	0 },
+	[ P_invalidate_peer ]	= { CHT_MINOR, { &drbd_nl_invalidate_peer },0 },
+	[ P_pause_sync ]	= { CHT_MINOR, { &drbd_nl_pause_sync },	0 },
+	[ P_resume_sync ]	= { CHT_MINOR, { &drbd_nl_resume_sync },0 },
+	[ P_suspend_io ]	= { CHT_MINOR, { &drbd_nl_suspend_io },	0 },
+	[ P_resume_io ]		= { CHT_MINOR, { &drbd_nl_resume_io },	0 },
+	[ P_outdate ]		= { CHT_MINOR, { &drbd_nl_outdate },	0 },
+	[ P_get_config ]	= { CHT_MINOR, { &drbd_nl_get_config },
 				    sizeof(struct syncer_conf_tag_len_struct) +
 				    sizeof(struct disk_conf_tag_len_struct) +
 				    sizeof(struct net_conf_tag_len_struct) },
-	[ P_get_state ]		= { &drbd_nl_get_state,
+	[ P_get_state ]		= { CHT_MINOR, { &drbd_nl_get_state },
 				    sizeof(struct get_state_tag_len_struct) +
 				    sizeof(struct sync_progress_tag_len_struct)	},
-	[ P_get_uuids ]		= { &drbd_nl_get_uuids,
+	[ P_get_uuids ]		= { CHT_MINOR, { &drbd_nl_get_uuids },
 				    sizeof(struct get_uuids_tag_len_struct) },
-	[ P_get_timeout_flag ]	= { &drbd_nl_get_timeout_flag,
+	[ P_get_timeout_flag ]	= { CHT_MINOR, { &drbd_nl_get_timeout_flag },
 				    sizeof(struct get_timeout_flag_tag_len_struct)},
-	[ P_start_ov ]		= { &drbd_nl_start_ov,		0 },
-	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
+	[ P_start_ov ]		= { CHT_MINOR, { &drbd_nl_start_ov },	0 },
+	[ P_new_c_uuid ]	= { CHT_MINOR, { &drbd_nl_new_c_uuid },	0 },
 };
 
 static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
@@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	struct cn_msg *cn_reply;
 	struct drbd_nl_cfg_reply *reply;
 	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
 	int retcode, rr;
 	int reply_size = sizeof(struct cn_msg)
 		+ sizeof(struct drbd_nl_cfg_reply)
@@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 		goto fail;
 	}
 
-	mdev = ensure_mdev(nlp->drbd_minor,
-			(nlp->flags & DRBD_NL_CREATE_DEVICE));
-	if (!mdev) {
-		retcode = ERR_MINOR_INVALID;
-		goto fail;
-	}
-
 	if (nlp->packet_type >= P_nl_after_last_packet ||
 	    nlp->packet_type == P_return_code_only) {
 		retcode = ERR_PACKET_NR;
@@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	cm = cnd_table + nlp->packet_type;
 
 	/* This may happen if packet number is 0: */
-	if (cm->function == NULL) {
+	if (cm->minor_based == NULL) {
 		retcode = ERR_PACKET_NR;
 		goto fail;
 	}
@@ -2281,7 +2290,28 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
 	/* reply->tag_list; might be modified by cm->function. */
 
-	rr = cm->function(mdev, nlp, reply);
+	retcode = ERR_MINOR_INVALID;
+	rr = 0;
+	switch (cm->type) {
+	case CHT_MINOR:
+		mdev = minor_to_mdev(nlp->drbd_minor);
+		if (!mdev)
+			goto fail;
+		rr = cm->minor_based(mdev, nlp, reply);
+		break;
+	case CHT_CONN:
+		tconn = conn_by_name(nlp->obj_name);
+		if (!tconn) {
+			retcode = ERR_CONN_NOT_KNOWN;
+			goto fail;
+		}
+		rr = cm->conn_based(tconn, nlp, reply);
+		break;
+	case CHT_CTOR:
+		rr = cm->constructor(nlp, reply);
+		break;
+	/* case CHT_RES: */
+	}
 
 	cn_reply->id = req->id;
 	cn_reply->seq = req->seq;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 70a688b92c1b..7683b4ab6583 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -155,6 +155,7 @@ enum drbd_ret_code {
 	ERR_CONG_NOT_PROTO_A	= 155,
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
+	ERR_CONN_NOT_KNOWN      = 158,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -347,8 +348,11 @@ enum drbd_timeout_flag {
 
 /* Start of the new netlink/connector stuff */
 
-#define DRBD_NL_CREATE_DEVICE 0x01
-#define DRBD_NL_SET_DEFAULTS  0x02
+enum drbd_ncr_flags {
+	DRBD_NL_CREATE_DEVICE = 0x01,
+	DRBD_NL_SET_DEFAULTS =  0x02,
+};
+#define DRBD_NL_OBJ_NAME_LEN 32
 
 
 /* For searching a vacant cn_idx value */
@@ -356,8 +360,15 @@ enum drbd_timeout_flag {
 
 struct drbd_nl_cfg_req {
 	int packet_type;
-	unsigned int drbd_minor;
-	int flags;
+	union {
+		struct {
+			unsigned int drbd_minor;
+			enum drbd_ncr_flags flags;
+		};
+		struct {
+			char obj_name[DRBD_NL_OBJ_NAME_LEN];
+		};
+	};
 	unsigned short tag_list[];
 };
 
-- 
cgit v1.2.3-55-g7522


From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Tue, 22 Feb 2011 02:07:03 -0500
Subject: drbd: Implemented new commands to create/delete connections/minors

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h  |   4 +-
 drivers/block/drbd/drbd_main.c |  68 ++++++++++++++------------
 drivers/block/drbd/drbd_nl.c   | 106 +++++++++++++++++++++++++----------------
 include/linux/drbd.h           |   3 ++
 include/linux/drbd_nl.h        |  12 +++++
 5 files changed, 120 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a27e2a4e038d..535d503886d8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
 extern void drbd_go_diskless(struct drbd_conf *mdev);
 extern void drbd_ldev_destroy(struct drbd_conf *mdev);
 
-
 /* Meta data layout
    We reserve a 128MB Block (4k aligned)
    * either at the end of the backing device
@@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait;
 extern rwlock_t global_state_lock;
 
 extern int conn_lowest_minor(struct drbd_tconn *tconn);
-extern struct drbd_conf *drbd_new_device(unsigned int minor);
+enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr);
 extern void drbd_free_mdev(struct drbd_conf *mdev);
+extern void drbd_delete_device(unsigned int minor);
 
 struct drbd_tconn *drbd_new_tconn(char *name);
 extern void drbd_free_tconn(struct drbd_tconn *tconn);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 2bfd63058f40..ec7d0d98657c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas
 	return thi ? thi->name : task->comm;
 }
 
-#ifdef CONFIG_SMP
 int conn_lowest_minor(struct drbd_tconn *tconn)
 {
 	int minor = 0;
-	idr_get_next(&tconn->volumes, &minor);
+
+	if (!idr_get_next(&tconn->volumes, &minor))
+		return -1;
 	return minor;
 }
+
+#ifdef CONFIG_SMP
 /**
  * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
  * @mdev:	DRBD device.
@@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev)
 		dev_err(DEV, "%d EEs in net list found!\n", rr);
 }
 
-/* caution. no locking.
- * currently only used from module cleanup code. */
-static void drbd_delete_device(unsigned int minor)
+/* caution. no locking. */
+void drbd_delete_device(unsigned int minor)
 {
 	struct drbd_conf *mdev = minor_to_mdev(minor);
 
 	if (!mdev)
 		return;
 
+	idr_remove(&mdev->tconn->volumes, minor);
+
 	/* paranoia asserts */
 	D_ASSERT(mdev->open_cnt == 0);
 	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
@@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor)
 		bdput(mdev->this_bdev);
 
 	drbd_free_resources(mdev);
-	drbd_free_tconn(mdev->tconn);
 
 	drbd_release_ee_lists(mdev);
 
@@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name)
 	if (!tconn->name)
 		goto fail;
 
+	if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
+		goto fail;
+
 	if (!tl_init(tconn))
 		goto fail;
 
@@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name)
 
 fail:
 	tl_cleanup(tconn);
+	free_cpumask_var(tconn->cpu_mask);
 	kfree(tconn->name);
 	kfree(tconn);
 
@@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn)
 	write_unlock_irq(&global_state_lock);
 	idr_destroy(&tconn->volumes);
 
+	free_cpumask_var(tconn->cpu_mask);
 	kfree(tconn->name);
 	kfree(tconn->int_dig_out);
 	kfree(tconn->int_dig_in);
@@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn)
 	kfree(tconn);
 }
 
-struct drbd_conf *drbd_new_device(unsigned int minor)
+enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
 {
 	struct drbd_conf *mdev;
 	struct gendisk *disk;
 	struct request_queue *q;
-	char conn_name[9]; /* drbd1234N */
-	int vnr;
+	int vnr_got = vnr;
+
+	mdev = minor_to_mdev(minor);
+	if (mdev)
+		return ERR_MINOR_EXISTS;
 
 	/* GFP_KERNEL, we are outside of all write-out paths */
 	mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
 	if (!mdev)
-		return NULL;
-	sprintf(conn_name, "drbd%d", minor);
-	mdev->tconn = drbd_new_tconn(conn_name);
-	if (!mdev->tconn)
-		goto out_no_tconn;
-	if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL))
-		goto out_no_cpumask;
-	if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr))
-		goto out_no_cpumask;
-	if (vnr != 0) {
-		dev_err(DEV, "vnr = %d\n", vnr);
-		goto out_no_cpumask;
-	}
-	if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL))
-		goto out_no_cpumask;
+		return ERR_NOMEM;
+
+	mdev->tconn = tconn;
+	if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
+		goto out_no_idr;
+	if (idr_get_new(&tconn->volumes, mdev, &vnr_got))
+		goto out_no_idr;
+	if (vnr_got != vnr) {
+		dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr);
+		goto out_no_q;
+	}
 
 	mdev->minor = minor;
 
@@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 	INIT_LIST_HEAD(&mdev->current_epoch->list);
 	mdev->epochs = 1;
 
-	return mdev;
+	minor_table[minor] = mdev;
+	add_disk(disk);
+
+	return NO_ERROR;
 
 /* out_whatever_else:
 	kfree(mdev->current_epoch); */
@@ -2367,12 +2377,10 @@ out_no_io_page:
 out_no_disk:
 	blk_cleanup_queue(q);
 out_no_q:
-	free_cpumask_var(mdev->tconn->cpu_mask);
-out_no_cpumask:
-	drbd_free_tconn(mdev->tconn);
-out_no_tconn:
+	idr_remove(&tconn->volumes, vnr_got);
+out_no_idr:
 	kfree(mdev);
-	return NULL;
+	return ERR_NOMEM;
 }
 
 /* counterpart of drbd_new_device.
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 455a51dd364d..f2739fd188a0 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	return rv;
 }
 
-static struct drbd_conf *ensure_mdev(int minor, int create)
-{
-	struct drbd_conf *mdev;
-
-	if (minor >= minor_count)
-		return NULL;
-
-	mdev = minor_to_mdev(minor);
-
-	if (!mdev && create) {
-		struct gendisk *disk = NULL;
-		mdev = drbd_new_device(minor);
-
-		spin_lock_irq(&drbd_pp_lock);
-		if (minor_table[minor] == NULL) {
-			minor_table[minor] = mdev;
-			disk = mdev->vdisk;
-			mdev = NULL;
-		} /* else: we lost the race */
-		spin_unlock_irq(&drbd_pp_lock);
-
-		if (disk) /* we won the race above */
-			/* in case we ever add a drbd_delete_device(),
-			 * don't forget the del_gendisk! */
-			add_disk(disk);
-		else /* we lost the race above */
-			drbd_free_mdev(mdev);
-
-		mdev = minor_to_mdev(minor);
-	}
-
-	return mdev;
-}
-
 static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			   struct drbd_nl_cfg_reply *reply)
 {
@@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX))
 		sc.al_extents = DRBD_AL_EXTENTS_MAX;
 
-	/* to avoid spurious errors when configuring minors before configuring
-	 * the minors they depend on: if necessary, first create the minor we
-	 * depend on */
-	if (sc.after >= 0)
-		ensure_mdev(sc.after, 1);
-
 	/* most sanity checks done, try to assign the new sync-after
 	 * dependency.  need to hold the global lock in there,
 	 * to avoid a race in the dependency loop check. */
@@ -2184,13 +2144,73 @@ out:
 	return 0;
 }
 
+static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
+{
+	struct new_connection args;
+
+	if (!new_connection_from_tags(nlp->tag_list, &args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+
+	reply->ret_code = NO_ERROR;
+	if (!drbd_new_tconn(args.name))
+		reply->ret_code = ERR_NOMEM;
+
+	return 0;
+}
+
+static int drbd_nl_new_minor(struct drbd_tconn *tconn,
+		      struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
+{
+	struct new_minor args;
+
+	args.vol_nr = 0;
+	args.minor = 0;
+
+	if (!new_minor_from_tags(nlp->tag_list, &args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+
+	reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr);
+
+	return 0;
+}
+
+static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	if (mdev->state.disk == D_DISKLESS &&
+	    mdev->state.conn == C_STANDALONE &&
+	    mdev->state.role == R_SECONDARY) {
+		drbd_delete_device(mdev_to_minor(mdev));
+		reply->ret_code = NO_ERROR;
+	} else {
+		reply->ret_code = ERR_MINOR_CONFIGURED;
+	}
+	return 0;
+}
+
+static int drbd_nl_del_conn(struct drbd_tconn *tconn,
+			    struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
+{
+	if (conn_lowest_minor(tconn) < 0) {
+		drbd_free_tconn(tconn);
+		reply->ret_code = NO_ERROR;
+	} else {
+		reply->ret_code = ERR_CONN_IN_USE;
+	}
+
+	return 0;
+}
+
 enum cn_handler_type {
 	CHT_MINOR,
 	CHT_CONN,
 	CHT_CTOR,
 	/* CHT_RES, later */
 };
-
 struct cn_handler_struct {
 	enum cn_handler_type type;
 	union {
@@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = {
 				    sizeof(struct get_timeout_flag_tag_len_struct)},
 	[ P_start_ov ]		= { CHT_MINOR, { &drbd_nl_start_ov },	0 },
 	[ P_new_c_uuid ]	= { CHT_MINOR, { &drbd_nl_new_c_uuid },	0 },
+	[ P_new_connection ]	= { CHT_CTOR,  { .constructor = &drbd_nl_new_conn }, 0 },
+	[ P_new_minor ]		= { CHT_CONN,  { .conn_based = &drbd_nl_new_minor }, 0 },
+	[ P_del_minor ]		= { CHT_MINOR, { &drbd_nl_del_minor },	0 },
+	[ P_del_connection ]    = { CHT_CONN,  { .conn_based = &drbd_nl_del_conn }, 0 },
 };
 
 static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 7683b4ab6583..e192167e6145 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -156,6 +156,9 @@ enum drbd_ret_code {
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
 	ERR_CONN_NOT_KNOWN      = 158,
+	ERR_CONN_IN_USE         = 159,
+	ERR_MINOR_CONFIGURED    = 160,
+	ERR_MINOR_EXISTS	= 161,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index ab6159e4fcf0..1216c7a432c5 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26,
 NL_RESPONSE(return_code_only, 27)
 #endif
 
+NL_PACKET(new_connection, 28, /* CHT_CTOR */
+	NL_STRING(	85,	T_MANDATORY,	name, DRBD_NL_OBJ_NAME_LEN)
+)
+
+NL_PACKET(new_minor, 29, /* CHT_CONN */
+	NL_INTEGER(	86,	T_MANDATORY,	minor)
+	NL_INTEGER(	87,	T_MANDATORY,	vol_nr)
+)
+
+NL_PACKET(del_minor, 30, ) /* CHT_MINOR */
+NL_PACKET(del_connection, 31, ) /* CHT_CONN */
+
 #undef NL_PACKET
 #undef NL_INTEGER
 #undef NL_INT64
-- 
cgit v1.2.3-55-g7522


From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Wed, 23 Feb 2011 12:51:43 +0100
Subject: drbd: default to detach on-io-error

Old default behaviour was "pass-on",
which is not useful in production at all.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 75f05af33725..22920a8af4e2 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -125,7 +125,7 @@
 #define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
 #define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
 
-#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
+#define DRBD_ON_IO_ERROR_DEF EP_DETACH
 #define DRBD_FENCING_DEF FP_DONT_CARE
 #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
-- 
cgit v1.2.3-55-g7522


From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 7 Mar 2011 10:20:08 +0100
Subject: drbd: prepare the transition from connector to genetlink

This adds the new API header and helper files.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h         | 349 +++++++++++++++++++++++++++++++
 include/linux/drbd_genl_api.h     |  55 +++++
 include/linux/genl_magic_func.h   | 417 ++++++++++++++++++++++++++++++++++++++
 include/linux/genl_magic_struct.h | 260 ++++++++++++++++++++++++
 4 files changed, 1081 insertions(+)
 create mode 100644 include/linux/drbd_genl.h
 create mode 100644 include/linux/drbd_genl_api.h
 create mode 100644 include/linux/genl_magic_func.h
 create mode 100644 include/linux/genl_magic_struct.h

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
new file mode 100644
index 000000000000..84e16848f7a1
--- /dev/null
+++ b/include/linux/drbd_genl.h
@@ -0,0 +1,349 @@
+/*
+ * General overview:
+ * full generic netlink message:
+ * |nlmsghdr|genlmsghdr|<payload>
+ *
+ * payload:
+ * |optional fixed size family header|<sequence of netlink attributes>
+ *
+ * sequence of netlink attributes:
+ * I chose to have all "top level" attributes NLA_NESTED,
+ * corresponding to some real struct.
+ * So we have a sequence of |tla, len|<nested nla sequence>
+ *
+ * nested nla sequence:
+ * may be empty, or contain a sequence of netlink attributes
+ * representing the struct fields.
+ *
+ * The tag number of any field (regardless of containing struct)
+ * will be available as T_ ## field_name,
+ * so you cannot have the same field name in two differnt structs.
+ *
+ * The tag numbers themselves are per struct, though,
+ * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
+ * which we won't use here).
+ * The tag numbers are used as index in the respective nla_policy array.
+ *
+ * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
+ *	genl_magic_struct.h
+ *		generates the struct declaration,
+ *		generates an entry in the tla enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static tla policy
+ *		with .type = NLA_NESTED
+ *		generates the static <struct_name>_nl_policy definition,
+ *		and static conversion functions
+ *
+ *	genl_magic_func.h
+ *
+ * GENL_mc_group(group)
+ *	genl_magic_struct.h
+ *		does nothing
+ *	genl_magic_func.h
+ *		defines and registers the mcast group,
+ *		and provides a send helper
+ *
+ * GENL_notification(op_name, op_num, mcast_group, tla list)
+ *	These are notifications to userspace.
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		does nothing
+ *
+ *	mcast group: the name of the mcast group this notification should be
+ *	expected on
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ *
+ * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
+ *	These are requests from userspace.
+ *
+ *	_op and _notification share the same "number space",
+ *	op_nr will be assigned to "genlmsghdr->cmd"
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static genl_ops array,
+ *		and static register/unregister functions to
+ *		genl_register_family_with_ops().
+ *
+ *	flags and handler:
+ *		GENL_op_init( .doit = x, .dumpit = y, .flags = something)
+ *		GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ */
+
+/*
+ * STRUCTS
+ */
+
+/* this is sent kernel -> userland on various error conditions, and contains
+ * informational textual info, which is supposedly human readable.
+ * The computer relevant return code is in the drbd_genlmsghdr.
+ */
+GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
+		/* "arbitrary" size strings, nla_policy.len = 0 */
+	__str_field(1, GENLA_F_MANDATORY,	info_text, 0)
+)
+
+/* Configuration requests typically need a context to operate on.
+ * Possible keys are device minor (fits in the drbd_genlmsghdr),
+ * the replication link (aka connection) name,
+ * and/or the replication group (aka resource) name,
+ * and the volume id within the resource. */
+GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
+		/* currently only 256 volumes per group,
+		 * but maybe we still change that */
+	__u32_field(1, GENLA_F_MANDATORY,	ctx_volume)
+	__str_field(2, GENLA_F_MANDATORY,	ctx_conn_name, 128)
+)
+
+GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
+	__u64_field(1, GENLA_F_MANDATORY,	disk_size)
+	__str_field(2, GENLA_F_REQUIRED,	backing_dev,	128)
+	__str_field(3, GENLA_F_REQUIRED,	meta_dev,	128)
+	__u32_field(4, GENLA_F_REQUIRED,	meta_dev_idx)
+	__u32_field(5, GENLA_F_MANDATORY,	max_bio_bvecs)
+	__u32_field(6, GENLA_F_MANDATORY,	on_io_error)
+	__u32_field(7, GENLA_F_MANDATORY,	fencing)
+	__flg_field(8, GENLA_F_MANDATORY,	no_disk_barrier)
+	__flg_field(9, GENLA_F_MANDATORY,	no_disk_flush)
+	__flg_field(10, GENLA_F_MANDATORY,	no_disk_drain)
+	__flg_field(11, GENLA_F_MANDATORY,	no_md_flush)
+	__flg_field(12, GENLA_F_MANDATORY,	use_bmbv)
+)
+
+GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf,
+	__u32_field(1,	GENLA_F_MANDATORY,	rate)
+	__u32_field(2,	GENLA_F_MANDATORY,	after)
+	__u32_field(3,	GENLA_F_MANDATORY,	al_extents)
+	__str_field(4,	GENLA_F_MANDATORY,	cpu_mask,       32)
+	__str_field(5,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field(6,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__flg_field(7,	GENLA_F_MANDATORY,	use_rle)
+	__u32_field(8,	GENLA_F_MANDATORY,	on_no_data)
+	__u32_field(9,	GENLA_F_MANDATORY,	c_plan_ahead)
+	__u32_field(10,	GENLA_F_MANDATORY,	c_delay_target)
+	__u32_field(11,	GENLA_F_MANDATORY,	c_fill_target)
+	__u32_field(12,	GENLA_F_MANDATORY,	c_max_rate)
+	__u32_field(13,	GENLA_F_MANDATORY,	c_min_rate)
+)
+
+GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
+	__str_field(1,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+						shared_secret,	SHARED_SECRET_MAX)
+	__str_field(2,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field(3,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field(4,	GENLA_F_REQUIRED,	my_addr,	128)
+	__str_field(5,	GENLA_F_REQUIRED,	peer_addr,	128)
+	__u32_field(6,	GENLA_F_REQUIRED,	wire_protocol)
+	__u32_field(7,	GENLA_F_MANDATORY,	try_connect_int)
+	__u32_field(8,	GENLA_F_MANDATORY,	timeout)
+	__u32_field(9,	GENLA_F_MANDATORY,	ping_int)
+	__u32_field(10,	GENLA_F_MANDATORY,	ping_timeo)
+	__u32_field(11,	GENLA_F_MANDATORY,	sndbuf_size)
+	__u32_field(12,	GENLA_F_MANDATORY,	rcvbuf_size)
+	__u32_field(13,	GENLA_F_MANDATORY,	ko_count)
+	__u32_field(14,	GENLA_F_MANDATORY,	max_buffers)
+	__u32_field(15,	GENLA_F_MANDATORY,	max_epoch_size)
+	__u32_field(16,	GENLA_F_MANDATORY,	unplug_watermark)
+	__u32_field(17,	GENLA_F_MANDATORY,	after_sb_0p)
+	__u32_field(18,	GENLA_F_MANDATORY,	after_sb_1p)
+	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_2p)
+	__u32_field(20,	GENLA_F_MANDATORY,	rr_conflict)
+	__u32_field(21,	GENLA_F_MANDATORY,	on_congestion)
+	__u32_field(22,	GENLA_F_MANDATORY,	cong_fill)
+	__u32_field(23,	GENLA_F_MANDATORY,	cong_extents)
+	__flg_field(24, GENLA_F_MANDATORY,	two_primaries)
+	__flg_field(25, GENLA_F_MANDATORY,	want_lose)
+	__flg_field(26, GENLA_F_MANDATORY,	no_cork)
+	__flg_field(27, GENLA_F_MANDATORY,	always_asbp)
+	__flg_field(28, GENLA_F_MANDATORY,	dry_run)
+)
+
+GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
+	__flg_field(1, GENLA_F_MANDATORY,	assume_uptodate)
+)
+
+GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
+	__u64_field(1, GENLA_F_MANDATORY,	resize_size)
+	__flg_field(2, GENLA_F_MANDATORY,	resize_force)
+	__flg_field(3, GENLA_F_MANDATORY,	no_resync)
+)
+
+GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
+	/* the reason of the broadcast,
+	 * if this is an event triggered broadcast. */
+	__u32_field(1, GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(2, GENLA_F_REQUIRED,	current_state)
+	__u64_field(3, GENLA_F_MANDATORY,	capacity)
+	__u64_field(4, GENLA_F_MANDATORY,	ed_uuid)
+
+	/* These are for broadcast from after state change work.
+	 * prev_state and new_state are from the moment the state change took
+	 * place, new_state is not neccessarily the same as current_state,
+	 * there may have been more state changes since.  Which will be
+	 * broadcasted soon, in their respective after state change work.  */
+	__u32_field(5, GENLA_F_MANDATORY,	prev_state)
+	__u32_field(6, GENLA_F_MANDATORY,	new_state)
+
+	/* if we have a local disk: */
+	__bin_field(7, GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, GENLA_F_MANDATORY,	disk_flags)
+	__u64_field(9, GENLA_F_MANDATORY,	bits_total)
+	__u64_field(10, GENLA_F_MANDATORY,	bits_oos)
+	/* and in case resync or online verify is active */
+	__u64_field(11, GENLA_F_MANDATORY,	bits_rs_total)
+	__u64_field(12, GENLA_F_MANDATORY,	bits_rs_failed)
+
+	/* for pre and post notifications of helper execution */
+	__str_field(13, GENLA_F_MANDATORY,	helper, 32)
+	__u32_field(14, GENLA_F_MANDATORY,	helper_exit_code)
+)
+
+GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
+	__u64_field(1, GENLA_F_MANDATORY,	ov_start_sector)
+)
+
+GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
+	__flg_field(1, GENLA_F_MANDATORY, clear_bm)
+)
+
+GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
+	__u32_field(1,	GENLA_F_REQUIRED,	timeout_type)
+)
+
+GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
+	__flg_field(1, GENLA_F_MANDATORY,	force_disconnect)
+)
+
+/*
+ * Notifications and commands (genlmsghdr->cmd)
+ */
+GENL_mc_group(events)
+
+	/* kernel -> userspace announcement of changes */
+GENL_notification(
+	DRBD_EVENT, 1, events,
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+)
+
+	/* query kernel for specific or all info */
+GENL_op(
+	DRBD_ADM_GET_STATUS, 2,
+	GENL_op_init(
+		.doit = drbd_adm_get_status,
+		.dumpit = drbd_adm_get_status_all,
+		/* anyone may ask for the status,
+		 * it is broadcasted anyways */
+	),
+	/* To select the object .doit.
+	 * Or a subset of objects in .dumpit. */
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY)
+)
+
+#if 0
+	/* TO BE DONE */
+	/* create or destroy resources, aka replication groups */
+GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+#endif
+
+	/* add DRBD minor devices as volumes to resources */
+GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+
+	/* add or delete replication links to resources */
+GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+
+	/* operates on replication links */
+GENL_op(DRBD_ADM_SYNCER, 9,
+	GENL_doit(drbd_adm_syncer),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_CONNECT, 10,
+	GENL_doit(drbd_adm_connect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+
+	/* operates on minors */
+GENL_op(DRBD_ADM_ATTACH, 12,
+	GENL_doit(drbd_adm_attach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_RESIZE, 13,
+	GENL_doit(drbd_adm_resize),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY)
+)
+
+	/* operates on all volumes within a resource */
+GENL_op(
+	DRBD_ADM_PRIMARY, 14,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_SECONDARY, 15,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_NEW_C_UUID, 16,
+	GENL_doit(drbd_adm_new_c_uuid),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_START_OV, 17,
+	GENL_doit(drbd_adm_start_ov),
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY)
+)
+
+GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h
new file mode 100644
index 000000000000..9ef50d51e34e
--- /dev/null
+++ b/include/linux/drbd_genl_api.h
@@ -0,0 +1,55 @@
+#ifndef DRBD_GENL_STRUCT_H
+#define DRBD_GENL_STRUCT_H
+
+/**
+ * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
+ * @minor:
+ *     For admin requests (user -> kernel): which minor device to operate on.
+ *     For (unicast) replies or informational (broadcast) messages
+ *     (kernel -> user): which minor device the information is about.
+ *     If we do not operate on minors, but on connections or resources,
+ *     the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
+ *     is used instead.
+ * @flags: possible operation modifiers (relevant only for user->kernel):
+ *     DRBD_GENL_F_SET_DEFAULTS
+ * @volume:
+ *     When creating a new minor (adding it to a resource), the resource needs
+ *     to know which volume number within the resource this is supposed to be.
+ *     The volume number corresponds to the same volume number on the remote side,
+ *     whereas the minor number on the remote side may be different
+ *     (union with flags).
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags);
+ */
+struct drbd_genlmsghdr {
+	__u32 minor;
+	union {
+	__u32 flags;
+	__s32 ret_code;
+	};
+};
+
+/* To be used in drbd_genlmsghdr.flags */
+enum {
+	DRBD_GENL_F_SET_DEFAULTS = 1,
+};
+
+enum drbd_state_info_bcast_reason {
+	SIB_GET_STATUS_REPLY = 1,
+	SIB_STATE_CHANGE = 2,
+	SIB_HELPER_PRE = 3,
+	SIB_HELPER_POST = 4,
+	SIB_SYNC_PROGRESS = 5,
+};
+
+/* hack around predefined gcc/cpp "linux=1",
+ * we cannot possibly include <1/drbd_genl.h> */
+#undef linux
+
+#include <linux/drbd.h>
+#define GENL_MAGIC_VERSION	API_VERSION
+#define GENL_MAGIC_FAMILY	drbd
+#define GENL_MAGIC_FAMILY_HDRSZ	sizeof(struct drbd_genlmsghdr)
+#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
+#include <linux/genl_magic_struct.h>
+
+#endif
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
new file mode 100644
index 000000000000..8a86f659d363
--- /dev/null
+++ b/include/linux/genl_magic_func.h
@@ -0,0 +1,417 @@
+#ifndef GENL_MAGIC_FUNC_H
+#define GENL_MAGIC_FUNC_H
+
+#include <linux/genl_magic_struct.h>
+
+/*
+ * Extension of genl attribute validation policies			{{{1
+ *									{{{2
+ */
+
+/**
+ * nla_is_required - return true if this attribute is required
+ * @nla: netlink attribute
+ */
+static inline int nla_is_required(const struct nlattr *nla)
+{
+        return nla->nla_type & GENLA_F_REQUIRED;
+}
+
+/**
+ * nla_is_mandatory - return true if understanding this attribute is mandatory
+ * @nla: netlink attribute
+ * Note: REQUIRED attributes are implicitly MANDATORY as well
+ */
+static inline int nla_is_mandatory(const struct nlattr *nla)
+{
+        return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED);
+}
+
+/* Functionality to be integrated into nla_parse(), and validate_nla(),
+ * respectively.
+ *
+ * Enforcing the "mandatory" bit is done here,
+ * by rejecting unknown mandatory attributes.
+ *
+ * Part of enforcing the "required" flag would mean to embed it into
+ * nla_policy.type, and extending validate_nla(), which currently does
+ * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels,
+ * so we cannot do that.  Thats why enforcing "required" is done in the
+ * generated assignment functions below. */
+static int nla_check_unknown(int maxtype, struct nlattr *head, int len)
+{
+	struct nlattr *nla;
+	int rem;
+        nla_for_each_attr(nla, head, len, rem) {
+		__u16 type = nla_type(nla);
+		if (type > maxtype && nla_is_mandatory(nla))
+			return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/*
+ * Magic: declare tla policy						{{{1
+ * Magic: declare nested policies
+ *									{{{2
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+	[tag_name] = { .type = NLA_NESTED },
+
+static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
+{ s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \
+	[__nla_type(attr_nr)] = { .type = nla_type },
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
+		__get, __put)						\
+	[__nla_type(attr_nr)] = { .type = nla_type,			\
+		.len = maxlen - (nla_type == NLA_NUL_STRING) },
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#ifndef __KERNEL__
+#ifndef pr_info
+#define pr_info(args...)	fprintf(stderr, args);
+#endif
+#endif
+
+#if 1
+static void dprint_field(const char *dir, int nla_type,
+		const char *name, void *valp)
+{
+	__u64 val = valp ? *(__u32 *)valp : 1;
+	switch (nla_type) {
+	case NLA_U8:  val = (__u8)val;
+	case NLA_U16: val = (__u16)val;
+	case NLA_U32: val = (__u32)val;
+		pr_info("%s attr %s: %d 0x%08x\n", dir,
+			name, (int)val, (unsigned)val);
+		break;
+	case NLA_U64:
+		val = *(__u64*)valp;
+		pr_info("%s attr %s: %lld 0x%08llx\n", dir,
+			name, (long long)val, (unsigned long long)val);
+		break;
+	case NLA_FLAG:
+		if (val)
+			pr_info("%s attr %s: set\n", dir, name);
+		break;
+	}
+}
+
+static void dprint_array(const char *dir, int nla_type,
+		const char *name, const char *val, unsigned len)
+{
+	switch (nla_type) {
+	case NLA_NUL_STRING:
+		if (len && val[len-1] == '\0')
+			len--;
+		pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
+		break;
+	default:
+		/* we can always show 4 byte,
+		 * thats what nlattr are aligned to. */
+		pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
+			dir, name, len, val[0], val[1], val[2], val[3]);
+	}
+}
+
+#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b);
+
+/* Name is a member field name of the struct s.
+ * If s is NULL (only parsing, no copy requested in *_from_attrs()),
+ * nla is supposed to point to the attribute containing the information
+ * corresponding to that struct member. */
+#define DPRINT_FIELD(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_field(dir, nla_type, #name, &s->name);	\
+		else if (nla)						\
+			dprint_field(dir, nla_type, #name,		\
+				(nla_type == NLA_FLAG) ? NULL		\
+						: nla_data(nla));	\
+	} while (0)
+
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_array(dir, nla_type, #name,		\
+					s->name, s->name ## _len);	\
+		else if (nla)						\
+			dprint_array(dir, nla_type, #name,		\
+					nla_data(nla), nla_len(nla));	\
+	} while (0)
+#else
+#define DPRINT_TLA(a, op, b) do {} while (0)
+#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
+#endif
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate struct from attribute table:
+ *									{{{2
+ */
+
+/* processing of generic netlink messages is serialized.
+ * use one static buffer for parsing of nested attributes */
+static struct nlattr *nested_attr_tb[128];
+
+#ifndef BUILD_BUG_ON
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or where-ever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#endif
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+	/* static, potentially unused */				\
+int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[])	\
+{									\
+	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
+	struct nlattr *tla = tb[tag_number];				\
+	struct nlattr **ntb = nested_attr_tb;				\
+	struct nlattr *nla;						\
+	int err;							\
+	BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb));	\
+	if (!tla)							\
+		return -ENOMSG;						\
+	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
+	err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
+	if (err)							\
+		return err;						\
+	err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla));	\
+	if (err)							\
+	      return err;						\
+									\
+	s_fields							\
+	return 0;							\
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+		nla = ntb[__nla_type(attr_nr)];				\
+		if (nla) {						\
+			if (s)						\
+				s->name = __get(nla);			\
+			DPRINT_FIELD("<<", nla_type, name, s, nla);	\
+		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
+			pr_info("<< missing attr: %s\n", #name);	\
+			return -ENOMSG;					\
+		}
+
+/* validate_nla() already checked nla_len <= maxlen appropriately. */
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+		nla = ntb[__nla_type(attr_nr)];				\
+		if (nla) {						\
+			if (s)						\
+				s->name ## _len =			\
+					__get(s->name, nla, maxlen);	\
+			DPRINT_ARRAY("<<", nla_type, name, s, nla);	\
+		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
+			pr_info("<< missing attr: %s\n", #name);	\
+			return -ENOMSG;					\
+		}							\
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+/*
+ * Magic: define op number to op name mapping				{{{1
+ *									{{{2
+ */
+const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
+{
+	switch (cmd) {
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+	case op_num: return #op_name;
+#include GENL_MAGIC_INCLUDE_FILE
+	default:
+		     return "unknown";
+	}
+}
+
+#ifdef __KERNEL__
+#include <linux/stringify.h>
+/*
+ * Magic: define genl_ops						{{{1
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+{								\
+	handler							\
+	.cmd = op_name,						\
+	.policy	= CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),	\
+},
+
+#define ZZZ_genl_ops		CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
+static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+/*
+ * Define the genl_family, multicast groups,				{{{1
+ * and provide register/unregister functions.
+ *									{{{2
+ */
+#define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
+static struct genl_family ZZZ_genl_family __read_mostly = {
+	.id = GENL_ID_GENERATE,
+	.name = __stringify(GENL_MAGIC_FAMILY),
+	.version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+};
+
+/*
+ * Magic: define multicast groups
+ * Magic: define multicast group registration helper
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+static struct genl_multicast_group					\
+CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = {		\
+	.name = #group,							\
+};									\
+static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
+	struct sk_buff *skb, gfp_t flags)				\
+{									\
+	unsigned int group_id =						\
+		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\
+	if (!group_id)							\
+		return -EINVAL;						\
+	return genlmsg_multicast(skb, 0, group_id, flags);		\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+{
+	int err = genl_register_family_with_ops(&ZZZ_genl_family,
+		ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
+	if (err)
+		return err;
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+	err = genl_register_mc_group(&ZZZ_genl_family,			\
+		&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group));		\
+	if (err)							\
+		goto fail;						\
+	else								\
+		pr_info("%s: mcg %s: %u\n", #group,			\
+			__stringify(GENL_MAGIC_FAMILY),			\
+			CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+	return 0;
+fail:
+	genl_unregister_family(&ZZZ_genl_family);
+	return err;
+}
+
+void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
+{
+	genl_unregister_family(&ZZZ_genl_family);
+}
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate skb from struct.
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s,	\
+		const bool exclude_sensitive)				\
+{									\
+	struct nlattr *tla = nla_nest_start(skb, tag_number);		\
+	if (!tla)							\
+		goto nla_put_failure;					\
+	DPRINT_TLA(#s_name, "-=>", #tag_name);				\
+	s_fields							\
+	nla_nest_end(skb, tla);						\
+	return 0;							\
+									\
+nla_put_failure:							\
+	if (tla)							\
+		nla_nest_cancel(skb, tla);				\
+        return -EMSGSIZE;						\
+}									\
+static inline int s_name ## _to_priv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 0);				\
+}									\
+static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 1);				\
+}
+
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
+		__put(skb, attr_nr, s->name);				\
+	}
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
+		__put(skb, attr_nr, min_t(int, maxlen,			\
+			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
+						s->name);		\
+	}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#endif /* __KERNEL__ */
+
+/* }}}1 */
+#endif /* GENL_MAGIC_FUNC_H */
+/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
new file mode 100644
index 000000000000..745ebfd6c7e5
--- /dev/null
+++ b/include/linux/genl_magic_struct.h
@@ -0,0 +1,260 @@
+#ifndef GENL_MAGIC_STRUCT_H
+#define GENL_MAGIC_STRUCT_H
+
+#ifndef GENL_MAGIC_FAMILY
+# error "you need to define GENL_MAGIC_FAMILY before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_VERSION
+# error "you need to define GENL_MAGIC_VERSION before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_INCLUDE_FILE
+# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
+#endif
+
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+#define CONCAT__(a,b)	a ## b
+#define CONCAT_(a,b)	CONCAT__(a,b)
+
+extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
+extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
+
+/*
+ * Extension of genl attribute validation policies			{{{2
+ */
+
+/**
+ * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement
+ *
+ * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid.
+ * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that.
+ *
+ * @GENLA_F_MANDATORY: if present, receiver _must_ understand it.
+ * Without this, unknown attributes (> maxtype) are _silently_ ignored
+ * by validate_nla().
+ *
+ * To be used for API extensions, so older kernel can reject requests for not
+ * yet implemented features, if newer userland tries to use them even though
+ * the genl_family version clearly indicates they are not available.
+ *
+ * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure.
+ * To be used for API extensions for things that have sane defaults,
+ * so newer userland can still talk to older kernel, knowing it will
+ * silently ignore these attributes if not yet known.
+ *
+ * NOTE: These flags overload
+ *   NLA_F_NESTED		(1 << 15)
+ *   NLA_F_NET_BYTEORDER	(1 << 14)
+ * from linux/netlink.h, which are not useful for validate_nla():
+ * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting
+ * .type = NLA_NESTED in the appropriate policy.
+ *
+ * See also: nla_type()
+ */
+enum {
+	GENLA_F_MAY_IGNORE	= 0,
+	GENLA_F_MANDATORY	= 1 << 14,
+	GENLA_F_REQUIRED	= 1 << 15,
+
+	/* This will not be present in the __u16 .nla_type, but can be
+	 * triggered on in <struct>_to_skb, to exclude "sensitive"
+	 * information from broadcasts, or on unpriviledged get requests.
+	 * This is useful because genetlink multicast groups can be listened in
+	 * on by anyone.  */
+	GENLA_F_SENSITIVE	= 1 << 16,
+};
+
+#define __nla_type(x)	((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK))
+
+/*									}}}1
+ * MAGIC
+ * multi-include macro expansion magic starts here
+ */
+
+/* MAGIC helpers							{{{2 */
+
+/* possible field types */
+#define __flg_field(attr_nr, attr_flag, name) \
+	__field(attr_nr, attr_flag, name, NLA_FLAG, char, \
+			nla_get_flag, __nla_put_flag)
+#define __u8_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
+			nla_get_u8, NLA_PUT_U8)
+#define __u16_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
+			nla_get_u16, NLA_PUT_U16)
+#define __u32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
+			nla_get_u32, NLA_PUT_U32)
+#define __u64_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
+			nla_get_u64, NLA_PUT_U64)
+#define __str_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
+			nla_strlcpy, NLA_PUT)
+#define __bin_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
+			nla_memcpy, NLA_PUT)
+
+#define __nla_put_flag(skb, attrtype, value)		\
+	do {						\
+		if (value)				\
+			NLA_PUT_FLAG(skb, attrtype);	\
+	} while (0)
+
+#define GENL_op_init(args...)	args
+#define GENL_doit(handler)		\
+	.doit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+#define GENL_dumpit(handler)		\
+	.dumpit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+
+/*									}}}1
+ * Magic: define the enum symbols for genl_ops
+ * Magic: define the enum symbols for top level attributes
+ * Magic: define the enum symbols for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	op_name = op_num,
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)			\
+	op_name = op_num,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+		tag_name = tag_number,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)	\
+enum {								\
+	s_fields						\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	T_ ## name = (__u16)(attr_nr | attr_flag),
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	T_ ## name = (__u16)(attr_nr | attr_flag),
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: compile time assert unique numbers for operations
+ * Magic: -"- unique numbers for top level attributes
+ * Magic: -"- unique numbers for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)	\
+	case op_name:
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	case op_name:
+
+static inline void ct_assert_unique_operations(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+		case tag_number:
+
+static inline void ct_assert_unique_top_level_attributes(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
+{									\
+	switch (0) {							\
+		s_fields						\
+			;						\
+	}								\
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	case attr_nr:
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	case attr_nr:
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: declare structs
+ * struct <name> {
+ *	fields
+ * };
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+struct s_name { s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \
+	type name;
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	type name[maxlen];	\
+	__u32 name ## _len;
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1 */
+#endif /* GENL_MAGIC_STRUCT_H */
+/* vim: set foldmethod=marker nofoldenable : */
-- 
cgit v1.2.3-55-g7522


From d4f65b5d2497b2fd9c45f06b71deb4ab084a5b66 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 13 Sep 2012 13:06:29 +0100
Subject: KEYS: Add payload preparsing opportunity prior to key instantiate or
 update

Give the key type the opportunity to preparse the payload prior to the
instantiation and update routines being called.  This is done with the
provision of two new key type operations:

	int (*preparse)(struct key_preparsed_payload *prep);
	void (*free_preparse)(struct key_preparsed_payload *prep);

If the first operation is present, then it is called before key creation (in
the add/update case) or before the key semaphore is taken (in the update and
instantiate cases).  The second operation is called to clean up if the first
was called.

preparse() is given the opportunity to fill in the following structure:

	struct key_preparsed_payload {
		char		*description;
		void		*type_data[2];
		void		*payload;
		const void	*data;
		size_t		datalen;
		size_t		quotalen;
	};

Before the preparser is called, the first three fields will have been cleared,
the payload pointer and size will be stored in data and datalen and the default
quota size from the key_type struct will be stored into quotalen.

The preparser may parse the payload in any way it likes and may store data in
the type_data[] and payload fields for use by the instantiate() and update()
ops.

The preparser may also propose a description for the key by attaching it as a
string to the description field.  This can be used by passing a NULL or ""
description to the add_key() system call or the key_create_or_update()
function.  This cannot work with request_key() as that required the description
to tell the upcall about the key to be created.

This, for example permits keys that store PGP public keys to generate their own
name from the user ID and public key fingerprint in the key.

The instantiate() and update() operations are then modified to look like this:

	int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
	int (*update)(struct key *key, struct key_preparsed_payload *prep);

and the new payload data is passed in *prep, whether or not it was preparsed.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys.txt          |  50 +++++++++++++-
 fs/cifs/cifs_spnego.c                    |   6 +-
 fs/cifs/cifsacl.c                        |   8 +--
 include/keys/user-type.h                 |   6 +-
 include/linux/key-type.h                 |  35 +++++++++-
 net/ceph/crypto.c                        |   9 +--
 net/dns_resolver/dns_key.c               |   6 +-
 net/rxrpc/ar-key.c                       |  40 +++++------
 security/keys/encrypted-keys/encrypted.c |  16 +++--
 security/keys/key.c                      | 114 ++++++++++++++++++++++---------
 security/keys/keyctl.c                   |  18 +++--
 security/keys/keyring.c                  |   6 +-
 security/keys/request_key_auth.c         |   8 +--
 security/keys/trusted.c                  |  16 +++--
 security/keys/user_defined.c             |  14 ++--
 15 files changed, 250 insertions(+), 102 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index aa0dbd74b71b..7d9ca92022d8 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -412,6 +412,10 @@ The main syscalls are:
      to the keyring. In this case, an error will be generated if the process
      does not have permission to write to the keyring.
 
+     If the key type supports it, if the description is NULL or an empty
+     string, the key type will try and generate a description from the content
+     of the payload.
+
      The payload is optional, and the pointer can be NULL if not required by
      the type. The payload is plen in size, and plen can be zero for an empty
      payload.
@@ -1114,12 +1118,53 @@ The structure has a number of fields, some of which are mandatory:
      it should return 0.
 
 
- (*) int (*instantiate)(struct key *key, const void *data, size_t datalen);
+ (*) int (*preparse)(struct key_preparsed_payload *prep);
+
+     This optional method permits the key type to attempt to parse payload
+     before a key is created (add key) or the key semaphore is taken (update or
+     instantiate key).  The structure pointed to by prep looks like:
+
+	struct key_preparsed_payload {
+		char		*description;
+		void		*type_data[2];
+		void		*payload;
+		const void	*data;
+		size_t		datalen;
+		size_t		quotalen;
+	};
+
+     Before calling the method, the caller will fill in data and datalen with
+     the payload blob parameters; quotalen will be filled in with the default
+     quota size from the key type and the rest will be cleared.
+
+     If a description can be proposed from the payload contents, that should be
+     attached as a string to the description field.  This will be used for the
+     key description if the caller of add_key() passes NULL or "".
+
+     The method can attach anything it likes to type_data[] and payload.  These
+     are merely passed along to the instantiate() or update() operations.
+
+     The method should return 0 if success ful or a negative error code
+     otherwise.
+
+     
+ (*) void (*free_preparse)(struct key_preparsed_payload *prep);
+
+     This method is only required if the preparse() method is provided,
+     otherwise it is unused.  It cleans up anything attached to the
+     description, type_data and payload fields of the key_preparsed_payload
+     struct as filled in by the preparse() method.
+
+
+ (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
 
      This method is called to attach a payload to a key during construction.
      The payload attached need not bear any relation to the data passed to this
      function.
 
+     The prep->data and prep->datalen fields will define the original payload
+     blob.  If preparse() was supplied then other fields may be filled in also.
+
      If the amount of data attached to the key differs from the size in
      keytype->def_datalen, then key_payload_reserve() should be called.
 
@@ -1135,6 +1180,9 @@ The structure has a number of fields, some of which are mandatory:
      If this type of key can be updated, then this method should be provided.
      It is called to update a key's payload from the blob of data provided.
 
+     The prep->data and prep->datalen fields will define the original payload
+     blob.  If preparse() was supplied then other fields may be filled in also.
+
      key_payload_reserve() should be called if the data length might change
      before any changes are actually made. Note that if this succeeds, the type
      is committed to changing the key because it's already been altered, so all
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index e622863b292f..086f381d6489 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -31,18 +31,18 @@
 
 /* create a new cifs key */
 static int
-cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen)
+cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	char *payload;
 	int ret;
 
 	ret = -ENOMEM;
-	payload = kmalloc(datalen, GFP_KERNEL);
+	payload = kmalloc(prep->datalen, GFP_KERNEL);
 	if (!payload)
 		goto error;
 
 	/* attach the data */
-	memcpy(payload, data, datalen);
+	memcpy(payload, prep->data, prep->datalen);
 	key->payload.data = payload;
 	ret = 0;
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 05f4dc263a23..f3c60e264ca8 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -167,17 +167,17 @@ static struct shrinker cifs_shrinker = {
 };
 
 static int
-cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
+cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	char *payload;
 
-	payload = kmalloc(datalen, GFP_KERNEL);
+	payload = kmalloc(prep->datalen, GFP_KERNEL);
 	if (!payload)
 		return -ENOMEM;
 
-	memcpy(payload, data, datalen);
+	memcpy(payload, prep->data, prep->datalen);
 	key->payload.data = payload;
-	key->datalen = datalen;
+	key->datalen = prep->datalen;
 	return 0;
 }
 
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index bc9ec1d7698c..5e452c84f1e6 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -35,8 +35,10 @@ struct user_key_payload {
 extern struct key_type key_type_user;
 extern struct key_type key_type_logon;
 
-extern int user_instantiate(struct key *key, const void *data, size_t datalen);
-extern int user_update(struct key *key, const void *data, size_t datalen);
+struct key_preparsed_payload;
+
+extern int user_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern int user_update(struct key *key, struct key_preparsed_payload *prep);
 extern int user_match(const struct key *key, const void *criterion);
 extern void user_revoke(struct key *key);
 extern void user_destroy(struct key *key);
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index f0c651cda7b0..518a53afb9ea 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -26,6 +26,27 @@ struct key_construction {
 	struct key	*authkey;/* authorisation for key being constructed */
 };
 
+/*
+ * Pre-parsed payload, used by key add, update and instantiate.
+ *
+ * This struct will be cleared and data and datalen will be set with the data
+ * and length parameters from the caller and quotalen will be set from
+ * def_datalen from the key type.  Then if the preparse() op is provided by the
+ * key type, that will be called.  Then the struct will be passed to the
+ * instantiate() or the update() op.
+ *
+ * If the preparse() op is given, the free_preparse() op will be called to
+ * clear the contents.
+ */
+struct key_preparsed_payload {
+	char		*description;	/* Proposed key description (or NULL) */
+	void		*type_data[2];	/* Private key-type data */
+	void		*payload;	/* Proposed payload */
+	const void	*data;		/* Raw data */
+	size_t		datalen;	/* Raw datalen */
+	size_t		quotalen;	/* Quota length for proposed payload */
+};
+
 typedef int (*request_key_actor_t)(struct key_construction *key,
 				   const char *op, void *aux);
 
@@ -45,18 +66,28 @@ struct key_type {
 	/* vet a description */
 	int (*vet_description)(const char *description);
 
+	/* Preparse the data blob from userspace that is to be the payload,
+	 * generating a proposed description and payload that will be handed to
+	 * the instantiate() and update() ops.
+	 */
+	int (*preparse)(struct key_preparsed_payload *prep);
+
+	/* Free a preparse data structure.
+	 */
+	void (*free_preparse)(struct key_preparsed_payload *prep);
+
 	/* instantiate a key of this type
 	 * - this method should call key_payload_reserve() to determine if the
 	 *   user's quota will hold the payload
 	 */
-	int (*instantiate)(struct key *key, const void *data, size_t datalen);
+	int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
 
 	/* update a key of this type (optional)
 	 * - this method should call key_payload_reserve() to recalculate the
 	 *   quota consumption
 	 * - the key must be locked against read when modifying
 	 */
-	int (*update)(struct key *key, const void *data, size_t datalen);
+	int (*update)(struct key *key, struct key_preparsed_payload *prep);
 
 	/* match a key against a description */
 	int (*match)(const struct key *key, const void *desc);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 9da7fdd3cd8a..af14cb425164 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
 	}
 }
 
-int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
+int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct ceph_crypto_key *ckey;
+	size_t datalen = prep->datalen;
 	int ret;
 	void *p;
 
 	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		goto err;
 
 	ret = key_payload_reserve(key, datalen);
@@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
 		goto err;
 
 	/* TODO ceph_crypto_key_decode should really take const input */
-	p = (void *)data;
-	ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen);
+	p = (void *)prep->data;
+	ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen);
 	if (ret < 0)
 		goto err_ckey;
 
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index d9507dd05818..859ab8b6ec34 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache;
  *        "ip1,ip2,...#foo=bar"
  */
 static int
-dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
+dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct user_key_payload *upayload;
 	unsigned long derrno;
 	int ret;
-	size_t result_len = 0;
-	const char *data = _data, *end, *opt;
+	size_t datalen = prep->datalen, result_len = 0;
+	const char *data = prep->data, *end, *opt;
 
 	kenter("%%%d,%s,'%*.*s',%zu",
 	       key->serial, key->description,
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 8b1f9f49960f..106c5a6b1ab2 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -26,8 +26,8 @@
 #include "ar-internal.h"
 
 static int rxrpc_vet_description_s(const char *);
-static int rxrpc_instantiate(struct key *, const void *, size_t);
-static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *);
+static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *);
 static void rxrpc_destroy(struct key *);
 static void rxrpc_destroy_s(struct key *);
 static void rxrpc_describe(const struct key *, struct seq_file *);
@@ -678,7 +678,7 @@ error:
  *
  * if no data is provided, then a no-security key is made
  */
-static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	const struct rxrpc_key_data_v1 *v1;
 	struct rxrpc_key_token *token, **pp;
@@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
 	u32 kver;
 	int ret;
 
-	_enter("{%x},,%zu", key_serial(key), datalen);
+	_enter("{%x},,%zu", key_serial(key), prep->datalen);
 
 	/* handle a no-security key */
-	if (!data && datalen == 0)
+	if (!prep->data && prep->datalen == 0)
 		return 0;
 
 	/* determine if the XDR payload format is being used */
-	if (datalen > 7 * 4) {
-		ret = rxrpc_instantiate_xdr(key, data, datalen);
+	if (prep->datalen > 7 * 4) {
+		ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen);
 		if (ret != -EPROTO)
 			return ret;
 	}
 
 	/* get the key interface version number */
 	ret = -EINVAL;
-	if (datalen <= 4 || !data)
+	if (prep->datalen <= 4 || !prep->data)
 		goto error;
-	memcpy(&kver, data, sizeof(kver));
-	data += sizeof(kver);
-	datalen -= sizeof(kver);
+	memcpy(&kver, prep->data, sizeof(kver));
+	prep->data += sizeof(kver);
+	prep->datalen -= sizeof(kver);
 
 	_debug("KEY I/F VERSION: %u", kver);
 
@@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
 
 	/* deal with a version 1 key */
 	ret = -EINVAL;
-	if (datalen < sizeof(*v1))
+	if (prep->datalen < sizeof(*v1))
 		goto error;
 
-	v1 = data;
-	if (datalen != sizeof(*v1) + v1->ticket_length)
+	v1 = prep->data;
+	if (prep->datalen != sizeof(*v1) + v1->ticket_length)
 		goto error;
 
 	_debug("SCIX: %u", v1->security_index);
@@ -784,17 +784,17 @@ error:
  * instantiate a server secret key
  * data should be a pointer to the 8-byte secret key
  */
-static int rxrpc_instantiate_s(struct key *key, const void *data,
-			       size_t datalen)
+static int rxrpc_instantiate_s(struct key *key,
+			       struct key_preparsed_payload *prep)
 {
 	struct crypto_blkcipher *ci;
 
-	_enter("{%x},,%zu", key_serial(key), datalen);
+	_enter("{%x},,%zu", key_serial(key), prep->datalen);
 
-	if (datalen != 8)
+	if (prep->datalen != 8)
 		return -EINVAL;
 
-	memcpy(&key->type_data, data, 8);
+	memcpy(&key->type_data, prep->data, 8);
 
 	ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(ci)) {
@@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data,
 		return PTR_ERR(ci);
 	}
 
-	if (crypto_blkcipher_setkey(ci, data, 8) < 0)
+	if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0)
 		BUG();
 
 	key->payload.data = ci;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 2d1bb8af7696..9e1e005c7596 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -773,8 +773,8 @@ static int encrypted_init(struct encrypted_key_payload *epayload,
  *
  * On success, return 0. Otherwise return errno.
  */
-static int encrypted_instantiate(struct key *key, const void *data,
-				 size_t datalen)
+static int encrypted_instantiate(struct key *key,
+				 struct key_preparsed_payload *prep)
 {
 	struct encrypted_key_payload *epayload = NULL;
 	char *datablob = NULL;
@@ -782,16 +782,17 @@ static int encrypted_instantiate(struct key *key, const void *data,
 	char *master_desc = NULL;
 	char *decrypted_datalen = NULL;
 	char *hex_encoded_iv = NULL;
+	size_t datalen = prep->datalen;
 	int ret;
 
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	datablob = kmalloc(datalen + 1, GFP_KERNEL);
 	if (!datablob)
 		return -ENOMEM;
 	datablob[datalen] = 0;
-	memcpy(datablob, data, datalen);
+	memcpy(datablob, prep->data, datalen);
 	ret = datablob_parse(datablob, &format, &master_desc,
 			     &decrypted_datalen, &hex_encoded_iv);
 	if (ret < 0)
@@ -834,16 +835,17 @@ static void encrypted_rcu_free(struct rcu_head *rcu)
  *
  * On success, return 0. Otherwise return errno.
  */
-static int encrypted_update(struct key *key, const void *data, size_t datalen)
+static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct encrypted_key_payload *epayload = key->payload.data;
 	struct encrypted_key_payload *new_epayload;
 	char *buf;
 	char *new_master_desc = NULL;
 	const char *format = NULL;
+	size_t datalen = prep->datalen;
 	int ret = 0;
 
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	buf = kmalloc(datalen + 1, GFP_KERNEL);
@@ -851,7 +853,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
 		return -ENOMEM;
 
 	buf[datalen] = 0;
-	memcpy(buf, data, datalen);
+	memcpy(buf, prep->data, datalen);
 	ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
 	if (ret < 0)
 		goto out;
diff --git a/security/keys/key.c b/security/keys/key.c
index 50d96d4e06f2..1d039af99f50 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -412,8 +412,7 @@ EXPORT_SYMBOL(key_payload_reserve);
  * key_construction_mutex.
  */
 static int __key_instantiate_and_link(struct key *key,
-				      const void *data,
-				      size_t datalen,
+				      struct key_preparsed_payload *prep,
 				      struct key *keyring,
 				      struct key *authkey,
 				      unsigned long *_prealloc)
@@ -431,7 +430,7 @@ static int __key_instantiate_and_link(struct key *key,
 	/* can't instantiate twice */
 	if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
 		/* instantiate the key */
-		ret = key->type->instantiate(key, data, datalen);
+		ret = key->type->instantiate(key, prep);
 
 		if (ret == 0) {
 			/* mark the key as being instantiated */
@@ -482,22 +481,37 @@ int key_instantiate_and_link(struct key *key,
 			     struct key *keyring,
 			     struct key *authkey)
 {
+	struct key_preparsed_payload prep;
 	unsigned long prealloc;
 	int ret;
 
+	memset(&prep, 0, sizeof(prep));
+	prep.data = data;
+	prep.datalen = datalen;
+	prep.quotalen = key->type->def_datalen;
+	if (key->type->preparse) {
+		ret = key->type->preparse(&prep);
+		if (ret < 0)
+			goto error;
+	}
+
 	if (keyring) {
 		ret = __key_link_begin(keyring, key->type, key->description,
 				       &prealloc);
 		if (ret < 0)
-			return ret;
+			goto error_free_preparse;
 	}
 
-	ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey,
+	ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
 					 &prealloc);
 
 	if (keyring)
 		__key_link_end(keyring, key->type, prealloc);
 
+error_free_preparse:
+	if (key->type->preparse)
+		key->type->free_preparse(&prep);
+error:
 	return ret;
 }
 
@@ -706,7 +720,7 @@ void key_type_put(struct key_type *ktype)
  * if we get an error.
  */
 static inline key_ref_t __key_update(key_ref_t key_ref,
-				     const void *payload, size_t plen)
+				     struct key_preparsed_payload *prep)
 {
 	struct key *key = key_ref_to_ptr(key_ref);
 	int ret;
@@ -722,7 +736,7 @@ static inline key_ref_t __key_update(key_ref_t key_ref,
 
 	down_write(&key->sem);
 
-	ret = key->type->update(key, payload, plen);
+	ret = key->type->update(key, prep);
 	if (ret == 0)
 		/* updating a negative key instantiates it */
 		clear_bit(KEY_FLAG_NEGATIVE, &key->flags);
@@ -774,6 +788,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 			       unsigned long flags)
 {
 	unsigned long prealloc;
+	struct key_preparsed_payload prep;
 	const struct cred *cred = current_cred();
 	struct key_type *ktype;
 	struct key *keyring, *key = NULL;
@@ -789,8 +804,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 
 	key_ref = ERR_PTR(-EINVAL);
-	if (!ktype->match || !ktype->instantiate)
-		goto error_2;
+	if (!ktype->match || !ktype->instantiate ||
+	    (!description && !ktype->preparse))
+		goto error_put_type;
 
 	keyring = key_ref_to_ptr(keyring_ref);
 
@@ -798,18 +814,37 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 
 	key_ref = ERR_PTR(-ENOTDIR);
 	if (keyring->type != &key_type_keyring)
-		goto error_2;
+		goto error_put_type;
+
+	memset(&prep, 0, sizeof(prep));
+	prep.data = payload;
+	prep.datalen = plen;
+	prep.quotalen = ktype->def_datalen;
+	if (ktype->preparse) {
+		ret = ktype->preparse(&prep);
+		if (ret < 0) {
+			key_ref = ERR_PTR(ret);
+			goto error_put_type;
+		}
+		if (!description)
+			description = prep.description;
+		key_ref = ERR_PTR(-EINVAL);
+		if (!description)
+			goto error_free_prep;
+	}
 
 	ret = __key_link_begin(keyring, ktype, description, &prealloc);
-	if (ret < 0)
-		goto error_2;
+	if (ret < 0) {
+		key_ref = ERR_PTR(ret);
+		goto error_free_prep;
+	}
 
 	/* if we're going to allocate a new key, we're going to have
 	 * to modify the keyring */
 	ret = key_permission(keyring_ref, KEY_WRITE);
 	if (ret < 0) {
 		key_ref = ERR_PTR(ret);
-		goto error_3;
+		goto error_link_end;
 	}
 
 	/* if it's possible to update this type of key, search for an existing
@@ -840,25 +875,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 			perm, flags);
 	if (IS_ERR(key)) {
 		key_ref = ERR_CAST(key);
-		goto error_3;
+		goto error_link_end;
 	}
 
 	/* instantiate it and link it into the target keyring */
-	ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL,
-					 &prealloc);
+	ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
 	if (ret < 0) {
 		key_put(key);
 		key_ref = ERR_PTR(ret);
-		goto error_3;
+		goto error_link_end;
 	}
 
 	key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
 
- error_3:
+error_link_end:
 	__key_link_end(keyring, ktype, prealloc);
- error_2:
+error_free_prep:
+	if (ktype->preparse)
+		ktype->free_preparse(&prep);
+error_put_type:
 	key_type_put(ktype);
- error:
+error:
 	return key_ref;
 
  found_matching_key:
@@ -866,10 +903,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	 * - we can drop the locks first as we have the key pinned
 	 */
 	__key_link_end(keyring, ktype, prealloc);
-	key_type_put(ktype);
 
-	key_ref = __key_update(key_ref, payload, plen);
-	goto error;
+	key_ref = __key_update(key_ref, &prep);
+	goto error_free_prep;
 }
 EXPORT_SYMBOL(key_create_or_update);
 
@@ -888,6 +924,7 @@ EXPORT_SYMBOL(key_create_or_update);
  */
 int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 {
+	struct key_preparsed_payload prep;
 	struct key *key = key_ref_to_ptr(key_ref);
 	int ret;
 
@@ -900,18 +937,31 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 
 	/* attempt to update it if supported */
 	ret = -EOPNOTSUPP;
-	if (key->type->update) {
-		down_write(&key->sem);
-
-		ret = key->type->update(key, payload, plen);
-		if (ret == 0)
-			/* updating a negative key instantiates it */
-			clear_bit(KEY_FLAG_NEGATIVE, &key->flags);
+	if (!key->type->update)
+		goto error;
 
-		up_write(&key->sem);
+	memset(&prep, 0, sizeof(prep));
+	prep.data = payload;
+	prep.datalen = plen;
+	prep.quotalen = key->type->def_datalen;
+	if (key->type->preparse) {
+		ret = key->type->preparse(&prep);
+		if (ret < 0)
+			goto error;
 	}
 
- error:
+	down_write(&key->sem);
+
+	ret = key->type->update(key, &prep);
+	if (ret == 0)
+		/* updating a negative key instantiates it */
+		clear_bit(KEY_FLAG_NEGATIVE, &key->flags);
+
+	up_write(&key->sem);
+
+	if (key->type->preparse)
+		key->type->free_preparse(&prep);
+error:
 	return ret;
 }
 EXPORT_SYMBOL(key_update);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 3364fbf46807..505d40be196c 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -46,6 +46,9 @@ static int key_get_type_from_user(char *type,
  * Extract the description of a new key from userspace and either add it as a
  * new key to the specified keyring or update a matching key in that keyring.
  *
+ * If the description is NULL or an empty string, the key type is asked to
+ * generate one from the payload.
+ *
  * The keyring must be writable so that we can attach the key to it.
  *
  * If successful, the new key's serial number is returned, otherwise an error
@@ -72,10 +75,17 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
 	if (ret < 0)
 		goto error;
 
-	description = strndup_user(_description, PAGE_SIZE);
-	if (IS_ERR(description)) {
-		ret = PTR_ERR(description);
-		goto error;
+	description = NULL;
+	if (_description) {
+		description = strndup_user(_description, PAGE_SIZE);
+		if (IS_ERR(description)) {
+			ret = PTR_ERR(description);
+			goto error;
+		}
+		if (!*description) {
+			kfree(description);
+			description = NULL;
+		}
 	}
 
 	/* pull the payload in if one was supplied */
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 81e7852d281d..f04d8cf81f3c 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -66,7 +66,7 @@ static inline unsigned keyring_hash(const char *desc)
  * operations.
  */
 static int keyring_instantiate(struct key *keyring,
-			       const void *data, size_t datalen);
+			       struct key_preparsed_payload *prep);
 static int keyring_match(const struct key *keyring, const void *criterion);
 static void keyring_revoke(struct key *keyring);
 static void keyring_destroy(struct key *keyring);
@@ -121,12 +121,12 @@ static void keyring_publish_name(struct key *keyring)
  * Returns 0 on success, -EINVAL if given any data.
  */
 static int keyring_instantiate(struct key *keyring,
-			       const void *data, size_t datalen)
+			       struct key_preparsed_payload *prep)
 {
 	int ret;
 
 	ret = -EINVAL;
-	if (datalen == 0) {
+	if (prep->datalen == 0) {
 		/* make the keyring available by name if it has one */
 		keyring_publish_name(keyring);
 		ret = 0;
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 60d4e3f5e4bb..85730d5a5a59 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -19,7 +19,8 @@
 #include <asm/uaccess.h>
 #include "internal.h"
 
-static int request_key_auth_instantiate(struct key *, const void *, size_t);
+static int request_key_auth_instantiate(struct key *,
+					struct key_preparsed_payload *);
 static void request_key_auth_describe(const struct key *, struct seq_file *);
 static void request_key_auth_revoke(struct key *);
 static void request_key_auth_destroy(struct key *);
@@ -42,10 +43,9 @@ struct key_type key_type_request_key_auth = {
  * Instantiate a request-key authorisation key.
  */
 static int request_key_auth_instantiate(struct key *key,
-					const void *data,
-					size_t datalen)
+					struct key_preparsed_payload *prep)
 {
-	key->payload.data = (struct request_key_auth *) data;
+	key->payload.data = (struct request_key_auth *)prep->data;
 	return 0;
 }
 
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 2d5d041f2049..42036c7a0856 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -927,22 +927,23 @@ static struct trusted_key_payload *trusted_payload_alloc(struct key *key)
  *
  * On success, return 0. Otherwise return errno.
  */
-static int trusted_instantiate(struct key *key, const void *data,
-			       size_t datalen)
+static int trusted_instantiate(struct key *key,
+			       struct key_preparsed_payload *prep)
 {
 	struct trusted_key_payload *payload = NULL;
 	struct trusted_key_options *options = NULL;
+	size_t datalen = prep->datalen;
 	char *datablob;
 	int ret = 0;
 	int key_cmd;
 
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	datablob = kmalloc(datalen + 1, GFP_KERNEL);
 	if (!datablob)
 		return -ENOMEM;
-	memcpy(datablob, data, datalen);
+	memcpy(datablob, prep->data, datalen);
 	datablob[datalen] = '\0';
 
 	options = trusted_options_alloc();
@@ -1011,17 +1012,18 @@ static void trusted_rcu_free(struct rcu_head *rcu)
 /*
  * trusted_update - reseal an existing key with new PCR values
  */
-static int trusted_update(struct key *key, const void *data, size_t datalen)
+static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct trusted_key_payload *p = key->payload.data;
 	struct trusted_key_payload *new_p;
 	struct trusted_key_options *new_o;
+	size_t datalen = prep->datalen;
 	char *datablob;
 	int ret = 0;
 
 	if (!p->migratable)
 		return -EPERM;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	datablob = kmalloc(datalen + 1, GFP_KERNEL);
@@ -1038,7 +1040,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen)
 		goto out;
 	}
 
-	memcpy(datablob, data, datalen);
+	memcpy(datablob, prep->data, datalen);
 	datablob[datalen] = '\0';
 	ret = datablob_parse(datablob, new_p, new_o);
 	if (ret != Opt_update) {
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index c7660a25a3e4..55dc88939185 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -58,13 +58,14 @@ EXPORT_SYMBOL_GPL(key_type_logon);
 /*
  * instantiate a user defined key
  */
-int user_instantiate(struct key *key, const void *data, size_t datalen)
+int user_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct user_key_payload *upayload;
+	size_t datalen = prep->datalen;
 	int ret;
 
 	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		goto error;
 
 	ret = key_payload_reserve(key, datalen);
@@ -78,7 +79,7 @@ int user_instantiate(struct key *key, const void *data, size_t datalen)
 
 	/* attach the data */
 	upayload->datalen = datalen;
-	memcpy(upayload->data, data, datalen);
+	memcpy(upayload->data, prep->data, datalen);
 	rcu_assign_keypointer(key, upayload);
 	ret = 0;
 
@@ -92,13 +93,14 @@ EXPORT_SYMBOL_GPL(user_instantiate);
  * update a user defined key
  * - the key's semaphore is write-locked
  */
-int user_update(struct key *key, const void *data, size_t datalen)
+int user_update(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct user_key_payload *upayload, *zap;
+	size_t datalen = prep->datalen;
 	int ret;
 
 	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		goto error;
 
 	/* construct a replacement payload */
@@ -108,7 +110,7 @@ int user_update(struct key *key, const void *data, size_t datalen)
 		goto error;
 
 	upayload->datalen = datalen;
-	memcpy(upayload->data, data, datalen);
+	memcpy(upayload->data, prep->data, datalen);
 
 	/* check the quota and attach the new data */
 	zap = upayload;
-- 
cgit v1.2.3-55-g7522


From e6459606b04e6385ccd3c2060fc10f78a92c7700 Mon Sep 17 00:00:00 2001
From: H. Peter Anvin
Date: Mon, 1 Oct 2012 00:23:52 +0200
Subject: lib: Add early cpio decoder

Add a simple cpio decoder without library dependencies for the purpose
of extracting components from the initramfs blob for early kernel
uses.  Intended consumers so far are microcode and ACPI override.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1349043837-22659-2-git-send-email-trenn@suse.de
Cc: Len Brown <lenb@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/earlycpio.h |  17 ++++++
 lib/Makefile              |   2 +-
 lib/earlycpio.c           | 145 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/earlycpio.h
 create mode 100644 lib/earlycpio.c

(limited to 'include/linux')

diff --git a/include/linux/earlycpio.h b/include/linux/earlycpio.h
new file mode 100644
index 000000000000..111f46d83d00
--- /dev/null
+++ b/include/linux/earlycpio.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_EARLYCPIO_H
+#define _LINUX_EARLYCPIO_H
+
+#include <linux/types.h>
+
+#define MAX_CPIO_FILE_NAME 18
+
+struct cpio_data {
+	void *data;
+	size_t size;
+	char name[MAX_CPIO_FILE_NAME];
+};
+
+struct cpio_data find_cpio_data(const char *path, void *data, size_t len,
+				long *offset);
+
+#endif /* _LINUX_EARLYCPIO_H */
diff --git a/lib/Makefile b/lib/Makefile
index 42d283edc4d3..0924041b6959 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o earlycpio.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
new file mode 100644
index 000000000000..8078ef49cb79
--- /dev/null
+++ b/lib/earlycpio.c
@@ -0,0 +1,145 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2012 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available
+ *   under the terms of the GNU General Public License version 2, as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * earlycpio.c
+ *
+ * Find a specific cpio member; must precede any compressed content.
+ * This is used to locate data items in the initramfs used by the
+ * kernel itself during early boot (before the main initramfs is
+ * decompressed.)  It is the responsibility of the initramfs creator
+ * to ensure that these items are uncompressed at the head of the
+ * blob.  Depending on the boot loader or package tool that may be a
+ * separate file or part of the same file.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+enum cpio_fields {
+	C_MAGIC,
+	C_INO,
+	C_MODE,
+	C_UID,
+	C_GID,
+	C_NLINK,
+	C_MTIME,
+	C_FILESIZE,
+	C_MAJ,
+	C_MIN,
+	C_RMAJ,
+	C_RMIN,
+	C_NAMESIZE,
+	C_CHKSUM,
+	C_NFIELDS
+};
+
+/**
+ * cpio_data find_cpio_data - Search for files in an uncompressed cpio
+ * @path:   The directory to search for, including a slash at the end
+ * @data:   Pointer to the the cpio archive or a header inside
+ * @len:    Remaining length of the cpio based on data pointer
+ * @offset: When a matching file is found, this is the offset to the
+ *          beginning of the cpio. It can be used to iterate through
+ *          the cpio to find all files inside of a directory path
+ *
+ * @return: struct cpio_data containing the address, length and
+ *          filename (with the directory path cut off) of the found file.
+ *          If you search for a filename and not for files in a directory,
+ *          pass the absolute path of the filename in the cpio and make sure
+ *          the match returned an empty filename string.
+ */
+
+struct cpio_data __cpuinit find_cpio_data(const char *path, void *data,
+					  size_t len,  long *offset)
+{
+	const size_t cpio_header_len = 8*C_NFIELDS - 2;
+	struct cpio_data cd = { NULL, 0, "" };
+	const char *p, *dptr, *nptr;
+	unsigned int ch[C_NFIELDS], *chp, v;
+	unsigned char c, x;
+	size_t mypathsize = strlen(path);
+	int i, j;
+
+	p = data;
+
+	while (len > cpio_header_len) {
+		if (!*p) {
+			/* All cpio headers need to be 4-byte aligned */
+			p += 4;
+			len -= 4;
+			continue;
+		}
+
+		j = 6;		/* The magic field is only 6 characters */
+		chp = ch;
+		for (i = C_NFIELDS; i; i--) {
+			v = 0;
+			while (j--) {
+				v <<= 4;
+				c = *p++;
+
+				x = c - '0';
+				if (x < 10) {
+					v += x;
+					continue;
+				}
+
+				x = (c | 0x20) - 'a';
+				if (x < 6) {
+					v += x + 10;
+					continue;
+				}
+
+				goto quit; /* Invalid hexadecimal */
+			}
+			*chp++ = v;
+			j = 8;	/* All other fields are 8 characters */
+		}
+
+		if ((ch[C_MAGIC] - 0x070701) > 1)
+			goto quit; /* Invalid magic */
+
+		len -= cpio_header_len;
+
+		dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4);
+		nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4);
+
+		if (nptr > p + len || dptr < p || nptr < dptr)
+			goto quit; /* Buffer overrun */
+
+		if ((ch[C_MODE] & 0170000) == 0100000 &&
+		    ch[C_NAMESIZE] >= mypathsize &&
+		    !memcmp(p, path, mypathsize)) {
+			*offset = (long)nptr - (long)data;
+			if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
+				pr_warn(
+				"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
+				p, MAX_CPIO_FILE_NAME);
+			}
+			strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME);
+
+			cd.data = (void *)dptr;
+			cd.size = ch[C_FILESIZE];
+			return cd; /* Found it! */
+		}
+		len -= (nptr - p);
+		p = nptr;
+	}
+
+quit:
+	return cd;
+}
-- 
cgit v1.2.3-55-g7522


From 8e30524dcc0d0ac1a18a5cee482b9d9cde3cb332 Mon Sep 17 00:00:00 2001
From: Thomas Renninger
Date: Mon, 1 Oct 2012 00:23:53 +0200
Subject: x86, acpi: Introduce x86 arch specific arch_reserve_mem_area() for
 e820 handling

This is needed for ACPI table overriding via initrd. Beside reserving
memblocks, X86 also requires to flag the memory area to E820_RESERVED or
E820_ACPI in the e820 mappings to be able to io(re)map it later.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Link: http://lkml.kernel.org/r/1349043837-22659-3-git-send-email-trenn@suse.de
Cc: Len Brown <lenb@kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/acpi/boot.c | 6 ++++++
 include/linux/acpi.h        | 8 ++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..6b75777c0a8d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1700,3 +1700,9 @@ int __acpi_release_global_lock(unsigned int *lock)
 	} while (unlikely (val != old));
 	return old & 0x1;
 }
+
+void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+	e820_add_region(addr, size, E820_ACPI);
+	update_e820();
+}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4f2a76224509..946fd1ea79ff 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -426,6 +426,14 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_sleep(u8 sleep_state,
 				  u32 pm1a_control, u32 pm1b_control);
+#if CONFIG_X86
+void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
+#else
+static inline void arch_reserve_mem_area(acpi_physical_address addr,
+					  size_t size)
+{
+}
+#endif /* CONFIG_X86 */
 #else
 #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
 #endif
-- 
cgit v1.2.3-55-g7522


From 53aac44c904abbad9f474f652f099de13b5c3563 Mon Sep 17 00:00:00 2001
From: Thomas Renninger
Date: Mon, 1 Oct 2012 00:23:54 +0200
Subject: ACPI: Store valid ACPI tables passed via early initrd in reserved
 memblock areas

A later patch will compare them with ACPI tables that get loaded at boot or
runtime and if criteria match, a stored one is loaded.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Link: http://lkml.kernel.org/r/1349043837-22659-4-git-send-email-trenn@suse.de
Cc: Len Brown <lenb@kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/setup.c |   2 +
 drivers/acpi/Kconfig    |   9 ++++
 drivers/acpi/osl.c      | 122 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h    |   8 ++++
 4 files changed, 141 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..764e543b2297 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -941,6 +941,8 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_initrd();
 
+	acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
+
 	reserve_crashkernel();
 
 	vsmp_init();
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 80998958cf45..8cf719547b51 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -261,6 +261,15 @@ config ACPI_CUSTOM_DSDT
 	bool
 	default ACPI_CUSTOM_DSDT_FILE != ""
 
+config ACPI_INITRD_TABLE_OVERRIDE
+	bool "ACPI tables can be passed via uncompressed cpio in initrd"
+	default n
+	help
+	  This option provides functionality to override arbitrary ACPI tables
+	  via initrd. No functional change if no ACPI tables are passed via
+	  initrd, therefore it's safe to say Y.
+	  See Documentation/acpi/initrd_table_override.txt for details
+
 config ACPI_BLACKLIST_YEAR
 	int "Disable ACPI for systems before Jan 1st this year" if X86_32
 	default 0
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 9eaf708f5885..b20b07903218 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -534,6 +534,128 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
 	return AE_OK;
 }
 
+#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
+#include <linux/earlycpio.h>
+#include <linux/memblock.h>
+
+static u64 acpi_tables_addr;
+static int all_tables_size;
+
+/* Copied from acpica/tbutils.c:acpi_tb_checksum() */
+u8 __init acpi_table_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+	return sum;
+}
+
+/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
+static const char * const table_sigs[] = {
+	ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ,
+	ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT,
+	ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF,
+	ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET,
+	ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI,
+	ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA,
+	ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
+	ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
+	ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL };
+
+/* Non-fatal errors: Affected tables/files are ignored */
+#define INVALID_TABLE(x, path, name)					\
+	{ pr_err("ACPI OVERRIDE: " x " [%s%s]\n", path, name); continue; }
+
+#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
+
+/* Must not increase 10 or needs code modification below */
+#define ACPI_OVERRIDE_TABLES 10
+
+void __init acpi_initrd_override(void *data, size_t size)
+{
+	int sig, no, table_nr = 0, total_offset = 0;
+	long offset = 0;
+	struct acpi_table_header *table;
+	char cpio_path[32] = "kernel/firmware/acpi/";
+	struct cpio_data file;
+	struct cpio_data early_initrd_files[ACPI_OVERRIDE_TABLES];
+	char *p;
+
+	if (data == NULL || size == 0)
+		return;
+
+	for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) {
+		file = find_cpio_data(cpio_path, data, size, &offset);
+		if (!file.data)
+			break;
+
+		data += offset;
+		size -= offset;
+
+		if (file.size < sizeof(struct acpi_table_header))
+			INVALID_TABLE("Table smaller than ACPI header",
+				      cpio_path, file.name);
+
+		table = file.data;
+
+		for (sig = 0; table_sigs[sig]; sig++)
+			if (!memcmp(table->signature, table_sigs[sig], 4))
+				break;
+
+		if (!table_sigs[sig])
+			INVALID_TABLE("Unknown signature",
+				      cpio_path, file.name);
+		if (file.size != table->length)
+			INVALID_TABLE("File length does not match table length",
+				      cpio_path, file.name);
+		if (acpi_table_checksum(file.data, table->length))
+			INVALID_TABLE("Bad table checksum",
+				      cpio_path, file.name);
+
+		pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n",
+			table->signature, cpio_path, file.name, table->length);
+
+		all_tables_size += table->length;
+		early_initrd_files[table_nr].data = file.data;
+		early_initrd_files[table_nr].size = file.size;
+		table_nr++;
+	}
+	if (table_nr == 0)
+		return;
+
+	acpi_tables_addr =
+		memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT,
+				       all_tables_size, PAGE_SIZE);
+	if (!acpi_tables_addr) {
+		WARN_ON(1);
+		return;
+	}
+	/*
+	 * Only calling e820_add_reserve does not work and the
+	 * tables are invalid (memory got used) later.
+	 * memblock_reserve works as expected and the tables won't get modified.
+	 * But it's not enough on X86 because ioremap will
+	 * complain later (used by acpi_os_map_memory) that the pages
+	 * that should get mapped are not marked "reserved".
+	 * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area)
+	 * works fine.
+	 */
+	memblock_reserve(acpi_tables_addr, acpi_tables_addr + all_tables_size);
+	arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
+
+	p = early_ioremap(acpi_tables_addr, all_tables_size);
+
+	for (no = 0; no < table_nr; no++) {
+		memcpy(p + total_offset, early_initrd_files[no].data,
+		       early_initrd_files[no].size);
+		total_offset += early_initrd_files[no].size;
+	}
+	early_iounmap(p, all_tables_size);
+}
+#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */
+
 acpi_status
 acpi_os_table_override(struct acpi_table_header * existing_table,
 		       struct acpi_table_header ** new_table)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 946fd1ea79ff..d14081c10c17 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -76,6 +76,14 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table);
 
 typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
 
+#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
+void acpi_initrd_override(void *data, size_t size);
+#else
+static inline void acpi_initrd_override(void *data, size_t size)
+{
+}
+#endif
+
 char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
 void __acpi_unmap_table(char *map, unsigned long size);
 int early_acpi_boot_init(void);
-- 
cgit v1.2.3-55-g7522


From 3a50597de8635cd05133bd12c95681c82fe7b878 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 2 Oct 2012 19:24:29 +0100
Subject: KEYS: Make the session and process keyrings per-thread

Make the session keyring per-thread rather than per-process, but still
inherited from the parent thread to solve a problem with PAM and gdm.

The problem is that join_session_keyring() will reject attempts to change the
session keyring of a multithreaded program but gdm is now multithreaded before
it gets to the point of starting PAM and running pam_keyinit to create the
session keyring.  See:

	https://bugs.freedesktop.org/show_bug.cgi?id=49211

The reason that join_session_keyring() will only change the session keyring
under a single-threaded environment is that it's hard to alter the other
thread's credentials to effect the change in a multi-threaded program.  The
problems are such as:

 (1) How to prevent two threads both running join_session_keyring() from
     racing.

 (2) Another thread's credentials may not be modified directly by this process.

 (3) The number of threads is uncertain whilst we're not holding the
     appropriate spinlock, making preallocation slightly tricky.

 (4) We could use TIF_NOTIFY_RESUME and key_replace_session_keyring() to get
     another thread to replace its keyring, but that means preallocating for
     each thread.

A reasonable way around this is to make the session keyring per-thread rather
than per-process and just document that if you want a common session keyring,
you must get it before you spawn any threads - which is the current situation
anyway.

Whilst we're at it, we can the process keyring behave in the same way.  This
means we can clean up some of the ickyness in the creds code.

Basically, after this patch, the session, process and thread keyrings are about
inheritance rules only and not about sharing changes of keyring.

Reported-by: Mantas M. <grawity@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Ray Strode <rstrode@redhat.com>
---
 include/linux/cred.h         |  17 +-----
 kernel/cred.c                | 127 +++++--------------------------------------
 security/keys/keyctl.c       |  11 ++--
 security/keys/process_keys.c |  66 ++++++++--------------
 security/keys/request_key.c  |  10 ++--
 5 files changed, 50 insertions(+), 181 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index ebbed2ce6637..0142aacb70b7 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -76,21 +76,6 @@ extern int groups_search(const struct group_info *, kgid_t);
 extern int in_group_p(kgid_t);
 extern int in_egroup_p(kgid_t);
 
-/*
- * The common credentials for a thread group
- * - shared by CLONE_THREAD
- */
-#ifdef CONFIG_KEYS
-struct thread_group_cred {
-	atomic_t	usage;
-	pid_t		tgid;			/* thread group process ID */
-	spinlock_t	lock;
-	struct key __rcu *session_keyring;	/* keyring inherited over fork */
-	struct key	*process_keyring;	/* keyring private to this process */
-	struct rcu_head	rcu;			/* RCU deletion hook */
-};
-#endif
-
 /*
  * The security context of a task
  *
@@ -139,6 +124,8 @@ struct cred {
 #ifdef CONFIG_KEYS
 	unsigned char	jit_keyring;	/* default keyring to attach requested
 					 * keys to */
+	struct key __rcu *session_keyring; /* keyring inherited over fork */
+	struct key	*process_keyring; /* keyring private to this process */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
 	struct thread_group_cred *tgcred; /* thread-group shared credentials */
diff --git a/kernel/cred.c b/kernel/cred.c
index de728ac50d82..3f7ad1ec2ae4 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -29,17 +29,6 @@
 
 static struct kmem_cache *cred_jar;
 
-/*
- * The common credentials for the initial task's thread group
- */
-#ifdef CONFIG_KEYS
-static struct thread_group_cred init_tgcred = {
-	.usage	= ATOMIC_INIT(2),
-	.tgid	= 0,
-	.lock	= __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
-};
-#endif
-
 /*
  * The initial credentials for the initial task
  */
@@ -65,9 +54,6 @@ struct cred init_cred = {
 	.user			= INIT_USER,
 	.user_ns		= &init_user_ns,
 	.group_info		= &init_groups,
-#ifdef CONFIG_KEYS
-	.tgcred			= &init_tgcred,
-#endif
 };
 
 static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -95,36 +81,6 @@ static inline void alter_cred_subscribers(const struct cred *_cred, int n)
 #endif
 }
 
-/*
- * Dispose of the shared task group credentials
- */
-#ifdef CONFIG_KEYS
-static void release_tgcred_rcu(struct rcu_head *rcu)
-{
-	struct thread_group_cred *tgcred =
-		container_of(rcu, struct thread_group_cred, rcu);
-
-	BUG_ON(atomic_read(&tgcred->usage) != 0);
-
-	key_put(tgcred->session_keyring);
-	key_put(tgcred->process_keyring);
-	kfree(tgcred);
-}
-#endif
-
-/*
- * Release a set of thread group credentials.
- */
-static void release_tgcred(struct cred *cred)
-{
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred = cred->tgcred;
-
-	if (atomic_dec_and_test(&tgcred->usage))
-		call_rcu(&tgcred->rcu, release_tgcred_rcu);
-#endif
-}
-
 /*
  * The RCU callback to actually dispose of a set of credentials
  */
@@ -150,9 +106,10 @@ static void put_cred_rcu(struct rcu_head *rcu)
 #endif
 
 	security_cred_free(cred);
+	key_put(cred->session_keyring);
+	key_put(cred->process_keyring);
 	key_put(cred->thread_keyring);
 	key_put(cred->request_key_auth);
-	release_tgcred(cred);
 	if (cred->group_info)
 		put_group_info(cred->group_info);
 	free_uid(cred->user);
@@ -246,15 +203,6 @@ struct cred *cred_alloc_blank(void)
 	if (!new)
 		return NULL;
 
-#ifdef CONFIG_KEYS
-	new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
-	if (!new->tgcred) {
-		kmem_cache_free(cred_jar, new);
-		return NULL;
-	}
-	atomic_set(&new->tgcred->usage, 1);
-#endif
-
 	atomic_set(&new->usage, 1);
 #ifdef CONFIG_DEBUG_CREDENTIALS
 	new->magic = CRED_MAGIC;
@@ -308,9 +256,10 @@ struct cred *prepare_creds(void)
 	get_user_ns(new->user_ns);
 
 #ifdef CONFIG_KEYS
+	key_get(new->session_keyring);
+	key_get(new->process_keyring);
 	key_get(new->thread_keyring);
 	key_get(new->request_key_auth);
-	atomic_inc(&new->tgcred->usage);
 #endif
 
 #ifdef CONFIG_SECURITY
@@ -334,39 +283,20 @@ EXPORT_SYMBOL(prepare_creds);
  */
 struct cred *prepare_exec_creds(void)
 {
-	struct thread_group_cred *tgcred = NULL;
 	struct cred *new;
 
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred)
-		return NULL;
-#endif
-
 	new = prepare_creds();
-	if (!new) {
-		kfree(tgcred);
+	if (!new)
 		return new;
-	}
 
 #ifdef CONFIG_KEYS
 	/* newly exec'd tasks don't get a thread keyring */
 	key_put(new->thread_keyring);
 	new->thread_keyring = NULL;
 
-	/* create a new per-thread-group creds for all this set of threads to
-	 * share */
-	memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
-
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-
 	/* inherit the session keyring; new process keyring */
-	key_get(tgcred->session_keyring);
-	tgcred->process_keyring = NULL;
-
-	release_tgcred(new);
-	new->tgcred = tgcred;
+	key_put(new->process_keyring);
+	new->process_keyring = NULL;
 #endif
 
 	return new;
@@ -383,9 +313,6 @@ struct cred *prepare_exec_creds(void)
  */
 int copy_creds(struct task_struct *p, unsigned long clone_flags)
 {
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred;
-#endif
 	struct cred *new;
 	int ret;
 
@@ -425,22 +352,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 			install_thread_keyring_to_cred(new);
 	}
 
-	/* we share the process and session keyrings between all the threads in
-	 * a process - this is slightly icky as we violate COW credentials a
-	 * bit */
+	/* The process keyring is only shared between the threads in a process;
+	 * anything outside of those threads doesn't inherit.
+	 */
 	if (!(clone_flags & CLONE_THREAD)) {
-		tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-		if (!tgcred) {
-			ret = -ENOMEM;
-			goto error_put;
-		}
-		atomic_set(&tgcred->usage, 1);
-		spin_lock_init(&tgcred->lock);
-		tgcred->process_keyring = NULL;
-		tgcred->session_keyring = key_get(new->tgcred->session_keyring);
-
-		release_tgcred(new);
-		new->tgcred = tgcred;
+		key_put(new->process_keyring);
+		new->process_keyring = NULL;
 	}
 #endif
 
@@ -643,9 +560,6 @@ void __init cred_init(void)
  */
 struct cred *prepare_kernel_cred(struct task_struct *daemon)
 {
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred;
-#endif
 	const struct cred *old;
 	struct cred *new;
 
@@ -653,14 +567,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	if (!new)
 		return NULL;
 
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred) {
-		kmem_cache_free(cred_jar, new);
-		return NULL;
-	}
-#endif
-
 	kdebug("prepare_kernel_cred() alloc %p", new);
 
 	if (daemon)
@@ -678,13 +584,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	get_group_info(new->group_info);
 
 #ifdef CONFIG_KEYS
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-	tgcred->process_keyring = NULL;
-	tgcred->session_keyring = NULL;
-	new->tgcred = tgcred;
-	new->request_key_auth = NULL;
+	new->session_keyring = NULL;
+	new->process_keyring = NULL;
 	new->thread_keyring = NULL;
+	new->request_key_auth = NULL;
 	new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
 #endif
 
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index a0d373f76815..65b38417c211 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1475,7 +1475,8 @@ long keyctl_session_to_parent(void)
 		goto error_keyring;
 	newwork = &cred->rcu;
 
-	cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
+	cred->session_keyring = key_ref_to_ptr(keyring_r);
+	keyring_r = NULL;
 	init_task_work(newwork, key_change_session_keyring);
 
 	me = current;
@@ -1500,7 +1501,7 @@ long keyctl_session_to_parent(void)
 	mycred = current_cred();
 	pcred = __task_cred(parent);
 	if (mycred == pcred ||
-	    mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) {
+	    mycred->session_keyring == pcred->session_keyring) {
 		ret = 0;
 		goto unlock;
 	}
@@ -1516,9 +1517,9 @@ long keyctl_session_to_parent(void)
 		goto unlock;
 
 	/* the keyrings must have the same UID */
-	if ((pcred->tgcred->session_keyring &&
-	     pcred->tgcred->session_keyring->uid != mycred->euid) ||
-	    mycred->tgcred->session_keyring->uid != mycred->euid)
+	if ((pcred->session_keyring &&
+	     pcred->session_keyring->uid != mycred->euid) ||
+	    mycred->session_keyring->uid != mycred->euid)
 		goto unlock;
 
 	/* cancel an already pending keyring replacement */
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 178b8c3b130a..9de5dc598276 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -169,9 +169,8 @@ static int install_thread_keyring(void)
 int install_process_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
-	int ret;
 
-	if (new->tgcred->process_keyring)
+	if (new->process_keyring)
 		return -EEXIST;
 
 	keyring = keyring_alloc("_pid", new->uid, new->gid,
@@ -179,17 +178,8 @@ int install_process_keyring_to_cred(struct cred *new)
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
-	spin_lock_irq(&new->tgcred->lock);
-	if (!new->tgcred->process_keyring) {
-		new->tgcred->process_keyring = keyring;
-		keyring = NULL;
-		ret = 0;
-	} else {
-		ret = -EEXIST;
-	}
-	spin_unlock_irq(&new->tgcred->lock);
-	key_put(keyring);
-	return ret;
+	new->process_keyring = keyring;
+	return 0;
 }
 
 /*
@@ -230,7 +220,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 	/* create an empty session keyring */
 	if (!keyring) {
 		flags = KEY_ALLOC_QUOTA_OVERRUN;
-		if (cred->tgcred->session_keyring)
+		if (cred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
 		keyring = keyring_alloc("_ses", cred->uid, cred->gid,
@@ -242,17 +232,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 	}
 
 	/* install the keyring */
-	spin_lock_irq(&cred->tgcred->lock);
-	old = cred->tgcred->session_keyring;
-	rcu_assign_pointer(cred->tgcred->session_keyring, keyring);
-	spin_unlock_irq(&cred->tgcred->lock);
-
-	/* we're using RCU on the pointer, but there's no point synchronising
-	 * on it if it didn't previously point to anything */
-	if (old) {
-		synchronize_rcu();
+	old = cred->session_keyring;
+	rcu_assign_pointer(cred->session_keyring, keyring);
+
+	if (old)
 		key_put(old);
-	}
 
 	return 0;
 }
@@ -367,9 +351,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	}
 
 	/* search the process keyring second */
-	if (cred->tgcred->process_keyring) {
+	if (cred->process_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(cred->tgcred->process_keyring, 1),
+			make_key_ref(cred->process_keyring, 1),
 			cred, type, description, match, no_state_check);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -388,12 +372,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	}
 
 	/* search the session keyring */
-	if (cred->tgcred->session_keyring) {
+	if (cred->session_keyring) {
 		rcu_read_lock();
 		key_ref = keyring_search_aux(
-			make_key_ref(rcu_dereference(
-					     cred->tgcred->session_keyring),
-				     1),
+			make_key_ref(rcu_dereference(cred->session_keyring), 1),
 			cred, type, description, match, no_state_check);
 		rcu_read_unlock();
 
@@ -563,7 +545,7 @@ try_again:
 		break;
 
 	case KEY_SPEC_PROCESS_KEYRING:
-		if (!cred->tgcred->process_keyring) {
+		if (!cred->process_keyring) {
 			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
@@ -575,13 +557,13 @@ try_again:
 			goto reget_creds;
 		}
 
-		key = cred->tgcred->process_keyring;
+		key = cred->process_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_SESSION_KEYRING:
-		if (!cred->tgcred->session_keyring) {
+		if (!cred->session_keyring) {
 			/* always install a session keyring upon access if one
 			 * doesn't exist yet */
 			ret = install_user_keyrings();
@@ -596,7 +578,7 @@ try_again:
 			if (ret < 0)
 				goto error;
 			goto reget_creds;
-		} else if (cred->tgcred->session_keyring ==
+		} else if (cred->session_keyring ==
 			   cred->user->session_keyring &&
 			   lflags & KEY_LOOKUP_CREATE) {
 			ret = join_session_keyring(NULL);
@@ -606,7 +588,7 @@ try_again:
 		}
 
 		rcu_read_lock();
-		key = rcu_dereference(cred->tgcred->session_keyring);
+		key = rcu_dereference(cred->session_keyring);
 		atomic_inc(&key->usage);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
@@ -766,12 +748,6 @@ long join_session_keyring(const char *name)
 	struct key *keyring;
 	long ret, serial;
 
-	/* only permit this if there's a single thread in the thread group -
-	 * this avoids us having to adjust the creds on all threads and risking
-	 * ENOMEM */
-	if (!current_is_single_threaded())
-		return -EMLINK;
-
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
@@ -783,7 +759,7 @@ long join_session_keyring(const char *name)
 		if (ret < 0)
 			goto error;
 
-		serial = new->tgcred->session_keyring->serial;
+		serial = new->session_keyring->serial;
 		ret = commit_creds(new);
 		if (ret == 0)
 			ret = serial;
@@ -806,6 +782,9 @@ long join_session_keyring(const char *name)
 	} else if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto error2;
+	} else if (keyring == new->session_keyring) {
+		ret = 0;
+		goto error2;
 	}
 
 	/* we've got a keyring - now to install it */
@@ -862,8 +841,7 @@ void key_change_session_keyring(struct callback_head *twork)
 
 	new->jit_keyring	= old->jit_keyring;
 	new->thread_keyring	= key_get(old->thread_keyring);
-	new->tgcred->tgid	= old->tgcred->tgid;
-	new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
+	new->process_keyring	= key_get(old->process_keyring);
 
 	security_transfer_creds(new, old);
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 000e75017520..275c4f9e4b8c 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -150,12 +150,12 @@ static int call_sbin_request_key(struct key_construction *cons,
 		cred->thread_keyring ? cred->thread_keyring->serial : 0);
 
 	prkey = 0;
-	if (cred->tgcred->process_keyring)
-		prkey = cred->tgcred->process_keyring->serial;
+	if (cred->process_keyring)
+		prkey = cred->process_keyring->serial;
 	sprintf(keyring_str[1], "%d", prkey);
 
 	rcu_read_lock();
-	session = rcu_dereference(cred->tgcred->session_keyring);
+	session = rcu_dereference(cred->session_keyring);
 	if (!session)
 		session = cred->user->session_keyring;
 	sskey = session->serial;
@@ -297,14 +297,14 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-			dest_keyring = key_get(cred->tgcred->process_keyring);
+			dest_keyring = key_get(cred->process_keyring);
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_SESSION_KEYRING:
 			rcu_read_lock();
 			dest_keyring = key_get(
-				rcu_dereference(cred->tgcred->session_keyring));
+				rcu_dereference(cred->session_keyring));
 			rcu_read_unlock();
 
 			if (dest_keyring)
-- 
cgit v1.2.3-55-g7522


From 96b5c8fea6c0861621051290d705ec2e971963f1 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 2 Oct 2012 19:24:56 +0100
Subject: KEYS: Reduce initial permissions on keys

Reduce the initial permissions on new keys to grant the possessor everything,
view permission only to the user (so the keys can be seen in /proc/keys) and
nothing else.

This gives the creator a chance to adjust the permissions mask before other
processes can access the new key or create a link to it.

To aid with this, keyring_alloc() now takes a permission argument rather than
setting the permissions itself.

The following permissions are now set:

 (1) The user and user-session keyrings grant the user that owns them full
     permissions and grant a possessor everything bar SETATTR.

 (2) The process and thread keyrings grant the possessor full permissions but
     only grant the user VIEW.  This permits the user to see them in
     /proc/keys, but not to do anything with them.

 (3) Anonymous session keyrings grant the possessor full permissions, but only
     grant the user VIEW and READ.  This means that the user can see them in
     /proc/keys and can list them, but nothing else.  Possibly READ shouldn't
     be provided either.

 (4) Named session keyrings grant everything an anonymous session keyring does,
     plus they grant the user LINK permission.  The whole point of named
     session keyrings is that others can also subscribe to them.  Possibly this
     should be a separate permission to LINK.

 (5) The temporary session keyring created by call_sbin_request_key() gets the
     same permissions as an anonymous session keyring.

 (6) Keys created by add_key() get VIEW, SEARCH, LINK and SETATTR for the
     possessor, plus READ and/or WRITE if the key type supports them.  The used
     only gets VIEW now.

 (7) Keys created by request_key() now get the same as those created by
     add_key().

Reported-by: Lennart Poettering <lennart@poettering.net>
Reported-by: Stef Walter <stefw@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/key.h          |  1 +
 security/keys/key.c          |  6 +++---
 security/keys/keyring.c      |  9 +++------
 security/keys/process_keys.c | 26 +++++++++++++++++---------
 security/keys/request_key.c  | 11 ++++++++++-
 5 files changed, 34 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index cef3b315ba7c..890699815212 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -264,6 +264,7 @@ extern int key_unlink(struct key *keyring,
 
 extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
 				 const struct cred *cred,
+				 key_perm_t perm,
 				 unsigned long flags,
 				 struct key *dest);
 
diff --git a/security/keys/key.c b/security/keys/key.c
index 50d96d4e06f2..bebeca3a78e4 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -826,13 +826,13 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	/* if the client doesn't provide, decide on the permissions we want */
 	if (perm == KEY_PERM_UNDEF) {
 		perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
-		perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR;
+		perm |= KEY_USR_VIEW;
 
 		if (ktype->read)
-			perm |= KEY_POS_READ | KEY_USR_READ;
+			perm |= KEY_POS_READ;
 
 		if (ktype == &key_type_keyring || ktype->update)
-			perm |= KEY_USR_WRITE;
+			perm |= KEY_POS_WRITE;
 	}
 
 	/* allocate a new key */
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 81e7852d281d..cf704a92083f 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -257,17 +257,14 @@ error:
  * Allocate a keyring and link into the destination keyring.
  */
 struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
-			  const struct cred *cred, unsigned long flags,
-			  struct key *dest)
+			  const struct cred *cred, key_perm_t perm,
+			  unsigned long flags, struct key *dest)
 {
 	struct key *keyring;
 	int ret;
 
 	keyring = key_alloc(&key_type_keyring, description,
-			    uid, gid, cred,
-			    (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL,
-			    flags);
-
+			    uid, gid, cred, perm, flags);
 	if (!IS_ERR(keyring)) {
 		ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
 		if (ret < 0) {
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 9de5dc598276..b58d93892740 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -46,9 +46,11 @@ int install_user_keyrings(void)
 	struct user_struct *user;
 	const struct cred *cred;
 	struct key *uid_keyring, *session_keyring;
+	key_perm_t user_keyring_perm;
 	char buf[20];
 	int ret;
 
+	user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL;
 	cred = current_cred();
 	user = cred->user;
 
@@ -72,8 +74,8 @@ int install_user_keyrings(void)
 		uid_keyring = find_keyring_by_name(buf, true);
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
-						    cred, KEY_ALLOC_IN_QUOTA,
-						    NULL);
+						    cred, user_keyring_perm,
+						    KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
 				goto error;
@@ -88,7 +90,8 @@ int install_user_keyrings(void)
 		if (IS_ERR(session_keyring)) {
 			session_keyring =
 				keyring_alloc(buf, user->uid, (gid_t) -1,
-					      cred, KEY_ALLOC_IN_QUOTA, NULL);
+					      cred, user_keyring_perm,
+					      KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
 				goto error_release;
@@ -129,6 +132,7 @@ int install_thread_keyring_to_cred(struct cred *new)
 	struct key *keyring;
 
 	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
+				KEY_POS_ALL | KEY_USR_VIEW,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
@@ -173,8 +177,9 @@ int install_process_keyring_to_cred(struct cred *new)
 	if (new->process_keyring)
 		return -EEXIST;
 
-	keyring = keyring_alloc("_pid", new->uid, new->gid,
-				new, KEY_ALLOC_QUOTA_OVERRUN, NULL);
+	keyring = keyring_alloc("_pid", new->uid, new->gid, new,
+				KEY_POS_ALL | KEY_USR_VIEW,
+				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
@@ -223,8 +228,9 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 		if (cred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
-		keyring = keyring_alloc("_ses", cred->uid, cred->gid,
-					cred, flags, NULL);
+		keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred,
+					KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
+					flags, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
 	} else {
@@ -773,8 +779,10 @@ long join_session_keyring(const char *name)
 	keyring = find_keyring_by_name(name, false);
 	if (PTR_ERR(keyring) == -ENOKEY) {
 		/* not found - try and create a new one */
-		keyring = keyring_alloc(name, old->uid, old->gid, old,
-					KEY_ALLOC_IN_QUOTA, NULL);
+		keyring = keyring_alloc(
+			name, old->uid, old->gid, old,
+			KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK,
+			KEY_ALLOC_IN_QUOTA, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
 			goto error2;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 275c4f9e4b8c..0ae3a2202771 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -126,6 +126,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 
 	cred = get_current_cred();
 	keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
+				KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	put_cred(cred);
 	if (IS_ERR(keyring)) {
@@ -347,6 +348,7 @@ static int construct_alloc_key(struct key_type *type,
 	const struct cred *cred = current_cred();
 	unsigned long prealloc;
 	struct key *key;
+	key_perm_t perm;
 	key_ref_t key_ref;
 	int ret;
 
@@ -355,8 +357,15 @@ static int construct_alloc_key(struct key_type *type,
 	*_key = NULL;
 	mutex_lock(&user->cons_lock);
 
+	perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+	perm |= KEY_USR_VIEW;
+	if (type->read)
+		perm |= KEY_POS_READ;
+	if (type == &key_type_keyring || type->update)
+		perm |= KEY_POS_WRITE;
+
 	key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
-			KEY_POS_ALL, flags);
+			perm, flags);
 	if (IS_ERR(key))
 		goto alloc_failed;
 
-- 
cgit v1.2.3-55-g7522


From 9202e07636f0c4858ba6c30773a3f160b2b5659a Mon Sep 17 00:00:00 2001
From: Liu Yu-B13201
Date: Tue, 3 Jul 2012 05:48:52 +0000
Subject: KVM: PPC: Add support for ePAPR idle hcall in host kernel

And add a new flag definition in kvm_ppc_pvinfo to indicate
whether the host supports the EV_IDLE hcall.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
[stuart.yoder@freescale.com: cleanup,fixes for conditions allowing idle]
Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
[agraf: fix typo]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt |  7 +++++--
 arch/powerpc/include/asm/Kbuild   |  1 +
 arch/powerpc/kvm/powerpc.c        | 10 ++++++++--
 include/linux/kvm.h               |  2 ++
 4 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f6ec3a92e621..170afb1fbc2e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1194,12 +1194,15 @@ struct kvm_ppc_pvinfo {
 This ioctl fetches PV specific information that need to be passed to the guest
 using the device tree or other means from vm context.
 
-For now the only implemented piece of information distributed here is an array
-of 4 instructions that make up a hypercall.
+The hcall array defines 4 instructions that make up a hypercall.
 
 If any additional field gets added to this structure later on, a bit for that
 additional piece of information will be set in the flags bitmap.
 
+The flags bitmap is defined as:
+
+   /* the host supports the ePAPR idle hcall
+   #define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
 
 4.48 KVM_ASSIGN_PCI_DEVICE
 
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7e313f1ed183..13d6b7bf3b69 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -34,5 +34,6 @@ header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
+header-y += epapr_hcalls.h
 
 generic-y += rwsem.h
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a478e662b2bc..dbf56e173c25 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,8 +38,7 @@
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !(v->arch.shared->msr & MSR_WE) ||
-	       !!(v->arch.pending_exceptions) ||
+	return !!(v->arch.pending_exceptions) ||
 	       v->requests;
 }
 
@@ -86,6 +85,11 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 
 		/* Second return value is in r4 */
 		break;
+	case EV_HCALL_TOKEN(EV_IDLE):
+		r = EV_SUCCESS;
+		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		break;
 	default:
 		r = EV_UNIMPLEMENTED;
 		break;
@@ -779,6 +783,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	pvinfo->hcall[3] = inst_nop;
 #endif
 
+	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
+
 	return 0;
 }
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0a6d6ba44c85..4cb3761bebae 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -477,6 +477,8 @@ struct kvm_ppc_smmu_info {
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
-- 
cgit v1.2.3-55-g7522


From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001
From: Bharat Bhushan
Date: Wed, 8 Aug 2012 20:38:19 +0000
Subject: KVM: PPC: booke: Add watchdog emulation

This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
[bharat.bhushan@freescale.com: reworked patch]
Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
[agraf: adjust to new request framework]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h  |   3 +
 arch/powerpc/include/asm/kvm_ppc.h   |   2 +
 arch/powerpc/include/asm/reg_booke.h |   7 ++
 arch/powerpc/kvm/book3s.c            |   9 ++
 arch/powerpc/kvm/booke.c             | 155 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke_emulate.c     |   8 ++
 arch/powerpc/kvm/powerpc.c           |  14 +++-
 include/linux/kvm.h                  |   2 +
 include/linux/kvm_host.h             |   1 +
 9 files changed, 199 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 4a5ec8f573c7..51b0ccd56769 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -471,6 +471,8 @@ struct kvm_vcpu_arch {
 	ulong fault_esr;
 	ulong queued_dear;
 	ulong queued_esr;
+	spinlock_t wdt_lock;
+	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
 	u32 mmucfg;
 	u32 epr;
@@ -486,6 +488,7 @@ struct kvm_vcpu_arch {
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
+	u8 watchdog_enabled;
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3dfc437fb9d9..c06a64b53362 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4982c5..e07e6af5e1ff 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
 #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
 #define TCR_ARE		0x00400000	/* Auto Reload Enable */
 
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+			      (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
 /* Bit definitions for the TSR. */
 #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
 #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a8360c857..e94666566fa9 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index c36493087dbf..09e8bf33a8c9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -209,6 +209,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
 }
 
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -328,6 +338,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
 		int_class = INT_CLASS_NONCRIT;
 		break;
+	case BOOKE_IRQPRIO_WATCHDOG:
 	case BOOKE_IRQPRIO_CRITICAL:
 	case BOOKE_IRQPRIO_DBELL_CRIT:
 		allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -407,12 +418,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	return allowed;
 }
 
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+	u64 tb, wdt_tb, wdt_ticks = 0;
+	u64 nr_jiffies = 0;
+	u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+	wdt_tb = 1ULL << (63 - period);
+	tb = get_tb();
+	/*
+	 * The watchdog timeout will hapeen when TB bit corresponding
+	 * to watchdog will toggle from 0 to 1.
+	 */
+	if (tb & wdt_tb)
+		wdt_ticks = wdt_tb;
+
+	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+	/* Convert timebase ticks to jiffies */
+	nr_jiffies = wdt_ticks;
+
+	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+		nr_jiffies++;
+
+	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr_jiffies;
+	unsigned long flags;
+
+	/*
+	 * If TSR_ENW and TSR_WIS are not set then no need to exit to
+	 * userspace, so clear the KVM_REQ_WATCHDOG request.
+	 */
+	if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
+		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+
+	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+	nr_jiffies = watchdog_next_timeout(vcpu);
+	/*
+	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+	 * then do not run the watchdog timer as this can break timer APIs.
+	 */
+	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+	else
+		del_timer(&vcpu->arch.wdt_timer);
+	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	u32 tsr, new_tsr;
+	int final;
+
+	do {
+		new_tsr = tsr = vcpu->arch.tsr;
+		final = 0;
+
+		/* Time out event */
+		if (tsr & TSR_ENW) {
+			if (tsr & TSR_WIS)
+				final = 1;
+			else
+				new_tsr = tsr | TSR_WIS;
+		} else {
+			new_tsr = tsr | TSR_ENW;
+		}
+	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+	if (new_tsr & TSR_WIS) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * If this is final watchdog expiry and some action is required
+	 * then exit to userspace.
+	 */
+	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+	    vcpu->arch.watchdog_enabled) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * Stop running the watchdog timer after final expiration to
+	 * prevent the host from being flooded with timers if the
+	 * guest sets a short period.
+	 * Timers will resume when TSR/TCR is updated next time.
+	 */
+	if (!final)
+		arm_next_watchdog(vcpu);
+}
+
 static void update_timer_ints(struct kvm_vcpu *vcpu)
 {
 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
 		kvmppc_core_queue_dec(vcpu);
 	else
 		kvmppc_core_dequeue_dec(vcpu);
+
+	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+		kvmppc_core_queue_watchdog(vcpu);
+	else
+		kvmppc_core_dequeue_watchdog(vcpu);
 }
 
 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -466,6 +586,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 		kvmppc_core_flush_tlb(vcpu);
 #endif
 
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+		vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
+		r = 0;
+	}
+
 	return r;
 }
 
@@ -995,6 +1120,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	/* setup watchdog timer once */
+	spin_lock_init(&vcpu->arch.wdt_lock);
+	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+		    (unsigned long)vcpu);
+
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	del_timer_sync(&vcpu->arch.wdt_timer);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
@@ -1090,7 +1230,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
 	}
 
 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+		u32 old_tsr = vcpu->arch.tsr;
+
 		vcpu->arch.tsr = sregs->u.e.tsr;
+
+		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+			arm_next_watchdog(vcpu);
+
 		update_timer_ints(vcpu);
 	}
 
@@ -1251,6 +1397,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
 {
 	vcpu->arch.tcr = new_tcr;
+	arm_next_watchdog(vcpu);
 	update_timer_ints(vcpu);
 }
 
@@ -1265,6 +1412,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 {
 	clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+	/*
+	 * We may have stopped the watchdog due to
+	 * being stuck on final expiration.
+	 */
+	if (tsr_bits & (TSR_ENW | TSR_WIS))
+		arm_next_watchdog(vcpu);
+
 	update_timer_ints(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb608ab..5a66ade7fd17 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		kvmppc_clr_tsr_bits(vcpu, spr_val);
 		break;
 	case SPRN_TCR:
+		/*
+		 * WRC is a 2-bit field that is supposed to preserve its
+		 * value once written to non-zero.
+		 */
+		if (vcpu->arch.tcr & TCR_WRC_MASK) {
+			spr_val &= ~TCR_WRC_MASK;
+			spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+		}
 		kvmppc_set_tcr(vcpu, spr_val);
 		break;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 54b12af577d0..0ffd7d17adc7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -300,6 +300,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_SREGS:
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
 	case KVM_CAP_PPC_HIOR:
@@ -476,6 +477,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -484,13 +487,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_EXIT_TIMING
 	mutex_init(&vcpu->arch.exit_timing_lock);
 #endif
-
-	return 0;
+	ret = kvmppc_subarch_vcpu_init(vcpu);
+	return ret;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	kvmppc_mmu_destroy(vcpu);
+	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -735,6 +739,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+		r = 0;
+		vcpu->arch.watchdog_enabled = true;
+		break;
+#endif
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB: {
 		struct kvm_config_tlb cfg;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4cb3761bebae..1649d4b57e1f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -167,6 +167,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_OSI              18
 #define KVM_EXIT_PAPR_HCALL	  19
 #define KVM_EXIT_S390_UCONTROL	  20
+#define KVM_EXIT_WATCHDOG         21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -628,6 +629,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_READONLY_MEM 81
 #endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2850656e2e96..0ca3663206f8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -118,6 +118,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_IMMEDIATE_EXIT    15
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
+#define KVM_REQ_WATCHDOG          18
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
-- 
cgit v1.2.3-55-g7522


From ed7a8d7a3c5dd4adedb271112b6faba45c7037f9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Thu, 13 Sep 2012 21:44:30 +0000
Subject: KVM: Move some PPC ioctl definitions to the correct place

This moves the definitions of KVM_CREATE_SPAPR_TCE and
KVM_ALLOCATE_RMA in include/linux/kvm.h from the section listing the
vcpu ioctls to the section listing VM ioctls, as these are both
implemented and documented as VM ioctls.

Fortunately there is no actual collision of ioctl numbers at this
point.  Moving these to the correct section will reduce the
probability of a future collision.  This does not change the
user/kernel ABI at all.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 include/linux/kvm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 1649d4b57e1f..65ad5c624c70 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -852,6 +852,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_GET_SMMU_INFO	  _IOR(KVMIO,  0xa6, struct kvm_ppc_smmu_info)
 /* Available with KVM_CAP_PPC_ALLOC_HTAB */
 #define KVM_PPC_ALLOCATE_HTAB	  _IOWR(KVMIO, 0xa7, __u32)
+#define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
+/* Available with KVM_CAP_RMA */
+#define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 
 /*
  * ioctls for vcpu fds
@@ -915,9 +918,6 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_XCRS */
 #define KVM_GET_XCRS		  _IOR(KVMIO,  0xa6, struct kvm_xcrs)
 #define KVM_SET_XCRS		  _IOW(KVMIO,  0xa7, struct kvm_xcrs)
-#define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
-/* Available with KVM_CAP_RMA */
-#define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 /* Available with KVM_CAP_SW_TLB */
 #define KVM_DIRTY_TLB		  _IOW(KVMIO,  0xaa, struct kvm_dirty_tlb)
 /* Available with KVM_CAP_ONE_REG */
-- 
cgit v1.2.3-55-g7522


From 385ddeac7ed99cf7dc62d76274d55fbd7cae1b5a Mon Sep 17 00:00:00 2001
From: Luck, Tony
Date: Fri, 5 Oct 2012 15:05:34 -0700
Subject: X86 ACPI: Use #ifdef not #if for CONFIG_X86 check

Fix a build warning on ia64:

include/linux/acpi.h:437:5: warning: "CONFIG_X86" is not defined

Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/506f59ae9600b36a4@agluck-desktop.sc.intel.com
Cc: Len Brown <lenb@kernel.org>
Cc: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d14081c10c17..49fe586e3b1e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -434,7 +434,7 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_sleep(u8 sleep_state,
 				  u32 pm1a_control, u32 pm1b_control);
-#if CONFIG_X86
+#ifdef CONFIG_X86
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
 #else
 static inline void arch_reserve_mem_area(acpi_physical_address addr,
-- 
cgit v1.2.3-55-g7522


From 8b6e4547e0e4b6aac11df6d8d4e71ea2ab159b5e Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Thu, 20 Sep 2012 07:43:08 +0200
Subject: KVM: x86: Convert kvm_arch_vcpu_reset into private kvm_vcpu_reset

There are no external callers of this function as there is no concept of
resetting a vcpu from generic code.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c       | 8 +++++---
 include/linux/kvm_host.h | 1 -
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1eefebe5d727..dfed7ca2d27a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -160,6 +160,8 @@ u64 __read_mostly host_xcr0;
 
 int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
 
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -5426,7 +5428,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		pr_debug("vcpu %d received sipi with vector # %x\n",
 			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 		kvm_lapic_reset(vcpu);
-		r = kvm_arch_vcpu_reset(vcpu);
+		r = kvm_vcpu_reset(vcpu);
 		if (r)
 			return r;
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6036,7 +6038,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-	r = kvm_arch_vcpu_reset(vcpu);
+	r = kvm_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
@@ -6058,7 +6060,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_free(vcpu);
 }
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 93bfc9f9815c..c35b1c08c004 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -582,7 +582,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
 int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
-- 
cgit v1.2.3-55-g7522


From 90f790d2dc96f5a61855ae65b90e30c40c893a20 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Mon, 20 Aug 2012 21:45:05 +0100
Subject: regmap: irq: Allow users to retrieve the irq_domain

This is useful for integration with other subsystems, especially MFD,
and provides an alternative API for users that request their own IRQs.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap-irq.c | 19 +++++++++++++++++++
 include/linux/regmap.h           |  2 ++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 5b6b1d8e6cc0..5972ad958544 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -458,3 +458,22 @@ int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq)
 	return irq_create_mapping(data->domain, irq);
 }
 EXPORT_SYMBOL_GPL(regmap_irq_get_virq);
+
+/**
+ * regmap_irq_get_domain(): Retrieve the irq_domain for the chip
+ *
+ * Useful for drivers to request their own IRQs and for integration
+ * with subsystems.  For ease of integration NULL is accepted as a
+ * domain, allowing devices to just call this even if no domain is
+ * allocated.
+ *
+ * @data: regmap_irq controller to operate on.
+ */
+struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data)
+{
+	if (data)
+		return data->domain;
+	else
+		return NULL;
+}
+EXPORT_SYMBOL_GPL(regmap_irq_get_domain);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index e3bcc3f4dcb8..b9e99799965d 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -19,6 +19,7 @@
 struct module;
 struct device;
 struct i2c_client;
+struct irq_domain;
 struct spi_device;
 struct regmap;
 struct regmap_range_cfg;
@@ -317,6 +318,7 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data);
 int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data);
 int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq);
+struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data);
 
 #else
 
-- 
cgit v1.2.3-55-g7522


From e3549cd01347ef211d01353bdf2572b086574007 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 2 Oct 2012 20:17:15 +0100
Subject: regmap: Rename n_ranges to num_ranges

This makes things consistent with the rest of the API and is actually what
the documentation says. We don't currently have any in tree users so low
cost.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap.c | 4 ++--
 include/linux/regmap.h       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 52069d29ff12..ea9d6eb826bd 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -519,7 +519,7 @@ struct regmap *regmap_init(struct device *dev,
 	}
 
 	map->range_tree = RB_ROOT;
-	for (i = 0; i < config->n_ranges; i++) {
+	for (i = 0; i < config->num_ranges; i++) {
 		const struct regmap_range_cfg *range_cfg = &config->ranges[i];
 		struct regmap_range_node *new;
 
@@ -532,7 +532,7 @@ struct regmap *regmap_init(struct device *dev,
 
 		/* Make sure, that this register range has no selector
 		   or data window within its boundary */
-		for (j = 0; j < config->n_ranges; j++) {
+		for (j = 0; j < config->num_ranges; j++) {
 			unsigned sel_reg = config->ranges[j].selector_reg;
 			unsigned win_min = config->ranges[j].window_start;
 			unsigned win_max = win_min +
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index e3bcc3f4dcb8..afc8e1a2cd1b 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -133,7 +133,7 @@ struct regmap_config {
 	enum regmap_endian val_format_endian;
 
 	const struct regmap_range_cfg *ranges;
-	unsigned int n_ranges;
+	unsigned int num_ranges;
 };
 
 /**
-- 
cgit v1.2.3-55-g7522


From d058bb49618482f2eff0db57618c9a7352916dd5 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Wed, 3 Oct 2012 12:40:47 +0100
Subject: regmap: Allow ranges to be named

For more useful diagnostics.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h | 1 +
 drivers/base/regmap/regmap.c   | 1 +
 include/linux/regmap.h         | 4 ++++
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 80f9ab9c3aa4..27e66c3e7a59 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -120,6 +120,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
 
 struct regmap_range_node {
 	struct rb_node node;
+	const char *name;
 
 	unsigned int range_min;
 	unsigned int range_max;
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 0544f63ecd31..ce5129df4406 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -579,6 +579,7 @@ struct regmap *regmap_init(struct device *dev,
 			goto err_range;
 		}
 
+		new->name = range_cfg->name;
 		new->range_min = range_cfg->range_min;
 		new->range_max = range_cfg->range_max;
 		new->selector_reg = range_cfg->selector_reg;
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index afc8e1a2cd1b..9f228d7f7ac4 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -142,6 +142,8 @@ struct regmap_config {
  *     1. page selector register update;
  *     2. access through data window registers.
  *
+ * @name: Descriptive name for diagnostics
+ *
  * @range_min: Address of the lowest register address in virtual range.
  * @range_max: Address of the highest register in virtual range.
  *
@@ -153,6 +155,8 @@ struct regmap_config {
  * @window_len: Number of registers in data window.
  */
 struct regmap_range_cfg {
+	const char *name;
+
 	/* Registers of virtual address range */
 	unsigned int range_min;
 	unsigned int range_max;
-- 
cgit v1.2.3-55-g7522


From b8a6d9980f75cf5401a5ab23834eace1cb23c4f1 Mon Sep 17 00:00:00 2001
From: Shawn Guo
Date: Fri, 14 Sep 2012 10:35:54 +0800
Subject: dma: ipu: rename mach/ipu.h to include/linux/dma/ipu-dma.h

The header ipu.h really belongs to dma subsystem rather than imx
platform.  Rename it to ipu-dma.h and put it into include/linux/dma/.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: linux-media@vger.kernel.org
Cc: linux-fbdev@vger.kernel.org
---
 arch/arm/mach-imx/include/mach/ipu.h           | 177 -------------------------
 drivers/dma/ipu/ipu_idmac.c                    |   3 +-
 drivers/dma/ipu/ipu_irq.c                      |   3 +-
 drivers/media/platform/soc_camera/mx3_camera.c |   2 +-
 drivers/video/mx3fb.c                          |   2 +-
 include/linux/dma/ipu-dma.h                    | 177 +++++++++++++++++++++++++
 6 files changed, 181 insertions(+), 183 deletions(-)
 delete mode 100644 arch/arm/mach-imx/include/mach/ipu.h
 create mode 100644 include/linux/dma/ipu-dma.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/include/mach/ipu.h b/arch/arm/mach-imx/include/mach/ipu.h
deleted file mode 100644
index 539e559d18b2..000000000000
--- a/arch/arm/mach-imx/include/mach/ipu.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (C) 2008
- * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
- *
- * Copyright (C) 2005-2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _IPU_H_
-#define _IPU_H_
-
-#include <linux/types.h>
-#include <linux/dmaengine.h>
-
-/* IPU DMA Controller channel definitions. */
-enum ipu_channel {
-	IDMAC_IC_0 = 0,		/* IC (encoding task) to memory */
-	IDMAC_IC_1 = 1,		/* IC (viewfinder task) to memory */
-	IDMAC_ADC_0 = 1,
-	IDMAC_IC_2 = 2,
-	IDMAC_ADC_1 = 2,
-	IDMAC_IC_3 = 3,
-	IDMAC_IC_4 = 4,
-	IDMAC_IC_5 = 5,
-	IDMAC_IC_6 = 6,
-	IDMAC_IC_7 = 7,		/* IC (sensor data) to memory */
-	IDMAC_IC_8 = 8,
-	IDMAC_IC_9 = 9,
-	IDMAC_IC_10 = 10,
-	IDMAC_IC_11 = 11,
-	IDMAC_IC_12 = 12,
-	IDMAC_IC_13 = 13,
-	IDMAC_SDC_0 = 14,	/* Background synchronous display data */
-	IDMAC_SDC_1 = 15,	/* Foreground data (overlay) */
-	IDMAC_SDC_2 = 16,
-	IDMAC_SDC_3 = 17,
-	IDMAC_ADC_2 = 18,
-	IDMAC_ADC_3 = 19,
-	IDMAC_ADC_4 = 20,
-	IDMAC_ADC_5 = 21,
-	IDMAC_ADC_6 = 22,
-	IDMAC_ADC_7 = 23,
-	IDMAC_PF_0 = 24,
-	IDMAC_PF_1 = 25,
-	IDMAC_PF_2 = 26,
-	IDMAC_PF_3 = 27,
-	IDMAC_PF_4 = 28,
-	IDMAC_PF_5 = 29,
-	IDMAC_PF_6 = 30,
-	IDMAC_PF_7 = 31,
-};
-
-/* Order significant! */
-enum ipu_channel_status {
-	IPU_CHANNEL_FREE,
-	IPU_CHANNEL_INITIALIZED,
-	IPU_CHANNEL_READY,
-	IPU_CHANNEL_ENABLED,
-};
-
-#define IPU_CHANNELS_NUM 32
-
-enum pixel_fmt {
-	/* 1 byte */
-	IPU_PIX_FMT_GENERIC,
-	IPU_PIX_FMT_RGB332,
-	IPU_PIX_FMT_YUV420P,
-	IPU_PIX_FMT_YUV422P,
-	IPU_PIX_FMT_YUV420P2,
-	IPU_PIX_FMT_YVU422P,
-	/* 2 bytes */
-	IPU_PIX_FMT_RGB565,
-	IPU_PIX_FMT_RGB666,
-	IPU_PIX_FMT_BGR666,
-	IPU_PIX_FMT_YUYV,
-	IPU_PIX_FMT_UYVY,
-	/* 3 bytes */
-	IPU_PIX_FMT_RGB24,
-	IPU_PIX_FMT_BGR24,
-	/* 4 bytes */
-	IPU_PIX_FMT_GENERIC_32,
-	IPU_PIX_FMT_RGB32,
-	IPU_PIX_FMT_BGR32,
-	IPU_PIX_FMT_ABGR32,
-	IPU_PIX_FMT_BGRA32,
-	IPU_PIX_FMT_RGBA32,
-};
-
-enum ipu_color_space {
-	IPU_COLORSPACE_RGB,
-	IPU_COLORSPACE_YCBCR,
-	IPU_COLORSPACE_YUV
-};
-
-/*
- * Enumeration of IPU rotation modes
- */
-enum ipu_rotate_mode {
-	/* Note the enum values correspond to BAM value */
-	IPU_ROTATE_NONE = 0,
-	IPU_ROTATE_VERT_FLIP = 1,
-	IPU_ROTATE_HORIZ_FLIP = 2,
-	IPU_ROTATE_180 = 3,
-	IPU_ROTATE_90_RIGHT = 4,
-	IPU_ROTATE_90_RIGHT_VFLIP = 5,
-	IPU_ROTATE_90_RIGHT_HFLIP = 6,
-	IPU_ROTATE_90_LEFT = 7,
-};
-
-/*
- * Enumeration of DI ports for ADC.
- */
-enum display_port {
-	DISP0,
-	DISP1,
-	DISP2,
-	DISP3
-};
-
-struct idmac_video_param {
-	unsigned short		in_width;
-	unsigned short		in_height;
-	uint32_t		in_pixel_fmt;
-	unsigned short		out_width;
-	unsigned short		out_height;
-	uint32_t		out_pixel_fmt;
-	unsigned short		out_stride;
-	bool			graphics_combine_en;
-	bool			global_alpha_en;
-	bool			key_color_en;
-	enum display_port	disp;
-	unsigned short		out_left;
-	unsigned short		out_top;
-};
-
-/*
- * Union of initialization parameters for a logical channel. So far only video
- * parameters are used.
- */
-union ipu_channel_param {
-	struct idmac_video_param video;
-};
-
-struct idmac_tx_desc {
-	struct dma_async_tx_descriptor	txd;
-	struct scatterlist		*sg;	/* scatterlist for this */
-	unsigned int			sg_len;	/* tx-descriptor. */
-	struct list_head		list;
-};
-
-struct idmac_channel {
-	struct dma_chan		dma_chan;
-	dma_cookie_t		completed;	/* last completed cookie	   */
-	union ipu_channel_param	params;
-	enum ipu_channel	link;	/* input channel, linked to the output	   */
-	enum ipu_channel_status	status;
-	void			*client;	/* Only one client per channel	   */
-	unsigned int		n_tx_desc;
-	struct idmac_tx_desc	*desc;		/* allocated tx-descriptors	   */
-	struct scatterlist	*sg[2];	/* scatterlist elements in buffer-0 and -1 */
-	struct list_head	free_list;	/* free tx-descriptors		   */
-	struct list_head	queue;		/* queued tx-descriptors	   */
-	spinlock_t		lock;		/* protects sg[0,1], queue	   */
-	struct mutex		chan_mutex; /* protects status, cookie, free_list  */
-	bool			sec_chan_en;
-	int			active_buffer;
-	unsigned int		eof_irq;
-	char			eof_name[16];	/* EOF IRQ name for request_irq()  */
-};
-
-#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd)
-#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan)
-
-#endif
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index c7573e50aa14..65855373cee6 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -22,8 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-
-#include <mach/ipu.h>
+#include <linux/dma/ipu-dma.h>
 
 #include "../dmaengine.h"
 #include "ipu_intern.h"
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
index fa95bcc3de1f..a5ee37d5320f 100644
--- a/drivers/dma/ipu/ipu_irq.c
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -15,8 +15,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/module.h>
-
-#include <mach/ipu.h>
+#include <linux/dma/ipu-dma.h>
 
 #include "ipu_intern.h"
 
diff --git a/drivers/media/platform/soc_camera/mx3_camera.c b/drivers/media/platform/soc_camera/mx3_camera.c
index 3557ac97e430..64d39b1b5582 100644
--- a/drivers/media/platform/soc_camera/mx3_camera.c
+++ b/drivers/media/platform/soc_camera/mx3_camera.c
@@ -17,6 +17,7 @@
 #include <linux/vmalloc.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/dma/ipu-dma.h>
 
 #include <media/v4l2-common.h>
 #include <media/v4l2-dev.h>
@@ -24,7 +25,6 @@
 #include <media/soc_camera.h>
 #include <media/soc_mediabus.h>
 
-#include <mach/ipu.h>
 #include <linux/platform_data/camera-mx3.h>
 #include <linux/platform_data/dma-imx.h>
 
diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c
index ce1d452464ed..0324c07be895 100644
--- a/drivers/video/mx3fb.c
+++ b/drivers/video/mx3fb.c
@@ -26,10 +26,10 @@
 #include <linux/console.h>
 #include <linux/clk.h>
 #include <linux/mutex.h>
+#include <linux/dma/ipu-dma.h>
 
 #include <linux/platform_data/dma-imx.h>
 #include <mach/hardware.h>
-#include <mach/ipu.h>
 #include <linux/platform_data/video-mx3fb.h>
 
 #include <asm/io.h>
diff --git a/include/linux/dma/ipu-dma.h b/include/linux/dma/ipu-dma.h
new file mode 100644
index 000000000000..18031115c668
--- /dev/null
+++ b/include/linux/dma/ipu-dma.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_DMA_IPU_DMA_H
+#define __LINUX_DMA_IPU_DMA_H
+
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+
+/* IPU DMA Controller channel definitions. */
+enum ipu_channel {
+	IDMAC_IC_0 = 0,		/* IC (encoding task) to memory */
+	IDMAC_IC_1 = 1,		/* IC (viewfinder task) to memory */
+	IDMAC_ADC_0 = 1,
+	IDMAC_IC_2 = 2,
+	IDMAC_ADC_1 = 2,
+	IDMAC_IC_3 = 3,
+	IDMAC_IC_4 = 4,
+	IDMAC_IC_5 = 5,
+	IDMAC_IC_6 = 6,
+	IDMAC_IC_7 = 7,		/* IC (sensor data) to memory */
+	IDMAC_IC_8 = 8,
+	IDMAC_IC_9 = 9,
+	IDMAC_IC_10 = 10,
+	IDMAC_IC_11 = 11,
+	IDMAC_IC_12 = 12,
+	IDMAC_IC_13 = 13,
+	IDMAC_SDC_0 = 14,	/* Background synchronous display data */
+	IDMAC_SDC_1 = 15,	/* Foreground data (overlay) */
+	IDMAC_SDC_2 = 16,
+	IDMAC_SDC_3 = 17,
+	IDMAC_ADC_2 = 18,
+	IDMAC_ADC_3 = 19,
+	IDMAC_ADC_4 = 20,
+	IDMAC_ADC_5 = 21,
+	IDMAC_ADC_6 = 22,
+	IDMAC_ADC_7 = 23,
+	IDMAC_PF_0 = 24,
+	IDMAC_PF_1 = 25,
+	IDMAC_PF_2 = 26,
+	IDMAC_PF_3 = 27,
+	IDMAC_PF_4 = 28,
+	IDMAC_PF_5 = 29,
+	IDMAC_PF_6 = 30,
+	IDMAC_PF_7 = 31,
+};
+
+/* Order significant! */
+enum ipu_channel_status {
+	IPU_CHANNEL_FREE,
+	IPU_CHANNEL_INITIALIZED,
+	IPU_CHANNEL_READY,
+	IPU_CHANNEL_ENABLED,
+};
+
+#define IPU_CHANNELS_NUM 32
+
+enum pixel_fmt {
+	/* 1 byte */
+	IPU_PIX_FMT_GENERIC,
+	IPU_PIX_FMT_RGB332,
+	IPU_PIX_FMT_YUV420P,
+	IPU_PIX_FMT_YUV422P,
+	IPU_PIX_FMT_YUV420P2,
+	IPU_PIX_FMT_YVU422P,
+	/* 2 bytes */
+	IPU_PIX_FMT_RGB565,
+	IPU_PIX_FMT_RGB666,
+	IPU_PIX_FMT_BGR666,
+	IPU_PIX_FMT_YUYV,
+	IPU_PIX_FMT_UYVY,
+	/* 3 bytes */
+	IPU_PIX_FMT_RGB24,
+	IPU_PIX_FMT_BGR24,
+	/* 4 bytes */
+	IPU_PIX_FMT_GENERIC_32,
+	IPU_PIX_FMT_RGB32,
+	IPU_PIX_FMT_BGR32,
+	IPU_PIX_FMT_ABGR32,
+	IPU_PIX_FMT_BGRA32,
+	IPU_PIX_FMT_RGBA32,
+};
+
+enum ipu_color_space {
+	IPU_COLORSPACE_RGB,
+	IPU_COLORSPACE_YCBCR,
+	IPU_COLORSPACE_YUV
+};
+
+/*
+ * Enumeration of IPU rotation modes
+ */
+enum ipu_rotate_mode {
+	/* Note the enum values correspond to BAM value */
+	IPU_ROTATE_NONE = 0,
+	IPU_ROTATE_VERT_FLIP = 1,
+	IPU_ROTATE_HORIZ_FLIP = 2,
+	IPU_ROTATE_180 = 3,
+	IPU_ROTATE_90_RIGHT = 4,
+	IPU_ROTATE_90_RIGHT_VFLIP = 5,
+	IPU_ROTATE_90_RIGHT_HFLIP = 6,
+	IPU_ROTATE_90_LEFT = 7,
+};
+
+/*
+ * Enumeration of DI ports for ADC.
+ */
+enum display_port {
+	DISP0,
+	DISP1,
+	DISP2,
+	DISP3
+};
+
+struct idmac_video_param {
+	unsigned short		in_width;
+	unsigned short		in_height;
+	uint32_t		in_pixel_fmt;
+	unsigned short		out_width;
+	unsigned short		out_height;
+	uint32_t		out_pixel_fmt;
+	unsigned short		out_stride;
+	bool			graphics_combine_en;
+	bool			global_alpha_en;
+	bool			key_color_en;
+	enum display_port	disp;
+	unsigned short		out_left;
+	unsigned short		out_top;
+};
+
+/*
+ * Union of initialization parameters for a logical channel. So far only video
+ * parameters are used.
+ */
+union ipu_channel_param {
+	struct idmac_video_param video;
+};
+
+struct idmac_tx_desc {
+	struct dma_async_tx_descriptor	txd;
+	struct scatterlist		*sg;	/* scatterlist for this */
+	unsigned int			sg_len;	/* tx-descriptor. */
+	struct list_head		list;
+};
+
+struct idmac_channel {
+	struct dma_chan		dma_chan;
+	dma_cookie_t		completed;	/* last completed cookie	   */
+	union ipu_channel_param	params;
+	enum ipu_channel	link;	/* input channel, linked to the output	   */
+	enum ipu_channel_status	status;
+	void			*client;	/* Only one client per channel	   */
+	unsigned int		n_tx_desc;
+	struct idmac_tx_desc	*desc;		/* allocated tx-descriptors	   */
+	struct scatterlist	*sg[2];	/* scatterlist elements in buffer-0 and -1 */
+	struct list_head	free_list;	/* free tx-descriptors		   */
+	struct list_head	queue;		/* queued tx-descriptors	   */
+	spinlock_t		lock;		/* protects sg[0,1], queue	   */
+	struct mutex		chan_mutex; /* protects status, cookie, free_list  */
+	bool			sec_chan_en;
+	int			active_buffer;
+	unsigned int		eof_irq;
+	char			eof_name[16];	/* EOF IRQ name for request_irq()  */
+};
+
+#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd)
+#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan)
+
+#endif /* __LINUX_DMA_IPU_DMA_H */
-- 
cgit v1.2.3-55-g7522


From e51d0f0ac4b7f513808743c6a62f0387eebd0144 Mon Sep 17 00:00:00 2001
From: Shawn Guo
Date: Sat, 15 Sep 2012 21:11:28 +0800
Subject: dma: imx-dma: remove cpu_is_xxx by using platform_device_id

It changes the driver to use platform_device_id rather than cpu_is_xxx
to determine the controller type, and updates the platform code
accordingly.

As the result, mach/hardware.h inclusion gets removed from the driver.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Javier Martin <javier.martin@vista-silicon.com>
Cc: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-imx/clk-imx1.c                 |  3 +-
 arch/arm/mach-imx/clk-imx21.c                |  4 +--
 arch/arm/mach-imx/clk-imx27.c                |  4 +--
 arch/arm/mach-imx/devices/devices-common.h   |  2 +-
 arch/arm/mach-imx/devices/platform-imx-dma.c |  4 +--
 arch/arm/mach-imx/mm-imx1.c                  |  3 +-
 arch/arm/mach-imx/mm-imx21.c                 |  3 +-
 arch/arm/mach-imx/mm-imx27.c                 |  3 +-
 drivers/dma/imx-dma.c                        | 54 ++++++++++++++++++++++++----
 include/linux/platform_data/dma-imx.h        |  4 ++-
 10 files changed, 66 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/clk-imx1.c b/arch/arm/mach-imx/clk-imx1.c
index 1cc7a735ed19..49a74440bc48 100644
--- a/arch/arm/mach-imx/clk-imx1.c
+++ b/arch/arm/mach-imx/clk-imx1.c
@@ -83,7 +83,8 @@ int __init mx1_clocks_init(unsigned long fref)
 			pr_err("imx1 clk %d: register failed with %ld\n",
 				i, PTR_ERR(clk[i]));
 
-	clk_register_clkdev(clk[dma_gate], "ahb", "imx-dma");
+	clk_register_clkdev(clk[dma_gate], "ahb", "imx1-dma");
+	clk_register_clkdev(clk[hclk], "ipg", "imx1-dma");
 	clk_register_clkdev(clk[csi_gate], NULL, "mx1-camera.0");
 	clk_register_clkdev(clk[mma_gate], "mma", NULL);
 	clk_register_clkdev(clk[usbd_gate], NULL, "imx_udc.0");
diff --git a/arch/arm/mach-imx/clk-imx21.c b/arch/arm/mach-imx/clk-imx21.c
index 96a4788032d1..8aec572f84fe 100644
--- a/arch/arm/mach-imx/clk-imx21.c
+++ b/arch/arm/mach-imx/clk-imx21.c
@@ -163,8 +163,8 @@ int __init mx21_clocks_init(unsigned long lref, unsigned long href)
 	clk_register_clkdev(clk[usb_gate], "per", "imx21-hcd.0");
 	clk_register_clkdev(clk[usb_hclk_gate], "ahb", "imx21-hcd.0");
 	clk_register_clkdev(clk[nfc_gate], NULL, "imx21-nand.0");
-	clk_register_clkdev(clk[dma_hclk_gate], "ahb", "imx-dma");
-	clk_register_clkdev(clk[dma_gate], "ipg", "imx-dma");
+	clk_register_clkdev(clk[dma_hclk_gate], "ahb", "imx21-dma");
+	clk_register_clkdev(clk[dma_gate], "ipg", "imx21-dma");
 	clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0");
 	clk_register_clkdev(clk[i2c_gate], NULL, "imx21-i2c.0");
 	clk_register_clkdev(clk[kpp_gate], NULL, "mxc-keypad");
diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c
index 86a0c4262471..a5f0e3b6dec6 100644
--- a/arch/arm/mach-imx/clk-imx27.c
+++ b/arch/arm/mach-imx/clk-imx27.c
@@ -242,8 +242,8 @@ int __init mx27_clocks_init(unsigned long fref)
 	clk_register_clkdev(clk[nfc_baud_gate], NULL, "imx27-nand.0");
 	clk_register_clkdev(clk[vpu_baud_gate], "per", "coda-imx27.0");
 	clk_register_clkdev(clk[vpu_ahb_gate], "ahb", "coda-imx27.0");
-	clk_register_clkdev(clk[dma_ahb_gate], "ahb", "imx-dma");
-	clk_register_clkdev(clk[dma_ipg_gate], "ipg", "imx-dma");
+	clk_register_clkdev(clk[dma_ahb_gate], "ahb", "imx27-dma");
+	clk_register_clkdev(clk[dma_ipg_gate], "ipg", "imx27-dma");
 	clk_register_clkdev(clk[fec_ipg_gate], "ipg", "imx27-fec.0");
 	clk_register_clkdev(clk[fec_ahb_gate], "ahb", "imx27-fec.0");
 	clk_register_clkdev(clk[wdog_ipg_gate], NULL, "imx2-wdt.0");
diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h
index e4368f6c5fb4..36eb3f09f5d7 100644
--- a/arch/arm/mach-imx/devices/devices-common.h
+++ b/arch/arm/mach-imx/devices/devices-common.h
@@ -329,7 +329,7 @@ struct platform_device *__init imx_add_spi_imx(
 		const struct imx_spi_imx_data *data,
 		const struct spi_imx_master *pdata);
 
-struct platform_device *imx_add_imx_dma(resource_size_t iobase,
+struct platform_device *imx_add_imx_dma(char *name, resource_size_t iobase,
 					int irq, int irq_err);
 struct platform_device *imx_add_imx_sdma(char *name,
 	resource_size_t iobase, int irq, struct sdma_platform_data *pdata);
diff --git a/arch/arm/mach-imx/devices/platform-imx-dma.c b/arch/arm/mach-imx/devices/platform-imx-dma.c
index f9003e4d0f57..ccdb5dc4ddbd 100644
--- a/arch/arm/mach-imx/devices/platform-imx-dma.c
+++ b/arch/arm/mach-imx/devices/platform-imx-dma.c
@@ -8,7 +8,7 @@
  */
 #include "devices-common.h"
 
-struct platform_device __init __maybe_unused *imx_add_imx_dma(
+struct platform_device __init __maybe_unused *imx_add_imx_dma(char *name,
 	resource_size_t iobase, int irq, int irq_err)
 {
 	struct resource res[] = {
@@ -28,7 +28,7 @@ struct platform_device __init __maybe_unused *imx_add_imx_dma(
 	};
 
 	return platform_device_register_resndata(&mxc_ahb_bus,
-			"imx-dma", -1, res, ARRAY_SIZE(res), NULL, 0);
+			name, -1, res, ARRAY_SIZE(res), NULL, 0);
 }
 
 struct platform_device __init __maybe_unused *imx_add_imx_sdma(char *name,
diff --git a/arch/arm/mach-imx/mm-imx1.c b/arch/arm/mach-imx/mm-imx1.c
index 9de81bf6de96..79f6c0b8f69f 100644
--- a/arch/arm/mach-imx/mm-imx1.c
+++ b/arch/arm/mach-imx/mm-imx1.c
@@ -60,6 +60,7 @@ void __init imx1_soc_init(void)
 						MX1_GPIO_INT_PORTC, 0);
 	mxc_register_gpio("imx1-gpio", 3, MX1_GPIO4_BASE_ADDR, SZ_256,
 						MX1_GPIO_INT_PORTD, 0);
-	imx_add_imx_dma(MX1_DMA_BASE_ADDR, MX1_DMA_INT, MX1_DMA_ERR);
+	imx_add_imx_dma("imx1-dma", MX1_DMA_BASE_ADDR,
+			MX1_DMA_INT, MX1_DMA_ERR);
 	pinctrl_provide_dummies();
 }
diff --git a/arch/arm/mach-imx/mm-imx21.c b/arch/arm/mach-imx/mm-imx21.c
index 1c295154c296..3b97ea63b5fb 100644
--- a/arch/arm/mach-imx/mm-imx21.c
+++ b/arch/arm/mach-imx/mm-imx21.c
@@ -90,7 +90,8 @@ void __init imx21_soc_init(void)
 	mxc_register_gpio("imx21-gpio", 5, MX21_GPIO6_BASE_ADDR, SZ_256, MX21_INT_GPIO, 0);
 
 	pinctrl_provide_dummies();
-	imx_add_imx_dma(MX21_DMA_BASE_ADDR, MX21_INT_DMACH0, 0); /* No ERR irq */
+	imx_add_imx_dma("imx21-dma", MX21_DMA_BASE_ADDR,
+			MX21_INT_DMACH0, 0); /* No ERR irq */
 	platform_device_register_simple("imx21-audmux", 0, imx21_audmux_res,
 					ARRAY_SIZE(imx21_audmux_res));
 }
diff --git a/arch/arm/mach-imx/mm-imx27.c b/arch/arm/mach-imx/mm-imx27.c
index d389f4af33c0..91e8da832810 100644
--- a/arch/arm/mach-imx/mm-imx27.c
+++ b/arch/arm/mach-imx/mm-imx27.c
@@ -91,7 +91,8 @@ void __init imx27_soc_init(void)
 	mxc_register_gpio("imx21-gpio", 5, MX27_GPIO6_BASE_ADDR, SZ_256, MX27_INT_GPIO, 0);
 
 	pinctrl_provide_dummies();
-	imx_add_imx_dma(MX27_DMA_BASE_ADDR, MX27_INT_DMACH0, 0); /* No ERR irq */
+	imx_add_imx_dma("imx27-dma", MX27_DMA_BASE_ADDR,
+			MX27_INT_DMACH0, 0); /* No ERR irq */
 	/* imx27 has the imx21 type audmux */
 	platform_device_register_simple("imx21-audmux", 0, imx27_audmux_res,
 					ARRAY_SIZE(imx27_audmux_res));
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 88e8a8d89b56..a3a8270e76fb 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -29,7 +29,6 @@
 
 #include <asm/irq.h>
 #include <linux/platform_data/dma-imx.h>
-#include <mach/hardware.h>
 
 #include "dmaengine.h"
 #define IMXDMA_MAX_CHAN_DESCRIPTORS	16
@@ -167,6 +166,12 @@ struct imxdma_channel {
 	int				slot_2d;
 };
 
+enum imx_dma_type {
+	IMX1_DMA,
+	IMX21_DMA,
+	IMX27_DMA,
+};
+
 struct imxdma_engine {
 	struct device			*dev;
 	struct device_dma_parameters	dma_parms;
@@ -177,8 +182,40 @@ struct imxdma_engine {
 	spinlock_t			lock;
 	struct imx_dma_2d_config	slots_2d[IMX_DMA_2D_SLOTS];
 	struct imxdma_channel		channel[IMX_DMA_CHANNELS];
+	enum imx_dma_type		devtype;
 };
 
+static struct platform_device_id imx_dma_devtype[] = {
+	{
+		.name = "imx1-dma",
+		.driver_data = IMX1_DMA,
+	}, {
+		.name = "imx21-dma",
+		.driver_data = IMX21_DMA,
+	}, {
+		.name = "imx27-dma",
+		.driver_data = IMX27_DMA,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(platform, imx_dma_devtype);
+
+static inline int is_imx1_dma(struct imxdma_engine *imxdma)
+{
+	return imxdma->devtype == IMX1_DMA;
+}
+
+static inline int is_imx21_dma(struct imxdma_engine *imxdma)
+{
+	return imxdma->devtype == IMX21_DMA;
+}
+
+static inline int is_imx27_dma(struct imxdma_engine *imxdma)
+{
+	return imxdma->devtype == IMX27_DMA;
+}
+
 static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
 {
 	return container_of(chan, struct imxdma_channel, chan);
@@ -212,7 +249,9 @@ static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset)
 
 static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
 {
-	if (cpu_is_mx27())
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+
+	if (is_imx27_dma(imxdma))
 		return imxdmac->hw_chaining;
 	else
 		return 0;
@@ -267,7 +306,7 @@ static void imxdma_enable_hw(struct imxdma_desc *d)
 	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) |
 			 CCR_CEN | CCR_ACRPT, DMA_CCR(channel));
 
-	if ((cpu_is_mx21() || cpu_is_mx27()) &&
+	if (!is_imx1_dma(imxdma) &&
 			d->sg && imxdma_hw_chain(imxdmac)) {
 		d->sg = sg_next(d->sg);
 		if (d->sg) {
@@ -436,7 +475,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id)
 	struct imxdma_engine *imxdma = dev_id;
 	int i, disr;
 
-	if (cpu_is_mx21() || cpu_is_mx27())
+	if (!is_imx1_dma(imxdma))
 		imxdma_err_handler(irq, dev_id);
 
 	disr = imx_dmav1_readl(imxdma, DMA_DISR);
@@ -967,6 +1006,8 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	if (!imxdma)
 		return -ENOMEM;
 
+	imxdma->devtype = pdev->id_entry->driver_data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	imxdma->base = devm_request_and_ioremap(&pdev->dev, res);
 	if (!imxdma->base)
@@ -990,7 +1031,7 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	/* reset DMA module */
 	imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR);
 
-	if (cpu_is_mx1()) {
+	if (is_imx1_dma(imxdma)) {
 		ret = devm_request_irq(&pdev->dev, irq,
 				       dma_irq_handler, 0, "DMA", imxdma);
 		if (ret) {
@@ -1038,7 +1079,7 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
 		struct imxdma_channel *imxdmac = &imxdma->channel[i];
 
-		if (cpu_is_mx21() || cpu_is_mx27()) {
+		if (!is_imx1_dma(imxdma)) {
 			ret = devm_request_irq(&pdev->dev, irq + i,
 					dma_irq_handler, 0, "DMA", imxdma);
 			if (ret) {
@@ -1118,6 +1159,7 @@ static struct platform_driver imxdma_driver = {
 	.driver		= {
 		.name	= "imx-dma",
 	},
+	.id_table	= imx_dma_devtype,
 	.remove		= __exit_p(imxdma_remove),
 };
 
diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h
index 1b9080385b46..f6d30cc1cb77 100644
--- a/include/linux/platform_data/dma-imx.h
+++ b/include/linux/platform_data/dma-imx.h
@@ -61,7 +61,9 @@ static inline int imx_dma_is_ipu(struct dma_chan *chan)
 static inline int imx_dma_is_general_purpose(struct dma_chan *chan)
 {
 	return strstr(dev_name(chan->device->dev), "sdma") ||
-		!strcmp(dev_name(chan->device->dev), "imx-dma");
+		!strcmp(dev_name(chan->device->dev), "imx1-dma") ||
+		!strcmp(dev_name(chan->device->dev), "imx21-dma") ||
+		!strcmp(dev_name(chan->device->dev), "imx27-dma");
 }
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 1e66210a3135b544713a9455e78e36f6f8d1bf77 Mon Sep 17 00:00:00 2001
From: Shawn Guo
Date: Sun, 16 Sep 2012 22:16:44 +0800
Subject: ARM: imx: remove header file mach/irqs.h

The only mach/irqs.h user outside arch/arm/mach-imx is
sound/soc/fsl/imx-pcm-fiq.c, which refers to mxc_set_irq_fiq().
Move the declaration into include/linux/platform_data/asoc-imx-ssi.h,
so that we can remove mach/irqs.h includsion from imx-pcm-fiq.c.

Inside arch/arm/mach-imx, the only users to mach/irqs.h are avic.c
and tzic.c for referring to macro FIQ_START.  Let's move the macro
into irq-common.h and get rid of mach/irqs.h completely.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: alsa-devel@alsa-project.org
---
 arch/arm/mach-imx/avic.c                   |  1 -
 arch/arm/mach-imx/include/mach/irqs.h      | 21 ---------------------
 arch/arm/mach-imx/irq-common.h             |  3 +++
 arch/arm/mach-imx/tzic.c                   |  2 --
 include/linux/platform_data/asoc-imx-ssi.h |  2 ++
 sound/soc/fsl/imx-pcm-fiq.c                |  1 -
 6 files changed, 5 insertions(+), 25 deletions(-)
 delete mode 100644 arch/arm/mach-imx/include/mach/irqs.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/avic.c b/arch/arm/mach-imx/avic.c
index adc64bc5b343..0eff23ed92b9 100644
--- a/arch/arm/mach-imx/avic.c
+++ b/arch/arm/mach-imx/avic.c
@@ -24,7 +24,6 @@
 #include <linux/of.h>
 #include <asm/mach/irq.h>
 #include <asm/exception.h>
-#include <mach/irqs.h>
 
 #include "common.h"
 #include "hardware.h"
diff --git a/arch/arm/mach-imx/include/mach/irqs.h b/arch/arm/mach-imx/include/mach/irqs.h
deleted file mode 100644
index d73f5e8ea9cb..000000000000
--- a/arch/arm/mach-imx/include/mach/irqs.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- *  Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __ASM_ARCH_MXC_IRQS_H__
-#define __ASM_ARCH_MXC_IRQS_H__
-
-extern int imx_irq_set_priority(unsigned char irq, unsigned char prio);
-
-/* all normal IRQs can be FIQs */
-#define FIQ_START	0
-/* switch between IRQ and FIQ */
-extern int mxc_set_irq_fiq(unsigned int irq, unsigned int type);
-
-#endif /* __ASM_ARCH_MXC_IRQS_H__ */
diff --git a/arch/arm/mach-imx/irq-common.h b/arch/arm/mach-imx/irq-common.h
index 6ccb3a14c693..5b2dabba330f 100644
--- a/arch/arm/mach-imx/irq-common.h
+++ b/arch/arm/mach-imx/irq-common.h
@@ -19,6 +19,9 @@
 #ifndef __PLAT_MXC_IRQ_COMMON_H__
 #define __PLAT_MXC_IRQ_COMMON_H__
 
+/* all normal IRQs can be FIQs */
+#define FIQ_START	0
+
 struct mxc_extra_irq
 {
 	int (*set_priority)(unsigned char irq, unsigned char prio);
diff --git a/arch/arm/mach-imx/tzic.c b/arch/arm/mach-imx/tzic.c
index c7625b4a916b..9721161f208f 100644
--- a/arch/arm/mach-imx/tzic.c
+++ b/arch/arm/mach-imx/tzic.c
@@ -21,8 +21,6 @@
 #include <asm/mach/irq.h>
 #include <asm/exception.h>
 
-#include <mach/irqs.h>
-
 #include "common.h"
 #include "hardware.h"
 #include "irq-common.h"
diff --git a/include/linux/platform_data/asoc-imx-ssi.h b/include/linux/platform_data/asoc-imx-ssi.h
index 63f3c2804239..92c7fd72f636 100644
--- a/include/linux/platform_data/asoc-imx-ssi.h
+++ b/include/linux/platform_data/asoc-imx-ssi.h
@@ -17,5 +17,7 @@ struct imx_ssi_platform_data {
 	void (*ac97_warm_reset)(struct snd_ac97 *ac97);
 };
 
+extern int mxc_set_irq_fiq(unsigned int irq, unsigned int type);
+
 #endif /* __MACH_SSI_H */
 
diff --git a/sound/soc/fsl/imx-pcm-fiq.c b/sound/soc/fsl/imx-pcm-fiq.c
index 22c6130957ba..9ffc9e66308f 100644
--- a/sound/soc/fsl/imx-pcm-fiq.c
+++ b/sound/soc/fsl/imx-pcm-fiq.c
@@ -29,7 +29,6 @@
 
 #include <asm/fiq.h>
 
-#include <mach/irqs.h>
 #include <linux/platform_data/asoc-imx-ssi.h>
 
 #include "imx-ssi.h"
-- 
cgit v1.2.3-55-g7522


From c0cc3f1665256b7cfdc1d581f997dcea1af71405 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 28 Sep 2012 16:50:15 +0100
Subject: ASoC: wm8994: Allow a delay between jack insertion and microphone
 detect

This can be used to provide some additional settling time to ensure that
we don't start microphone detection while the microphone pin is connected
to one of the headphone pins.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/pdata.h |  5 +++
 sound/soc/codecs/wm8994.c        | 74 +++++++++++++++++++++++++++++-----------
 2 files changed, 60 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index fc87be4fdc25..8e21a094836d 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -176,6 +176,11 @@ struct wm8994_pdata {
         unsigned int lineout1fb:1;
         unsigned int lineout2fb:1;
 
+	/* Delay between detecting a jack and starting microphone
+	 * detect (specified in ms)
+	 */
+	int micdet_delay;
+
 	/* IRQ for microphone detection if brought out directly as a
 	 * signal.
 	 */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 2b2dadc54dac..07095a9ca9a6 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -3470,11 +3470,46 @@ static void wm8958_default_micdet(u16 status, void *data)
 	}
 }
 
+/* Deferred mic detection to allow for extra settling time */
+static void wm1811_mic_work(struct work_struct *work)
+{
+	struct wm8994_priv *wm8994 = container_of(work, struct wm8994_priv,
+						  mic_work.work);
+	struct snd_soc_codec *codec = wm8994->hubs.codec;
+
+	pm_runtime_get_sync(codec->dev);
+
+	/* If required for an external cap force MICBIAS on */
+	if (wm8994->pdata->jd_ext_cap) {
+		snd_soc_dapm_force_enable_pin(&codec->dapm,
+					      "MICBIAS2");
+		snd_soc_dapm_sync(&codec->dapm);
+	}
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	dev_dbg(codec->dev, "Starting mic detection\n");
+
+	/*
+	 * Start off measument of microphone impedence to find out
+	 * what's actually there.
+	 */
+	wm8994->mic_detecting = true;
+	wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_MIC);
+
+	snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+			    WM8958_MICD_ENA, WM8958_MICD_ENA);
+
+	mutex_unlock(&wm8994->accdet_lock);
+
+	pm_runtime_put(codec->dev);
+}
+
 static irqreturn_t wm1811_jackdet_irq(int irq, void *data)
 {
 	struct wm8994_priv *wm8994 = data;
 	struct snd_soc_codec *codec = wm8994->hubs.codec;
-	int reg;
+	int reg, delay;
 	bool present;
 
 	pm_runtime_get_sync(codec->dev);
@@ -3505,18 +3540,14 @@ static irqreturn_t wm1811_jackdet_irq(int irq, void *data)
 		snd_soc_update_bits(codec, WM1811_JACKDET_CTRL,
 				    WM1811_JACKDET_DB, 0);
 
-		/*
-		 * Start off measument of microphone impedence to find
-		 * out what's actually there.
-		 */
-		wm8994->mic_detecting = true;
-		wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_MIC);
-
-		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
-				    WM8958_MICD_ENA, WM8958_MICD_ENA);
+		delay = wm8994->pdata->micdet_delay;
+		schedule_delayed_work(&wm8994->mic_work,
+				      msecs_to_jiffies(delay));
 	} else {
 		dev_dbg(codec->dev, "Jack not detected\n");
 
+		cancel_delayed_work_sync(&wm8994->mic_work);
+
 		snd_soc_update_bits(codec, WM8958_MICBIAS2,
 				    WM8958_MICB2_DISCH, WM8958_MICB2_DISCH);
 
@@ -3533,14 +3564,9 @@ static irqreturn_t wm1811_jackdet_irq(int irq, void *data)
 
 	mutex_unlock(&wm8994->accdet_lock);
 
-	/* If required for an external cap force MICBIAS on */
-	if (wm8994->pdata->jd_ext_cap) {
-		if (present)
-			snd_soc_dapm_force_enable_pin(&codec->dapm,
-						      "MICBIAS2");
-		else
-			snd_soc_dapm_disable_pin(&codec->dapm, "MICBIAS2");
-	}
+	/* Turn off MICBIAS if it was on for an external cap */
+	if (wm8994->pdata->jd_ext_cap && !present)
+		snd_soc_dapm_disable_pin(&codec->dapm, "MICBIAS2");
 
 	if (present)
 		snd_soc_jack_report(wm8994->micdet[0].jack,
@@ -3763,10 +3789,20 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	snd_soc_codec_set_cache_io(codec, 16, 16, SND_SOC_REGMAP);
 
 	mutex_init(&wm8994->accdet_lock);
-	INIT_DELAYED_WORK(&wm8994->mic_work, wm8994_mic_work);
 	INIT_DELAYED_WORK(&wm8994->jackdet_bootstrap,
 			  wm1811_jackdet_bootstrap);
 
+	switch (control->type) {
+	case WM8994:
+		INIT_DELAYED_WORK(&wm8994->mic_work, wm8994_mic_work);
+		break;
+	case WM1811:
+		INIT_DELAYED_WORK(&wm8994->mic_work, wm1811_mic_work);
+		break;
+	default:
+		break;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(wm8994->fll_locked); i++)
 		init_completion(&wm8994->fll_locked[i]);
 
-- 
cgit v1.2.3-55-g7522


From 0c57067430a2b729bc08c92b17eb4f29d9bbfaae Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Sat, 6 Oct 2012 20:47:46 +0530
Subject: regulator: tps51632: Add tps51632 regulator driver

The TPS51632 is a driverless step down controller with
serial control. Advanced features such as D-Cap+
architecture with overlapping pulse support and OSR
overshoot reduction provide fast transient response,
lowest output capacitance and high efficiency.
The TPS51632 supports both I2C and DVFS interfaces
(through PWM) for dynamic control of the output voltage
and current monitor telemetry.
Add regulator driver for TPS51632.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/Kconfig                    |  11 +
 drivers/regulator/Makefile                   |   1 +
 drivers/regulator/tps51632-regulator.c       | 332 +++++++++++++++++++++++++++
 include/linux/regulator/tps51632-regulator.h |  47 ++++
 4 files changed, 391 insertions(+)
 create mode 100644 drivers/regulator/tps51632-regulator.c
 create mode 100644 include/linux/regulator/tps51632-regulator.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 67d47b59a66d..aa9e8a18262d 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -335,6 +335,17 @@ config REGULATOR_PALMAS
 	  on the muxing. This is handled automatically in the driver by
 	  reading the mux info from OTP.
 
+config REGULATOR_TPS51632
+	tristate "TI TPS51632 Power Regulator"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This driver supports TPS51632 voltage regulator chip.
+	  The TPS52632 is 3-2-1 Phase D-Cap+ Step Down Driverless Controller
+	  with Serial VID control and DVFS.
+	  The voltage output can be configure through I2C interface or PWM
+	  interface.
+
 config REGULATOR_TPS6105X
 	tristate "TI TPS6105X Power regulators"
 	depends on TPS6105X
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index e431eed8a878..ec1aec460bf6 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o
 obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o
 obj-$(CONFIG_REGULATOR_MC13XXX_CORE) +=  mc13xxx-regulator-core.o
 obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
+obj-$(CONFIG_REGULATOR_TPS51632) += tps51632-regulator.o
 obj-$(CONFIG_REGULATOR_PCAP) += pcap-regulator.o
 obj-$(CONFIG_REGULATOR_PCF50633) += pcf50633-regulator.o
 obj-$(CONFIG_REGULATOR_RC5T583)  += rc5t583-regulator.o
diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c
new file mode 100644
index 000000000000..34603640d6d9
--- /dev/null
+++ b/drivers/regulator/tps51632-regulator.c
@@ -0,0 +1,332 @@
+/*
+ * tps51632-regulator.c -- TI TPS51632
+ *
+ * Regulator driver for TPS51632 3-2-1 Phase D-Cap Step Down Driverless
+ * Controller with serial VID control and DVFS.
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/tps51632-regulator.h>
+#include <linux/slab.h>
+
+/* Register definitions */
+#define TPS51632_VOLTAGE_SELECT_REG		0x0
+#define TPS51632_VOLTAGE_BASE_REG		0x1
+#define TPS51632_OFFSET_REG			0x2
+#define TPS51632_IMON_REG			0x3
+#define TPS51632_VMAX_REG			0x4
+#define TPS51632_DVFS_CONTROL_REG		0x5
+#define TPS51632_POWER_STATE_REG		0x6
+#define TPS51632_SLEW_REGS			0x7
+#define TPS51632_FAULT_REG			0x14
+
+#define TPS51632_MAX_REG			0x15
+
+#define TPS51632_VOUT_MASK			0x7F
+#define TPS51632_VOUT_OFFSET_MASK		0x1F
+#define TPS51632_VMAX_MASK			0x7F
+#define TPS51632_VMAX_LOCK			0x80
+
+/* TPS51632_DVFS_CONTROL_REG */
+#define TPS51632_DVFS_PWMEN			0x1
+#define TPS51632_DVFS_STEP_20			0x2
+#define TPS51632_DVFS_VMAX_PG			0x4
+#define TPS51632_DVFS_PWMRST			0x8
+#define TPS51632_DVFS_OCA_EN			0x10
+#define TPS51632_DVFS_FCCM			0x20
+
+/* TPS51632_POWER_STATE_REG */
+#define TPS51632_POWER_STATE_MASK		0x03
+#define TPS51632_POWER_STATE_MULTI_PHASE_CCM	0x0
+#define TPS51632_POWER_STATE_SINGLE_PHASE_CCM	0x1
+#define TPS51632_POWER_STATE_SINGLE_PHASE_DCM	0x2
+
+#define TPS51632_MIN_VOLATGE			500000
+#define TPS51632_MAX_VOLATGE			1520000
+#define TPS51632_VOLATGE_STEP_10mV		10000
+#define TPS51632_VOLATGE_STEP_20mV		20000
+#define TPS51632_MAX_VSEL			0x7F
+#define TPS51632_MIN_VSEL			0x19
+#define TPS51632_DEFAULT_RAMP_DELAY		6000
+#define TPS51632_VOLT_VSEL(uV)					\
+		(DIV_ROUND_UP(uV - TPS51632_MIN_VOLATGE,	\
+			TPS51632_VOLATGE_STEP_10mV) +		\
+			TPS51632_MIN_VSEL)
+
+/* TPS51632 chip information */
+struct tps51632_chip {
+	struct device *dev;
+	struct regulator_desc desc;
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+	bool enable_pwm_dvfs;
+};
+
+static int tps51632_dcdc_get_voltage_sel(struct regulator_dev *rdev)
+{
+	struct tps51632_chip *tps = rdev_get_drvdata(rdev);
+	unsigned int data;
+	int ret;
+	unsigned int reg = TPS51632_VOLTAGE_SELECT_REG;
+	int vsel;
+
+	if (tps->enable_pwm_dvfs)
+		reg = TPS51632_VOLTAGE_BASE_REG;
+
+	ret = regmap_read(tps->regmap, reg, &data);
+	if (ret < 0) {
+		dev_err(tps->dev, "reg read failed, err %d\n", ret);
+		return ret;
+	}
+
+	vsel = data & TPS51632_VOUT_MASK;
+
+	if (vsel < TPS51632_MIN_VSEL)
+		return 0;
+	else
+		return vsel - TPS51632_MIN_VSEL;
+}
+
+static int tps51632_dcdc_set_voltage_sel(struct regulator_dev *rdev,
+		unsigned selector)
+{
+	struct tps51632_chip *tps = rdev_get_drvdata(rdev);
+	int vsel;
+	int ret;
+	unsigned int reg = TPS51632_VOLTAGE_SELECT_REG;
+
+	if (tps->enable_pwm_dvfs)
+		reg = TPS51632_VOLTAGE_BASE_REG;
+
+	vsel = selector + TPS51632_MIN_VSEL;
+	if (vsel > TPS51632_MAX_VSEL)
+		return -EINVAL;
+
+	ret = regmap_write(tps->regmap, TPS51632_VOLTAGE_SELECT_REG, vsel);
+	if (ret < 0)
+		dev_err(tps->dev, "reg write failed, err %d\n", ret);
+	return ret;
+}
+
+static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev,
+		int ramp_delay)
+{
+	struct tps51632_chip *tps = rdev_get_drvdata(rdev);
+	int bit = ramp_delay/6000;
+	int ret;
+
+	if (bit)
+		bit--;
+	ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit));
+	if (ret < 0)
+		dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret);
+	return ret;
+}
+
+static struct regulator_ops tps51632_dcdc_ops = {
+	.get_voltage_sel	= tps51632_dcdc_get_voltage_sel,
+	.set_voltage_sel	= tps51632_dcdc_set_voltage_sel,
+	.list_voltage		= regulator_list_voltage_linear,
+	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
+	.set_ramp_delay		= tps51632_dcdc_set_ramp_delay,
+};
+
+static int __devinit tps51632_init_dcdc(struct tps51632_chip *tps,
+		struct tps51632_regulator_platform_data *pdata)
+{
+	int ret;
+	uint8_t	control = 0;
+	int vsel;
+
+	if (!pdata->enable_pwm_dvfs)
+		goto skip_pwm_config;
+
+	control |= TPS51632_DVFS_PWMEN;
+	tps->enable_pwm_dvfs = pdata->enable_pwm_dvfs;
+	vsel = TPS51632_VOLT_VSEL(pdata->base_voltage_uV);
+	ret = regmap_write(tps->regmap, TPS51632_VOLTAGE_BASE_REG, vsel);
+	if (ret < 0) {
+		dev_err(tps->dev, "BASE reg write failed, err %d\n", ret);
+		return ret;
+	}
+
+	if (pdata->dvfs_step_20mV)
+		control |= TPS51632_DVFS_STEP_20;
+
+	if (pdata->max_voltage_uV) {
+		unsigned int vmax;
+		/**
+		 * TPS51632 hw behavior: VMAX register can be write only
+		 * once as it get locked after first write. The lock get
+		 * reset only when device is power-reset.
+		 * Write register only when lock bit is not enabled.
+		 */
+		ret = regmap_read(tps->regmap, TPS51632_VMAX_REG, &vmax);
+		if (ret < 0) {
+			dev_err(tps->dev, "VMAX read failed, err %d\n", ret);
+			return ret;
+		}
+		if (!(vmax & TPS51632_VMAX_LOCK)) {
+			vsel = TPS51632_VOLT_VSEL(pdata->max_voltage_uV);
+			ret = regmap_write(tps->regmap, TPS51632_VMAX_REG,
+					vsel);
+			if (ret < 0) {
+				dev_err(tps->dev,
+					"VMAX write failed, err %d\n", ret);
+				return ret;
+			}
+		}
+	}
+
+skip_pwm_config:
+	ret = regmap_write(tps->regmap, TPS51632_DVFS_CONTROL_REG, control);
+	if (ret < 0)
+		dev_err(tps->dev, "DVFS reg write failed, err %d\n", ret);
+	return ret;
+}
+
+static bool rd_wr_reg(struct device *dev, unsigned int reg)
+{
+	if ((reg >= 0x8) && (reg <= 0x10))
+		return false;
+	return true;
+}
+
+static const struct regmap_config tps51632_regmap_config = {
+	.reg_bits		= 8,
+	.val_bits		= 8,
+	.writeable_reg		= rd_wr_reg,
+	.readable_reg		= rd_wr_reg,
+	.max_register		= TPS51632_MAX_REG - 1,
+	.cache_type		= REGCACHE_RBTREE,
+};
+
+static int __devinit tps51632_probe(struct i2c_client *client,
+				const struct i2c_device_id *id)
+{
+	struct tps51632_regulator_platform_data *pdata;
+	struct regulator_dev *rdev;
+	struct tps51632_chip *tps;
+	int ret;
+	struct regulator_config config = { };
+
+	pdata = client->dev.platform_data;
+	if (!pdata) {
+		dev_err(&client->dev, "No Platform data\n");
+		return -EINVAL;
+	}
+
+	tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL);
+	if (!tps) {
+		dev_err(&client->dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	tps->dev = &client->dev;
+	tps->desc.name = id->name;
+	tps->desc.id = 0;
+	tps->desc.ramp_delay = TPS51632_DEFAULT_RAMP_DELAY;
+	tps->desc.min_uV = TPS51632_MIN_VOLATGE;
+	tps->desc.uV_step = TPS51632_VOLATGE_STEP_10mV;
+	tps->desc.n_voltages = (TPS51632_MAX_VSEL - TPS51632_MIN_VSEL) + 1;
+	tps->desc.ops = &tps51632_dcdc_ops;
+	tps->desc.type = REGULATOR_VOLTAGE;
+	tps->desc.owner = THIS_MODULE;
+
+	tps->regmap = devm_regmap_init_i2c(client, &tps51632_regmap_config);
+	if (IS_ERR(tps->regmap)) {
+		ret = PTR_ERR(tps->regmap);
+		dev_err(&client->dev, "regmap init failed, err %d\n", ret);
+		return ret;
+	}
+	i2c_set_clientdata(client, tps);
+
+	ret = tps51632_init_dcdc(tps, pdata);
+	if (ret < 0) {
+		dev_err(tps->dev, "Init failed, err = %d\n", ret);
+		return ret;
+	}
+
+	/* Register the regulators */
+	config.dev = &client->dev;
+	config.init_data = pdata->reg_init_data;
+	config.driver_data = tps;
+	config.regmap = tps->regmap;
+	config.of_node = client->dev.of_node;
+
+	rdev = regulator_register(&tps->desc, &config);
+	if (IS_ERR(rdev)) {
+		dev_err(tps->dev, "regulator register failed\n");
+		return PTR_ERR(rdev);
+	}
+
+	tps->rdev = rdev;
+	return 0;
+}
+
+static int __devexit tps51632_remove(struct i2c_client *client)
+{
+	struct tps51632_chip *tps = i2c_get_clientdata(client);
+
+	regulator_unregister(tps->rdev);
+	return 0;
+}
+
+static const struct i2c_device_id tps51632_id[] = {
+	{.name = "tps51632",},
+	{},
+};
+
+MODULE_DEVICE_TABLE(i2c, tps51632_id);
+
+static struct i2c_driver tps51632_i2c_driver = {
+	.driver = {
+		.name = "tps51632",
+		.owner = THIS_MODULE,
+	},
+	.probe = tps51632_probe,
+	.remove = __devexit_p(tps51632_remove),
+	.id_table = tps51632_id,
+};
+
+static int __init tps51632_init(void)
+{
+	return i2c_add_driver(&tps51632_i2c_driver);
+}
+subsys_initcall(tps51632_init);
+
+static void __exit tps51632_cleanup(void)
+{
+	i2c_del_driver(&tps51632_i2c_driver);
+}
+module_exit(tps51632_cleanup);
+
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_DESCRIPTION("TPS51632 voltage regulator driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/tps51632-regulator.h b/include/linux/regulator/tps51632-regulator.h
new file mode 100644
index 000000000000..d00841e1a75a
--- /dev/null
+++ b/include/linux/regulator/tps51632-regulator.h
@@ -0,0 +1,47 @@
+/*
+ * tps51632-regulator.h -- TPS51632 regulator
+ *
+ * Interface for regulator driver for TPS51632 3-2-1 Phase D-Cap Step Down
+ * Driverless Controller with serial VID control and DVFS.
+ *
+ * Copyright (C) 2012 NVIDIA Corporation
+
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA	02110-1301, USA.
+ *
+ */
+
+#ifndef __LINUX_REGULATOR_TPS51632_H
+#define __LINUX_REGULATOR_TPS51632_H
+
+/*
+ * struct tps51632_regulator_platform_data - tps51632 regulator platform data.
+ *
+ * @reg_init_data: The regulator init data.
+ * @enable_pwm_dvfs: Enable PWM DVFS or not.
+ * @dvfs_step_20mV: Step for DVFS is 20mV or 10mV.
+ * @max_voltage_uV: Maximum possible voltage in PWM-DVFS mode.
+ * @base_voltage_uV: Base voltage when PWM-DVFS enabled.
+ */
+struct tps51632_regulator_platform_data {
+	struct regulator_init_data *reg_init_data;
+	bool enable_pwm_dvfs;
+	bool dvfs_step_20mV;
+	int max_voltage_uV;
+	int base_voltage_uV;
+};
+
+#endif /* __LINUX_REGULATOR_TPS51632_H */
-- 
cgit v1.2.3-55-g7522


From bd7a2b600ace90c8819495b639a744c8f5c68feb Mon Sep 17 00:00:00 2001
From: Pawel Moll
Date: Mon, 24 Sep 2012 18:56:53 +0100
Subject: regulator: core: Support for continuous voltage range

Some regulators can set any voltage within the constraints range,
not being limited to specified operating points.

This patch makes it possible to describe such regulator and makes
the regulator_is_supported_voltage() function behave correctly.

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c         | 5 +++++
 include/linux/regulator/driver.h | 3 +++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 5c4829cba6a6..f7c74db7465c 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1979,6 +1979,11 @@ int regulator_is_supported_voltage(struct regulator *regulator,
 			return ret;
 	}
 
+	/* Any voltage within constrains range is fine? */
+	if (rdev->desc->continuous_voltage_range)
+		return min_uV >= rdev->constraints->min_uV &&
+				max_uV <= rdev->constraints->max_uV;
+
 	ret = regulator_count_voltages(regulator);
 	if (ret < 0)
 		return ret;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 7932a3bf21bd..f2b72b230b9b 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -181,6 +181,8 @@ enum regulator_type {
  * @type: Indicates if the regulator is a voltage or current regulator.
  * @owner: Module providing the regulator, used for refcounting.
  *
+ * @continuous_voltage_range: Indicates if the regulator can set any
+ *                            voltage within constrains range.
  * @n_voltages: Number of selectors available for ops.list_voltage().
  *
  * @min_uV: Voltage given by the lowest selector (if linear mapping)
@@ -199,6 +201,7 @@ struct regulator_desc {
 	const char *name;
 	const char *supply_name;
 	int id;
+	bool continuous_voltage_range;
 	unsigned n_voltages;
 	struct regulator_ops *ops;
 	int irq;
-- 
cgit v1.2.3-55-g7522


From 757ef79188657087f96f6f12c003b682860fede2 Mon Sep 17 00:00:00 2001
From: Jon Hunter
Date: Thu, 28 Jun 2012 13:41:29 -0500
Subject: ARM: OMAP2+: GPMC: Remove unused OneNAND get_freq() platform function

A platform function pointer for getting the frequency of a OneNAND device
was added so that a platform could specify a custom function for returning
the frequency and not just rely on the OneNAND version to determine the
frequency. However, this platform function pointer is not currently being
used and I am not sure if it ever has.

OneNAND devices are not so common these days and as far as I know not being
used with new devices. Therefore, it is most likely that this get_freq()
function pointer will not be used and so remove it.

Given that the get_freq() function pointer is not used, neither is the
clk_dep variable and so all references to it can also be removed.

Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Signed-off-by: Afzal Mohammed <afzal@ti.com>
---
 arch/arm/mach-omap2/gpmc-onenand.c              | 39 ++++---------------------
 include/linux/platform_data/mtd-onenand-omap2.h |  8 -----
 2 files changed, 5 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 29d391b273fc..50165aa4205a 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -140,21 +140,10 @@ static void set_onenand_cfg(void __iomem *onenand_base)
 }
 
 static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg,
-				  void __iomem *onenand_base, bool *clk_dep)
+				  void __iomem *onenand_base)
 {
 	u16 ver = readw(onenand_base + ONENAND_REG_VERSION_ID);
-	int freq = 0;
-
-	if (cfg->get_freq) {
-		struct onenand_freq_info fi;
-
-		fi.maf_id = readw(onenand_base + ONENAND_REG_MANUFACTURER_ID);
-		fi.dev_id = readw(onenand_base + ONENAND_REG_DEVICE_ID);
-		fi.ver_id = ver;
-		freq = cfg->get_freq(&fi, clk_dep);
-		if (freq)
-			return freq;
-	}
+	int freq;
 
 	switch ((ver >> 4) & 0xf) {
 	case 0:
@@ -182,7 +171,7 @@ static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg,
 
 static struct gpmc_timings
 omap2_onenand_calc_sync_timings(struct omap_onenand_platform_data *cfg,
-				int freq, bool clk_dep)
+				int freq)
 {
 	struct gpmc_timings t;
 	const int t_cer  = 15;
@@ -261,22 +250,6 @@ omap2_onenand_calc_sync_timings(struct omap_onenand_platform_data *cfg,
 	else
 		latency = 4;
 
-	if (clk_dep) {
-		if (gpmc_clk_ns < 12) { /* >83Mhz */
-			t_ces   = 3;
-			t_avds  = 4;
-		} else if (gpmc_clk_ns < 15) { /* >66Mhz */
-			t_ces   = 5;
-			t_avds  = 4;
-		} else if (gpmc_clk_ns < 25) { /* >40Mhz */
-			t_ces   = 6;
-			t_avds  = 5;
-		} else {
-			t_ces   = 7;
-			t_avds  = 7;
-		}
-	}
-
 	/* Set synchronous read timings */
 	memset(&t, 0, sizeof(t));
 
@@ -399,16 +372,14 @@ static int omap2_onenand_setup_sync(void __iomem *onenand_base, int *freq_ptr)
 {
 	int ret, freq = *freq_ptr;
 	struct gpmc_timings t;
-	bool clk_dep = false;
 
 	if (!freq) {
 		/* Very first call freq is not known */
-		freq = omap2_onenand_get_freq(gpmc_onenand_data,
-						onenand_base, &clk_dep);
+		freq = omap2_onenand_get_freq(gpmc_onenand_data, onenand_base);
 		set_onenand_cfg(onenand_base);
 	}
 
-	t = omap2_onenand_calc_sync_timings(gpmc_onenand_data, freq, clk_dep);
+	t = omap2_onenand_calc_sync_timings(gpmc_onenand_data, freq);
 
 	ret = gpmc_set_sync_mode(gpmc_onenand_data->cs, &t);
 	if (IS_ERR_VALUE(ret))
diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h
index 2858667d2e4f..21bb0ff4052e 100644
--- a/include/linux/platform_data/mtd-onenand-omap2.h
+++ b/include/linux/platform_data/mtd-onenand-omap2.h
@@ -15,20 +15,12 @@
 #define ONENAND_SYNC_READ	(1 << 0)
 #define ONENAND_SYNC_READWRITE	(1 << 1)
 
-struct onenand_freq_info {
-	u16			maf_id;
-	u16			dev_id;
-	u16			ver_id;
-};
-
 struct omap_onenand_platform_data {
 	int			cs;
 	int			gpio_irq;
 	struct mtd_partition	*parts;
 	int			nr_parts;
 	int			(*onenand_setup)(void __iomem *, int *freq_ptr);
-	int		(*get_freq)(const struct onenand_freq_info *freq_info,
-				    bool *clk_dep);
 	int			dma_channel;
 	u8			flags;
 	u8			regulator_can_sleep;
-- 
cgit v1.2.3-55-g7522


From eb77b6a78ac7f63985a3f592baf80ff33d213c48 Mon Sep 17 00:00:00 2001
From: Afzal Mohammed
Date: Fri, 5 Oct 2012 11:39:54 +0530
Subject: ARM: OMAP2+: onenand: connected soc info in pdata

onenand driver needs to know whether soc is falling under
34xx family to properly handle onenand. But driver is not
supposed to do cpu_is_* check, hence educate platform data
with this information. Driver can make use of it to avoid
cpu_is_* check.

Signed-off-by: Afzal Mohammed <afzal@ti.com>
---
 arch/arm/mach-omap2/gpmc-onenand.c              | 5 +++++
 include/linux/platform_data/mtd-onenand-omap2.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 06fd84657218..4a1c0b7ab432 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -428,6 +428,11 @@ void __init gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
 		gpmc_onenand_data->flags |= ONENAND_SYNC_READ;
 	}
 
+	if (cpu_is_omap34xx())
+		gpmc_onenand_data->flags |= ONENAND_IN_OMAP34XX;
+	else
+		gpmc_onenand_data->flags &= ~ONENAND_IN_OMAP34XX;
+
 	err = gpmc_cs_request(gpmc_onenand_data->cs, ONENAND_IO_SIZE,
 				(unsigned long *)&gpmc_onenand_resource.start);
 	if (err < 0) {
diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h
index 21bb0ff4052e..ef9c60d4b29b 100644
--- a/include/linux/platform_data/mtd-onenand-omap2.h
+++ b/include/linux/platform_data/mtd-onenand-omap2.h
@@ -14,6 +14,7 @@
 
 #define ONENAND_SYNC_READ	(1 << 0)
 #define ONENAND_SYNC_READWRITE	(1 << 1)
+#define	ONENAND_IN_OMAP34XX	(1 << 2)
 
 struct omap_onenand_platform_data {
 	int			cs;
-- 
cgit v1.2.3-55-g7522


From b6ab13e7d6d2778702baea7433d0bd9f234d5083 Mon Sep 17 00:00:00 2001
From: Afzal Mohammed
Date: Sat, 29 Sep 2012 10:32:42 +0530
Subject: ARM: OMAP2+: onenand: header cleanup

For common arm zImage existing onenand header file
in platform specific location was moved to generic
platform data location, but it contained more than
platform data, remove it. New local header has been
created for exposing functions.

Signed-off-by: Afzal Mohammed <afzal@ti.com>
---
 arch/arm/mach-omap2/board-flash.c               |  1 +
 arch/arm/mach-omap2/board-igep0020.c            |  1 +
 arch/arm/mach-omap2/board-n8x0.c                |  1 +
 arch/arm/mach-omap2/board-rm680.c               |  1 +
 arch/arm/mach-omap2/board-rx51-peripherals.c    |  1 +
 arch/arm/mach-omap2/gpmc-onenand.c              |  1 +
 arch/arm/mach-omap2/gpmc-onenand.h              | 24 ++++++++++++++++++++++++
 include/linux/platform_data/mtd-onenand-omap2.h | 19 +++----------------
 8 files changed, 33 insertions(+), 16 deletions(-)
 create mode 100644 arch/arm/mach-omap2/gpmc-onenand.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c
index d4ac5352a71c..f438d511ba11 100644
--- a/arch/arm/mach-omap2/board-flash.c
+++ b/arch/arm/mach-omap2/board-flash.c
@@ -25,6 +25,7 @@
 
 #include "common.h"
 #include "board-flash.h"
+#include "gpmc-onenand.h"
 
 #define REG_FPGA_REV			0x10
 #define REG_FPGA_DIP_SWITCH_INPUT2	0x60
diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c
index f6b3ed0e5c94..83c6efaf4226 100644
--- a/arch/arm/mach-omap2/board-igep0020.c
+++ b/arch/arm/mach-omap2/board-igep0020.c
@@ -43,6 +43,7 @@
 #include "common-board-devices.h"
 #include "board-flash.h"
 #include "control.h"
+#include "gpmc-onenand.h"
 
 #define IGEP2_SMSC911X_CS       5
 #define IGEP2_SMSC911X_GPIO     176
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index d95f727ca39a..92b19166aac8 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -32,6 +32,7 @@
 #include <plat/mmc.h>
 
 #include "mux.h"
+#include "gpmc-onenand.h"
 
 #define TUSB6010_ASYNC_CS	1
 #define TUSB6010_SYNC_CS	4
diff --git a/arch/arm/mach-omap2/board-rm680.c b/arch/arm/mach-omap2/board-rm680.c
index 45997bfbcbd2..154cf337c0c7 100644
--- a/arch/arm/mach-omap2/board-rm680.c
+++ b/arch/arm/mach-omap2/board-rm680.c
@@ -33,6 +33,7 @@
 #include "hsmmc.h"
 #include "sdram-nokia.h"
 #include "common-board-devices.h"
+#include "gpmc-onenand.h"
 
 static struct regulator_consumer_supply rm680_vemmc_consumers[] = {
 	REGULATOR_SUPPLY("vmmc", "omap_hsmmc.1"),
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index 020e03c95bfe..45760044b2bd 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -54,6 +54,7 @@
 #include "mux.h"
 #include "hsmmc.h"
 #include "common-board-devices.h"
+#include "gpmc-onenand.h"
 
 #define SYSTEM_REV_B_USES_VAUX3	0x1699
 #define SYSTEM_REV_S_USES_VAUX3 0x8
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 4a1c0b7ab432..f318f11da2fa 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -23,6 +23,7 @@
 #include <plat/gpmc.h>
 
 #include "soc.h"
+#include "gpmc-onenand.h"
 
 #define	ONENAND_IO_SIZE	SZ_128K
 
diff --git a/arch/arm/mach-omap2/gpmc-onenand.h b/arch/arm/mach-omap2/gpmc-onenand.h
new file mode 100644
index 000000000000..216f23a8b45c
--- /dev/null
+++ b/arch/arm/mach-omap2/gpmc-onenand.h
@@ -0,0 +1,24 @@
+/*
+ *  arch/arm/mach-omap2/gpmc-onenand.h
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#ifndef	__OMAP2_GPMC_ONENAND_H
+#define	__OMAP2_GPMC_ONENAND_H
+
+#include <linux/platform_data/mtd-onenand-omap2.h>
+
+#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
+extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+#else
+#define board_onenand_data	NULL
+static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+{
+}
+#endif
+
+#endif
diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h
index ef9c60d4b29b..685af7e8b120 100644
--- a/include/linux/platform_data/mtd-onenand-omap2.h
+++ b/include/linux/platform_data/mtd-onenand-omap2.h
@@ -9,6 +9,9 @@
  * published by the Free Software Foundation.
  */
 
+#ifndef	__MTD_ONENAND_OMAP2_H
+#define	__MTD_ONENAND_OMAP2_H
+
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
@@ -27,20 +30,4 @@ struct omap_onenand_platform_data {
 	u8			regulator_can_sleep;
 	u8			skip_initial_unlocking;
 };
-
-#define ONENAND_MAX_PARTITIONS 8
-
-#if defined(CONFIG_MTD_ONENAND_OMAP2) || \
-	defined(CONFIG_MTD_ONENAND_OMAP2_MODULE)
-
-extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
-
-#else
-
-#define board_onenand_data	NULL
-
-static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
-{
-}
-
 #endif
-- 
cgit v1.2.3-55-g7522


From bc3668ea046be9e841eecfab04bddfa759e765d6 Mon Sep 17 00:00:00 2001
From: Afzal Mohammed
Date: Sat, 29 Sep 2012 12:26:13 +0530
Subject: ARM: OMAP2+: nand: header cleanup

For common arm zImage existing nand header file
in platform specific location was moved to generic
platform data location, but it contained more than
platform data, remove it. New local header has been
created for exposing functions.

Also move gpmc-nand platform data to platform header
meant for nand from gpmc header file

Signed-off-by: Afzal Mohammed <afzal@ti.com>
---
 arch/arm/mach-omap2/board-cm-t35.c           |  3 +-
 arch/arm/mach-omap2/board-cm-t3517.c         |  3 +-
 arch/arm/mach-omap2/board-flash.c            |  4 +-
 arch/arm/mach-omap2/board-omap3pandora.c     |  3 +-
 arch/arm/mach-omap2/common-board-devices.c   |  1 -
 arch/arm/mach-omap2/gpmc-nand.c              | 58 ++++++++++++++--------------
 arch/arm/mach-omap2/gpmc-nand.h              | 27 +++++++++++++
 arch/arm/mach-omap2/gpmc.c                   |  2 +
 arch/arm/plat-omap/include/plat/gpmc.h       | 28 +-------------
 include/linux/platform_data/mtd-nand-omap2.h | 41 ++++++++++++++------
 10 files changed, 98 insertions(+), 72 deletions(-)
 create mode 100644 arch/arm/mach-omap2/gpmc-nand.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index 376d26eb601c..fef68de716b5 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -53,6 +53,7 @@
 #include "sdram-micron-mt46h32m32lf-6.h"
 #include "hsmmc.h"
 #include "common-board-devices.h"
+#include "gpmc-nand.h"
 
 #define CM_T35_GPIO_PENDOWN		57
 #define SB_T35_USB_HUB_RESET_GPIO	167
@@ -181,7 +182,7 @@ static struct omap_nand_platform_data cm_t35_nand_data = {
 
 static void __init cm_t35_init_nand(void)
 {
-	if (gpmc_nand_init(&cm_t35_nand_data) < 0)
+	if (gpmc_nand_init(&cm_t35_nand_data, NULL) < 0)
 		pr_err("CM-T35: Unable to register NAND device\n");
 }
 #else
diff --git a/arch/arm/mach-omap2/board-cm-t3517.c b/arch/arm/mach-omap2/board-cm-t3517.c
index 59c0a45f75b0..3a19e80e0005 100644
--- a/arch/arm/mach-omap2/board-cm-t3517.c
+++ b/arch/arm/mach-omap2/board-cm-t3517.c
@@ -49,6 +49,7 @@
 #include "control.h"
 #include "common-board-devices.h"
 #include "am35xx-emac.h"
+#include "gpmc-nand.h"
 
 #if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE)
 static struct gpio_led cm_t3517_leds[] = {
@@ -240,7 +241,7 @@ static struct omap_nand_platform_data cm_t3517_nand_data = {
 
 static void __init cm_t3517_init_nand(void)
 {
-	if (gpmc_nand_init(&cm_t3517_nand_data) < 0)
+	if (gpmc_nand_init(&cm_t3517_nand_data, NULL) < 0)
 		pr_err("CM-T3517: NAND initialization failed\n");
 }
 #else
diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c
index f438d511ba11..ba9fa86a10aa 100644
--- a/arch/arm/mach-omap2/board-flash.c
+++ b/arch/arm/mach-omap2/board-flash.c
@@ -26,6 +26,7 @@
 #include "common.h"
 #include "board-flash.h"
 #include "gpmc-onenand.h"
+#include "gpmc-nand.h"
 
 #define REG_FPGA_REV			0x10
 #define REG_FPGA_DIP_SWITCH_INPUT2	0x60
@@ -139,10 +140,9 @@ __init board_nand_init(struct mtd_partition *nand_parts, u8 nr_parts, u8 cs,
 	board_nand_data.parts		= nand_parts;
 	board_nand_data.nr_parts	= nr_parts;
 	board_nand_data.devsize		= nand_type;
-	board_nand_data.gpmc_t		= gpmc_t;
 
 	board_nand_data.ecc_opt = OMAP_ECC_HAMMING_CODE_DEFAULT;
-	gpmc_nand_init(&board_nand_data);
+	gpmc_nand_init(&board_nand_data, gpmc_t);
 }
 #endif /* CONFIG_MTD_NAND_OMAP2 || CONFIG_MTD_NAND_OMAP2_MODULE */
 
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 00a1f4ae6e44..f286b4b4bd5b 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -50,6 +50,7 @@
 #include "sdram-micron-mt46h32m32lf-6.h"
 #include "hsmmc.h"
 #include "common-board-devices.h"
+#include "gpmc-nand.h"
 
 #define PANDORA_WIFI_IRQ_GPIO		21
 #define PANDORA_WIFI_NRESET_GPIO	23
@@ -602,7 +603,7 @@ static void __init omap3pandora_init(void)
 	omap_ads7846_init(1, OMAP3_PANDORA_TS_GPIO, 0, NULL);
 	usbhs_init(&usbhs_bdata);
 	usb_musb_init(NULL);
-	gpmc_nand_init(&pandora_nand_data);
+	gpmc_nand_init(&pandora_nand_data, NULL);
 
 	/* Ensure SDRC pins are mux'd for self-refresh */
 	omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT);
diff --git a/arch/arm/mach-omap2/common-board-devices.c b/arch/arm/mach-omap2/common-board-devices.c
index 90e0597667b9..ad856092c06a 100644
--- a/arch/arm/mach-omap2/common-board-devices.c
+++ b/arch/arm/mach-omap2/common-board-devices.c
@@ -25,7 +25,6 @@
 #include <linux/spi/ads7846.h>
 
 #include <linux/platform_data/spi-omap2-mcspi.h>
-#include <linux/platform_data/mtd-nand-omap2.h>
 
 #include "common.h"
 #include "common-board-devices.h"
diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c
index 4acf497faeb3..abdb78a95a94 100644
--- a/arch/arm/mach-omap2/gpmc-nand.c
+++ b/arch/arm/mach-omap2/gpmc-nand.c
@@ -20,6 +20,10 @@
 #include <plat/gpmc.h>
 
 #include "soc.h"
+#include "gpmc-nand.h"
+
+/* minimum size for IO mapping */
+#define	NAND_IO_SIZE	4
 
 static struct resource gpmc_nand_resource[] = {
 	{
@@ -40,41 +44,36 @@ static struct platform_device gpmc_nand_device = {
 	.resource	= gpmc_nand_resource,
 };
 
-static int omap2_nand_gpmc_retime(struct omap_nand_platform_data *gpmc_nand_data)
+static int omap2_nand_gpmc_retime(
+				struct omap_nand_platform_data *gpmc_nand_data,
+				struct gpmc_timings *gpmc_t)
 {
 	struct gpmc_timings t;
 	int err;
 
-	if (!gpmc_nand_data->gpmc_t)
-		return 0;
-
 	memset(&t, 0, sizeof(t));
-	t.sync_clk = gpmc_nand_data->gpmc_t->sync_clk;
-	t.cs_on = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->cs_on);
-	t.adv_on = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->adv_on);
+	t.sync_clk = gpmc_t->sync_clk;
+	t.cs_on = gpmc_round_ns_to_ticks(gpmc_t->cs_on);
+	t.adv_on = gpmc_round_ns_to_ticks(gpmc_t->adv_on);
 
 	/* Read */
-	t.adv_rd_off = gpmc_round_ns_to_ticks(
-				gpmc_nand_data->gpmc_t->adv_rd_off);
+	t.adv_rd_off = gpmc_round_ns_to_ticks(gpmc_t->adv_rd_off);
 	t.oe_on  = t.adv_on;
-	t.access = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->access);
-	t.oe_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->oe_off);
-	t.cs_rd_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->cs_rd_off);
-	t.rd_cycle  = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->rd_cycle);
+	t.access = gpmc_round_ns_to_ticks(gpmc_t->access);
+	t.oe_off = gpmc_round_ns_to_ticks(gpmc_t->oe_off);
+	t.cs_rd_off = gpmc_round_ns_to_ticks(gpmc_t->cs_rd_off);
+	t.rd_cycle  = gpmc_round_ns_to_ticks(gpmc_t->rd_cycle);
 
 	/* Write */
-	t.adv_wr_off = gpmc_round_ns_to_ticks(
-				gpmc_nand_data->gpmc_t->adv_wr_off);
+	t.adv_wr_off = gpmc_round_ns_to_ticks(gpmc_t->adv_wr_off);
 	t.we_on  = t.oe_on;
 	if (cpu_is_omap34xx()) {
-	    t.wr_data_mux_bus =	gpmc_round_ns_to_ticks(
-				gpmc_nand_data->gpmc_t->wr_data_mux_bus);
-	    t.wr_access = gpmc_round_ns_to_ticks(
-				gpmc_nand_data->gpmc_t->wr_access);
+	    t.wr_data_mux_bus =	gpmc_round_ns_to_ticks(gpmc_t->wr_data_mux_bus);
+	    t.wr_access = gpmc_round_ns_to_ticks(gpmc_t->wr_access);
 	}
-	t.we_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->we_off);
-	t.cs_wr_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->cs_wr_off);
-	t.wr_cycle  = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->wr_cycle);
+	t.we_off = gpmc_round_ns_to_ticks(gpmc_t->we_off);
+	t.cs_wr_off = gpmc_round_ns_to_ticks(gpmc_t->cs_wr_off);
+	t.wr_cycle  = gpmc_round_ns_to_ticks(gpmc_t->wr_cycle);
 
 	/* Configure GPMC */
 	if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16)
@@ -91,7 +90,8 @@ static int omap2_nand_gpmc_retime(struct omap_nand_platform_data *gpmc_nand_data
 	return 0;
 }
 
-int __init gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data)
+int __init gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data,
+			  struct gpmc_timings *gpmc_t)
 {
 	int err	= 0;
 	struct device *dev = &gpmc_nand_device.dev;
@@ -112,11 +112,13 @@ int __init gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data)
 				gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE);
 	gpmc_nand_resource[2].start =
 				gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT);
-	 /* Set timings in GPMC */
-	err = omap2_nand_gpmc_retime(gpmc_nand_data);
-	if (err < 0) {
-		dev_err(dev, "Unable to set gpmc timings: %d\n", err);
-		return err;
+
+	if (gpmc_t) {
+		err = omap2_nand_gpmc_retime(gpmc_nand_data, gpmc_t);
+		if (err < 0) {
+			dev_err(dev, "Unable to set gpmc timings: %d\n", err);
+			return err;
+		}
 	}
 
 	/* Enable RD PIN Monitoring Reg */
diff --git a/arch/arm/mach-omap2/gpmc-nand.h b/arch/arm/mach-omap2/gpmc-nand.h
new file mode 100644
index 000000000000..11a377f0e6f0
--- /dev/null
+++ b/arch/arm/mach-omap2/gpmc-nand.h
@@ -0,0 +1,27 @@
+/*
+ *  arch/arm/mach-omap2/gpmc-nand.h
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#ifndef	__OMAP2_GPMC_NAND_H
+#define	__OMAP2_GPMC_NAND_H
+
+#include <plat/gpmc.h>
+#include <linux/platform_data/mtd-nand-omap2.h>
+
+#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2)
+extern int gpmc_nand_init(struct omap_nand_platform_data *d,
+			  struct gpmc_timings *gpmc_t);
+#else
+static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
+				 struct gpmc_timings *gpmc_t)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 91af5aefeaae..34823b3092f3 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -26,6 +26,8 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 
+#include <linux/platform_data/mtd-nand-omap2.h>
+
 #include <asm/mach-types.h>
 #include <plat/gpmc.h>
 
diff --git a/arch/arm/plat-omap/include/plat/gpmc.h b/arch/arm/plat-omap/include/plat/gpmc.h
index f7e0990eaf37..4b06bb4531a2 100644
--- a/arch/arm/plat-omap/include/plat/gpmc.h
+++ b/arch/arm/plat-omap/include/plat/gpmc.h
@@ -11,6 +11,8 @@
 #ifndef __OMAP2_GPMC_H
 #define __OMAP2_GPMC_H
 
+#include <linux/platform_data/mtd-nand-omap2.h>
+
 /* Maximum Number of Chip Selects */
 #define GPMC_CS_NUM		8
 
@@ -86,16 +88,6 @@
 #define PREFETCH_FIFOTHRESHOLD_MAX	0x40
 #define PREFETCH_FIFOTHRESHOLD(val)	((val) << 8)
 
-enum omap_ecc {
-		/* 1-bit ecc: stored at end of spare area */
-	OMAP_ECC_HAMMING_CODE_DEFAULT = 0, /* Default, s/w method */
-	OMAP_ECC_HAMMING_CODE_HW, /* gpmc to detect the error */
-		/* 1-bit ecc: stored at beginning of spare area as romcode */
-	OMAP_ECC_HAMMING_CODE_HW_ROMCODE, /* gpmc method & romcode layout */
-	OMAP_ECC_BCH4_CODE_HW, /* 4-bit BCH ecc code */
-	OMAP_ECC_BCH8_CODE_HW, /* 8-bit BCH ecc code */
-};
-
 /*
  * Note that all values in this struct are in nanoseconds except sync_clk
  * (which is in picoseconds), while the register values are in gpmc_fck cycles.
@@ -133,22 +125,6 @@ struct gpmc_timings {
 	u16 wr_data_mux_bus;	/* WRDATAONADMUXBUS */
 };
 
-struct gpmc_nand_regs {
-	void __iomem	*gpmc_status;
-	void __iomem	*gpmc_nand_command;
-	void __iomem	*gpmc_nand_address;
-	void __iomem	*gpmc_nand_data;
-	void __iomem	*gpmc_prefetch_config1;
-	void __iomem	*gpmc_prefetch_config2;
-	void __iomem	*gpmc_prefetch_control;
-	void __iomem	*gpmc_prefetch_status;
-	void __iomem	*gpmc_ecc_config;
-	void __iomem	*gpmc_ecc_control;
-	void __iomem	*gpmc_ecc_size_config;
-	void __iomem	*gpmc_ecc1_result;
-	void __iomem	*gpmc_bch_result0;
-};
-
 extern void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs);
 extern int gpmc_get_client_irq(unsigned irq_config);
 
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 1a68c1e5fe53..e1965fe581d1 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -8,7 +8,9 @@
  * published by the Free Software Foundation.
  */
 
-#include <plat/gpmc.h>
+#ifndef	_MTD_NAND_OMAP2_H
+#define	_MTD_NAND_OMAP2_H
+
 #include <linux/mtd/partitions.h>
 
 enum nand_io {
@@ -18,10 +20,35 @@ enum nand_io {
 	NAND_OMAP_PREFETCH_IRQ		/* prefetch enabled irq mode */
 };
 
+enum omap_ecc {
+		/* 1-bit ecc: stored at end of spare area */
+	OMAP_ECC_HAMMING_CODE_DEFAULT = 0, /* Default, s/w method */
+	OMAP_ECC_HAMMING_CODE_HW, /* gpmc to detect the error */
+		/* 1-bit ecc: stored at beginning of spare area as romcode */
+	OMAP_ECC_HAMMING_CODE_HW_ROMCODE, /* gpmc method & romcode layout */
+	OMAP_ECC_BCH4_CODE_HW, /* 4-bit BCH ecc code */
+	OMAP_ECC_BCH8_CODE_HW, /* 8-bit BCH ecc code */
+};
+
+struct gpmc_nand_regs {
+	void __iomem	*gpmc_status;
+	void __iomem	*gpmc_nand_command;
+	void __iomem	*gpmc_nand_address;
+	void __iomem	*gpmc_nand_data;
+	void __iomem	*gpmc_prefetch_config1;
+	void __iomem	*gpmc_prefetch_config2;
+	void __iomem	*gpmc_prefetch_control;
+	void __iomem	*gpmc_prefetch_status;
+	void __iomem	*gpmc_ecc_config;
+	void __iomem	*gpmc_ecc_control;
+	void __iomem	*gpmc_ecc_size_config;
+	void __iomem	*gpmc_ecc1_result;
+	void __iomem	*gpmc_bch_result0;
+};
+
 struct omap_nand_platform_data {
 	int			cs;
 	struct mtd_partition	*parts;
-	struct gpmc_timings	*gpmc_t;
 	int			nr_parts;
 	bool			dev_ready;
 	enum nand_io		xfer_type;
@@ -30,14 +57,4 @@ struct omap_nand_platform_data {
 	struct gpmc_nand_regs	reg;
 };
 
-/* minimum size for IO mapping */
-#define	NAND_IO_SIZE	4
-
-#if defined(CONFIG_MTD_NAND_OMAP2) || defined(CONFIG_MTD_NAND_OMAP2_MODULE)
-extern int gpmc_nand_init(struct omap_nand_platform_data *d);
-#else
-static inline int gpmc_nand_init(struct omap_nand_platform_data *d)
-{
-	return 0;
-}
 #endif
-- 
cgit v1.2.3-55-g7522


From 2fdf0c98969fdac8f7b191d4988e2e436717c857 Mon Sep 17 00:00:00 2001
From: Afzal Mohammed
Date: Thu, 4 Oct 2012 15:49:04 +0530
Subject: ARM: OMAP2+: gpmc: nand register helper bch update

Update helper function that provides gpmc-nand register
details for nand driver with bch register information.
Using this nand driver can be made self sufficient to
handle remaining gpmc-nand operations by itself instead
of relying on gpmc exported nand functions.

Signed-off-by: Afzal Mohammed <afzal@ti.com>
---
 arch/arm/mach-omap2/gpmc.c                   | 18 +++++++++++++++++-
 include/linux/platform_data/mtd-nand-omap2.h |  7 ++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 34823b3092f3..f5cde49854a5 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -61,6 +61,9 @@
 #define GPMC_ECC_SIZE_CONFIG	0x1fc
 #define GPMC_ECC1_RESULT        0x200
 #define GPMC_ECC_BCH_RESULT_0   0x240   /* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_1	0x244	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_2	0x248	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_3	0x24c	/* not available on OMAP2 */
 
 /* GPMC ECC control settings */
 #define GPMC_ECC_CTRL_ECCCLEAR		0x100
@@ -77,6 +80,7 @@
 
 #define GPMC_CS0_OFFSET		0x60
 #define GPMC_CS_SIZE		0x30
+#define	GPMC_BCH_SIZE		0x10
 
 #define GPMC_MEM_START		0x00000000
 #define GPMC_MEM_END		0x3FFFFFFF
@@ -731,6 +735,8 @@ EXPORT_SYMBOL(gpmc_prefetch_reset);
 
 void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs)
 {
+	int i;
+
 	reg->gpmc_status = gpmc_base + GPMC_STATUS;
 	reg->gpmc_nand_command = gpmc_base + GPMC_CS0_OFFSET +
 				GPMC_CS_NAND_COMMAND + GPMC_CS_SIZE * cs;
@@ -746,7 +752,17 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs)
 	reg->gpmc_ecc_control = gpmc_base + GPMC_ECC_CONTROL;
 	reg->gpmc_ecc_size_config = gpmc_base + GPMC_ECC_SIZE_CONFIG;
 	reg->gpmc_ecc1_result = gpmc_base + GPMC_ECC1_RESULT;
-	reg->gpmc_bch_result0 = gpmc_base + GPMC_ECC_BCH_RESULT_0;
+
+	for (i = 0; i < GPMC_BCH_NUM_REMAINDER; i++) {
+		reg->gpmc_bch_result0[i] = gpmc_base + GPMC_ECC_BCH_RESULT_0 +
+					   GPMC_BCH_SIZE * i;
+		reg->gpmc_bch_result1[i] = gpmc_base + GPMC_ECC_BCH_RESULT_1 +
+					   GPMC_BCH_SIZE * i;
+		reg->gpmc_bch_result2[i] = gpmc_base + GPMC_ECC_BCH_RESULT_2 +
+					   GPMC_BCH_SIZE * i;
+		reg->gpmc_bch_result3[i] = gpmc_base + GPMC_ECC_BCH_RESULT_3 +
+					   GPMC_BCH_SIZE * i;
+	}
 }
 
 int gpmc_get_client_irq(unsigned irq_config)
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index e1965fe581d1..24d32ca34bef 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -13,6 +13,8 @@
 
 #include <linux/mtd/partitions.h>
 
+#define	GPMC_BCH_NUM_REMAINDER	8
+
 enum nand_io {
 	NAND_OMAP_PREFETCH_POLLED = 0,	/* prefetch polled mode, default */
 	NAND_OMAP_POLLED,		/* polled mode, without prefetch */
@@ -43,7 +45,10 @@ struct gpmc_nand_regs {
 	void __iomem	*gpmc_ecc_control;
 	void __iomem	*gpmc_ecc_size_config;
 	void __iomem	*gpmc_ecc1_result;
-	void __iomem	*gpmc_bch_result0;
+	void __iomem	*gpmc_bch_result0[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result1[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result2[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result3[GPMC_BCH_NUM_REMAINDER];
 };
 
 struct omap_nand_platform_data {
-- 
cgit v1.2.3-55-g7522


From 68f39e74fbc3e58ad52d008072bddacc9eee1c7e Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Mon, 15 Oct 2012 12:09:43 -0700
Subject: ARM: OMAP: Split plat/mmc.h into local headers and platform_data

We need to remove this from plat for ARM common zImage
support.

Also remove includes not needed by the omap_hsmmc.c driver.

Cc: linux-mmc@vger.kernel.org
Acked-by: Chris Ball <cjb@laptop.org>
Acked-by: Venkatraman S <svenkatr@ti.com>
[tony@atomide.com: fold in removal of unused driver includes]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-h2-mmc.c         |   5 +-
 arch/arm/mach-omap1/board-h3-mmc.c         |   3 +-
 arch/arm/mach-omap1/board-htcherald.c      |   2 +-
 arch/arm/mach-omap1/board-innovator.c      |   2 +-
 arch/arm/mach-omap1/board-nokia770.c       |   2 +-
 arch/arm/mach-omap1/board-sx1-mmc.c        |   3 +-
 arch/arm/mach-omap1/devices.c              |   2 +-
 arch/arm/mach-omap1/mmc.h                  |  18 +++
 arch/arm/mach-omap2/board-4430sdp.c        |   2 +-
 arch/arm/mach-omap2/board-n8x0.c           |   2 +-
 arch/arm/mach-omap2/board-omap4panda.c     |   2 +-
 arch/arm/mach-omap2/board-rm680.c          |   2 +-
 arch/arm/mach-omap2/hsmmc.c                |   2 +-
 arch/arm/mach-omap2/mmc.h                  |  23 ++++
 arch/arm/mach-omap2/msdi.c                 |   2 +-
 arch/arm/mach-omap2/omap4-common.c         |   2 +-
 arch/arm/mach-omap2/omap_hwmod_2420_data.c |   2 +-
 arch/arm/mach-omap2/omap_hwmod_2430_data.c |   2 +-
 arch/arm/mach-omap2/omap_hwmod_33xx_data.c |   2 +-
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c |   2 +-
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c |   2 +-
 arch/arm/plat-omap/include/plat/mmc.h      | 188 -----------------------------
 drivers/mmc/host/omap.c                    |   3 +-
 drivers/mmc/host/omap_hsmmc.c              |   4 +-
 include/linux/platform_data/mmc-omap.h     | 147 ++++++++++++++++++++++
 25 files changed, 212 insertions(+), 214 deletions(-)
 create mode 100644 arch/arm/mach-omap1/mmc.h
 create mode 100644 arch/arm/mach-omap2/mmc.h
 delete mode 100644 arch/arm/plat-omap/include/plat/mmc.h
 create mode 100644 include/linux/platform_data/mmc-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/board-h2-mmc.c b/arch/arm/mach-omap1/board-h2-mmc.c
index e1362ce48497..7119ef28e0ad 100644
--- a/arch/arm/mach-omap1/board-h2-mmc.c
+++ b/arch/arm/mach-omap1/board-h2-mmc.c
@@ -13,12 +13,11 @@
  */
 #include <linux/gpio.h>
 #include <linux/platform_device.h>
-
+#include <linux/platform_data/gpio-omap.h>
 #include <linux/i2c/tps65010.h>
 
-#include <plat/mmc.h>
-
 #include "board-h2.h"
+#include "mmc.h"
 
 #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
 
diff --git a/arch/arm/mach-omap1/board-h3-mmc.c b/arch/arm/mach-omap1/board-h3-mmc.c
index c74daace8cd6..17d77914d769 100644
--- a/arch/arm/mach-omap1/board-h3-mmc.c
+++ b/arch/arm/mach-omap1/board-h3-mmc.c
@@ -16,9 +16,8 @@
 
 #include <linux/i2c/tps65010.h>
 
-#include <plat/mmc.h>
-
 #include "board-h3.h"
+#include "mmc.h"
 
 #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
 
diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c
index 87ab2086ef96..f23200ceb43d 100644
--- a/arch/arm/mach-omap1/board-htcherald.c
+++ b/arch/arm/mach-omap1/board-htcherald.c
@@ -43,7 +43,7 @@
 #include <asm/mach/arch.h>
 
 #include <mach/omap7xx.h>
-#include <plat/mmc.h>
+#include "mmc.h"
 
 #include <mach/irqs.h>
 #include <mach/usb.h>
diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c
index db5f7d2976e7..411cc5b14ce3 100644
--- a/arch/arm/mach-omap1/board-innovator.c
+++ b/arch/arm/mach-omap1/board-innovator.c
@@ -36,13 +36,13 @@
 #include <plat/fpga.h>
 #include <plat/tc.h>
 #include <linux/platform_data/keypad-omap.h>
-#include <plat/mmc.h>
 
 #include <mach/hardware.h>
 #include <mach/usb.h>
 
 #include "iomap.h"
 #include "common.h"
+#include "mmc.h"
 
 /* At OMAP1610 Innovator the Ethernet is directly connected to CS1 */
 #define INNOVATOR1610_ETHR_START	0x04000300
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 7d5c06d6a52a..cb72f2474430 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -29,13 +29,13 @@
 #include <asm/mach/map.h>
 
 #include <mach/mux.h>
-#include <plat/mmc.h>
 #include <plat/clock.h>
 
 #include <mach/hardware.h>
 #include <mach/usb.h>
 
 #include "common.h"
+#include "mmc.h"
 
 #define ADS7846_PENDOWN_GPIO	15
 
diff --git a/arch/arm/mach-omap1/board-sx1-mmc.c b/arch/arm/mach-omap1/board-sx1-mmc.c
index 5932d56e17bf..4fcf19c78a08 100644
--- a/arch/arm/mach-omap1/board-sx1-mmc.c
+++ b/arch/arm/mach-omap1/board-sx1-mmc.c
@@ -16,9 +16,10 @@
 #include <linux/platform_device.h>
 
 #include <mach/hardware.h>
-#include <plat/mmc.h>
 #include <mach/board-sx1.h>
 
+#include "mmc.h"
+
 #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
 
 static int mmc_set_power(struct device *dev, int slot, int power_on,
diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c
index d3fec92c54cb..f85836ae6691 100644
--- a/arch/arm/mach-omap1/devices.c
+++ b/arch/arm/mach-omap1/devices.c
@@ -22,7 +22,6 @@
 #include <plat/tc.h>
 #include <mach/mux.h>
 #include <plat/dma.h>
-#include <plat/mmc.h>
 
 #include <mach/omap7xx.h>
 #include <mach/camera.h>
@@ -30,6 +29,7 @@
 
 #include "common.h"
 #include "clock.h"
+#include "mmc.h"
 
 #if defined(CONFIG_SND_SOC) || defined(CONFIG_SND_SOC_MODULE)
 
diff --git a/arch/arm/mach-omap1/mmc.h b/arch/arm/mach-omap1/mmc.h
new file mode 100644
index 000000000000..39c2b13de884
--- /dev/null
+++ b/arch/arm/mach-omap1/mmc.h
@@ -0,0 +1,18 @@
+#include <linux/mmc/host.h>
+#include <linux/platform_data/mmc-omap.h>
+
+#define OMAP15XX_NR_MMC		1
+#define OMAP16XX_NR_MMC		2
+#define OMAP1_MMC_SIZE		0x080
+#define OMAP1_MMC1_BASE		0xfffb7800
+#define OMAP1_MMC2_BASE		0xfffb7c00	/* omap16xx only */
+
+#if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
+void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data,
+				int nr_controllers);
+#else
+static inline void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data,
+				int nr_controllers)
+{
+}
+#endif
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 3669c120c7e8..2ab267ec3b75 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -35,7 +35,6 @@
 
 #include "common.h"
 #include <plat/usb.h>
-#include <plat/mmc.h>
 #include "omap4-keypad.h"
 #include <video/omapdss.h>
 #include <video/omap-panel-nokia-dsi.h>
@@ -45,6 +44,7 @@
 
 #include "soc.h"
 #include "mux.h"
+#include "mmc.h"
 #include "hsmmc.h"
 #include "control.h"
 #include "common-board-devices.h"
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index d95f727ca39a..f865ace5cf85 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -29,7 +29,7 @@
 
 #include "common.h"
 #include <plat/menelaus.h>
-#include <plat/mmc.h>
+#include "mmc.h"
 
 #include "mux.h"
 
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index bfcd397e233c..e9ce9fb9ffac 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -40,10 +40,10 @@
 
 #include "common.h"
 #include <plat/usb.h>
-#include <plat/mmc.h>
 #include <video/omap-panel-tfp410.h>
 
 #include "soc.h"
+#include "mmc.h"
 #include "hsmmc.h"
 #include "control.h"
 #include "mux.h"
diff --git a/arch/arm/mach-omap2/board-rm680.c b/arch/arm/mach-omap2/board-rm680.c
index 45997bfbcbd2..f4b3c288e16b 100644
--- a/arch/arm/mach-omap2/board-rm680.c
+++ b/arch/arm/mach-omap2/board-rm680.c
@@ -23,13 +23,13 @@
 #include <asm/mach-types.h>
 
 #include <plat/i2c.h>
-#include <plat/mmc.h>
 #include <plat/usb.h>
 #include <plat/gpmc.h>
 #include "common.h"
 #include <plat/serial.h>
 
 #include "mux.h"
+#include "mmc.h"
 #include "hsmmc.h"
 #include "sdram-nokia.h"
 #include "common-board-devices.h"
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
index 4d3a6324155f..ec6c2230cb38 100644
--- a/arch/arm/mach-omap2/hsmmc.c
+++ b/arch/arm/mach-omap2/hsmmc.c
@@ -17,11 +17,11 @@
 #include <mach/hardware.h>
 #include <linux/platform_data/gpio-omap.h>
 
-#include <plat/mmc.h>
 #include <plat/omap-pm.h>
 #include <plat/omap_device.h>
 
 #include "mux.h"
+#include "mmc.h"
 #include "hsmmc.h"
 #include "control.h"
 
diff --git a/arch/arm/mach-omap2/mmc.h b/arch/arm/mach-omap2/mmc.h
new file mode 100644
index 000000000000..0cd4b089da9c
--- /dev/null
+++ b/arch/arm/mach-omap2/mmc.h
@@ -0,0 +1,23 @@
+#include <linux/mmc/host.h>
+#include <linux/platform_data/mmc-omap.h>
+
+#define OMAP24XX_NR_MMC		2
+#define OMAP2420_MMC_SIZE	OMAP1_MMC_SIZE
+#define OMAP2_MMC1_BASE		0x4809c000
+
+#define OMAP4_MMC_REG_OFFSET	0x100
+
+#if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
+void omap242x_init_mmc(struct omap_mmc_platform_data **mmc_data);
+#else
+static inline void omap242x_init_mmc(struct omap_mmc_platform_data **mmc_data)
+{
+}
+#endif
+
+struct omap_hwmod;
+int omap_msdi_reset(struct omap_hwmod *oh);
+
+/* called from board-specific card detection service routine */
+extern void omap_mmc_notify_cover_event(struct device *dev, int slot,
+					int is_closed);
diff --git a/arch/arm/mach-omap2/msdi.c b/arch/arm/mach-omap2/msdi.c
index 9e57b4aadb06..a6020d750431 100644
--- a/arch/arm/mach-omap2/msdi.c
+++ b/arch/arm/mach-omap2/msdi.c
@@ -27,11 +27,11 @@
 
 #include <plat/omap_hwmod.h>
 #include <plat/omap_device.h>
-#include <plat/mmc.h>
 
 #include "common.h"
 #include "control.h"
 #include "mux.h"
+#include "mmc.h"
 
 /*
  * MSDI_CON_OFFSET: offset in bytes of the MSDI IP block's CON register
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index e1f289748c5d..de89d382f52c 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -27,12 +27,12 @@
 
 #include <plat/sram.h>
 #include <plat/omap-secure.h>
-#include <plat/mmc.h>
 
 #include "omap-wakeupgen.h"
 
 #include "soc.h"
 #include "common.h"
+#include "mmc.h"
 #include "hsmmc.h"
 #include "omap4-sar-layout.h"
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index b5db6007c523..82ad9a57661e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -21,12 +21,12 @@
 #include <plat/dmtimer.h>
 #include "l3_2xxx.h"
 #include "l4_2xxx.h"
-#include <plat/mmc.h>
 
 #include "omap_hwmod_common_data.h"
 
 #include "cm-regbits-24xx.h"
 #include "prm-regbits-24xx.h"
+#include "mmc.h"
 #include "wd_timer.h"
 
 /*
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index c455e41b0237..5c833fb35d21 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -20,7 +20,7 @@
 #include <plat/serial.h>
 #include <plat/i2c.h>
 #include <plat/dmtimer.h>
-#include <plat/mmc.h>
+#include "mmc.h"
 #include "l3_2xxx.h"
 
 #include "soc.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 59d5c1cd316d..d023a99cfd7a 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -19,7 +19,6 @@
 #include <linux/platform_data/gpio-omap.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <plat/dma.h>
-#include <plat/mmc.h>
 #include <plat/i2c.h>
 
 #include "omap_hwmod_common_data.h"
@@ -28,6 +27,7 @@
 #include "cm33xx.h"
 #include "prm33xx.h"
 #include "prm-regbits-33xx.h"
+#include "mmc.h"
 
 /*
  * IP blocks
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index f67b7ee07dd4..509e8ba3fe75 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -23,7 +23,6 @@
 #include "l3_3xxx.h"
 #include "l4_3xxx.h"
 #include <plat/i2c.h>
-#include <plat/mmc.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <plat/dmtimer.h>
@@ -35,6 +34,7 @@
 #include "omap_hwmod_common_data.h"
 #include "prm-regbits-34xx.h"
 #include "cm-regbits-34xx.h"
+#include "mmc.h"
 #include "wd_timer.h"
 
 /*
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 652d0285bd6d..f87d781c4ebb 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -27,7 +27,6 @@
 #include <plat/dma.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
-#include <plat/mmc.h>
 #include <plat/dmtimer.h>
 #include <plat/common.h>
 #include <plat/iommu.h>
@@ -37,6 +36,7 @@
 #include "cm2_44xx.h"
 #include "prm44xx.h"
 #include "prm-regbits-44xx.h"
+#include "mmc.h"
 #include "wd_timer.h"
 
 /* Base offset for all OMAP4 interrupts external to MPUSS */
diff --git a/arch/arm/plat-omap/include/plat/mmc.h b/arch/arm/plat-omap/include/plat/mmc.h
deleted file mode 100644
index 8b4e4f2da2f5..000000000000
--- a/arch/arm/plat-omap/include/plat/mmc.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * MMC definitions for OMAP2
- *
- * Copyright (C) 2006 Nokia Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __OMAP2_MMC_H
-#define __OMAP2_MMC_H
-
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/mmc/host.h>
-
-#include <plat/omap_hwmod.h>
-
-#define OMAP15XX_NR_MMC		1
-#define OMAP16XX_NR_MMC		2
-#define OMAP1_MMC_SIZE		0x080
-#define OMAP1_MMC1_BASE		0xfffb7800
-#define OMAP1_MMC2_BASE		0xfffb7c00	/* omap16xx only */
-
-#define OMAP24XX_NR_MMC		2
-#define OMAP2420_MMC_SIZE	OMAP1_MMC_SIZE
-#define OMAP2_MMC1_BASE		0x4809c000
-
-#define OMAP4_MMC_REG_OFFSET	0x100
-
-#define OMAP_MMC_MAX_SLOTS	2
-
-/*
- * struct omap_mmc_dev_attr.flags possibilities
- *
- * OMAP_HSMMC_SUPPORTS_DUAL_VOLT: Some HSMMC controller instances can
- *    operate with either 1.8Vdc or 3.0Vdc card voltages; this flag
- *    should be set if this is the case.  See for example Section 22.5.3
- *    "MMC/SD/SDIO1 Bus Voltage Selection" of the OMAP34xx Multimedia
- *    Device Silicon Revision 3.1.x Revision ZR (July 2011) (SWPU223R).
- *
- * OMAP_HSMMC_BROKEN_MULTIBLOCK_READ: Multiple-block read transfers
- *    don't work correctly on some MMC controller instances on some
- *    OMAP3 SoCs; this flag should be set if this is the case.  See
- *    for example Advisory 2.1.1.128 "MMC: Multiple Block Read
- *    Operation Issue" in _OMAP3530/3525/3515/3503 Silicon Errata_
- *    Revision F (October 2010) (SPRZ278F).
- */
-#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT		BIT(0)
-#define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ	BIT(1)
-
-struct omap_mmc_dev_attr {
-	u8 flags;
-};
-
-struct omap_mmc_platform_data {
-	/* back-link to device */
-	struct device *dev;
-
-	/* number of slots per controller */
-	unsigned nr_slots:2;
-
-	/* set if your board has components or wiring that limits the
-	 * maximum frequency on the MMC bus */
-	unsigned int max_freq;
-
-	/* switch the bus to a new slot */
-	int (*switch_slot)(struct device *dev, int slot);
-	/* initialize board-specific MMC functionality, can be NULL if
-	 * not supported */
-	int (*init)(struct device *dev);
-	void (*cleanup)(struct device *dev);
-	void (*shutdown)(struct device *dev);
-
-	/* To handle board related suspend/resume functionality for MMC */
-	int (*suspend)(struct device *dev, int slot);
-	int (*resume)(struct device *dev, int slot);
-
-	/* Return context loss count due to PM states changing */
-	int (*get_context_loss_count)(struct device *dev);
-
-	/* Integrating attributes from the omap_hwmod layer */
-	u8 controller_flags;
-
-	/* Register offset deviation */
-	u16 reg_offset;
-
-	struct omap_mmc_slot_data {
-
-		/*
-		 * 4/8 wires and any additional host capabilities
-		 * need to OR'd all capabilities (ref. linux/mmc/host.h)
-		 */
-		u8  wires;	/* Used for the MMC driver on omap1 and 2420 */
-		u32 caps;	/* Used for the MMC driver on 2430 and later */
-		u32 pm_caps;	/* PM capabilities of the mmc */
-
-		/*
-		 * nomux means "standard" muxing is wrong on this board, and
-		 * that board-specific code handled it before common init logic.
-		 */
-		unsigned nomux:1;
-
-		/* switch pin can be for card detect (default) or card cover */
-		unsigned cover:1;
-
-		/* use the internal clock */
-		unsigned internal_clock:1;
-
-		/* nonremovable e.g. eMMC */
-		unsigned nonremovable:1;
-
-		/* Try to sleep or power off when possible */
-		unsigned power_saving:1;
-
-		/* If using power_saving and the MMC power is not to go off */
-		unsigned no_off:1;
-
-		/* eMMC does not handle power off when not in sleep state */
-		unsigned no_regulator_off_init:1;
-
-		/* Regulator off remapped to sleep */
-		unsigned vcc_aux_disable_is_sleep:1;
-
-		/* we can put the features above into this variable */
-#define HSMMC_HAS_PBIAS		(1 << 0)
-#define HSMMC_HAS_UPDATED_RESET	(1 << 1)
-		unsigned features;
-
-		int switch_pin;			/* gpio (card detect) */
-		int gpio_wp;			/* gpio (write protect) */
-
-		int (*set_bus_mode)(struct device *dev, int slot, int bus_mode);
-		int (*set_power)(struct device *dev, int slot,
-				 int power_on, int vdd);
-		int (*get_ro)(struct device *dev, int slot);
-		void (*remux)(struct device *dev, int slot, int power_on);
-		/* Call back before enabling / disabling regulators */
-		void (*before_set_reg)(struct device *dev, int slot,
-				       int power_on, int vdd);
-		/* Call back after enabling / disabling regulators */
-		void (*after_set_reg)(struct device *dev, int slot,
-				      int power_on, int vdd);
-		/* if we have special card, init it using this callback */
-		void (*init_card)(struct mmc_card *card);
-
-		/* return MMC cover switch state, can be NULL if not supported.
-		 *
-		 * possible return values:
-		 *   0 - closed
-		 *   1 - open
-		 */
-		int (*get_cover_state)(struct device *dev, int slot);
-
-		const char *name;
-		u32 ocr_mask;
-
-		/* Card detection IRQs */
-		int card_detect_irq;
-		int (*card_detect)(struct device *dev, int slot);
-
-		unsigned int ban_openended:1;
-
-	} slots[OMAP_MMC_MAX_SLOTS];
-};
-
-/* called from board-specific card detection service routine */
-extern void omap_mmc_notify_cover_event(struct device *dev, int slot,
-					int is_closed);
-
-#if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
-void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data,
-				int nr_controllers);
-void omap242x_init_mmc(struct omap_mmc_platform_data **mmc_data);
-#else
-static inline void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data,
-				int nr_controllers)
-{
-}
-static inline void omap242x_init_mmc(struct omap_mmc_platform_data **mmc_data)
-{
-}
-#endif
-
-extern int omap_msdi_reset(struct omap_hwmod *oh);
-
-#endif
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 48ad361613ef..e7c61b9b044c 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -28,8 +28,9 @@
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/platform_data/mmc-omap.h>
 
-#include <plat/mmc.h>
+#include <plat/cpu.h>
 #include <plat/dma.h>
 
 #define	OMAP_MMC_REG_CMD	0x00
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 54bfd0cc106b..9b24bd46aad3 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -38,9 +38,7 @@
 #include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
-#include <mach/hardware.h>
-#include <plat/mmc.h>
-#include <plat/cpu.h>
+#include <linux/platform_data/mmc-omap.h>
 
 /* OMAP HSMMC Host Controller Registers */
 #define OMAP_HSMMC_SYSSTATUS	0x0014
diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h
new file mode 100644
index 000000000000..39a7abc5604c
--- /dev/null
+++ b/include/linux/platform_data/mmc-omap.h
@@ -0,0 +1,147 @@
+/*
+ * MMC definitions for OMAP2
+ *
+ * Copyright (C) 2006 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define OMAP_MMC_MAX_SLOTS	2
+
+/*
+ * struct omap_mmc_dev_attr.flags possibilities
+ *
+ * OMAP_HSMMC_SUPPORTS_DUAL_VOLT: Some HSMMC controller instances can
+ *    operate with either 1.8Vdc or 3.0Vdc card voltages; this flag
+ *    should be set if this is the case.  See for example Section 22.5.3
+ *    "MMC/SD/SDIO1 Bus Voltage Selection" of the OMAP34xx Multimedia
+ *    Device Silicon Revision 3.1.x Revision ZR (July 2011) (SWPU223R).
+ *
+ * OMAP_HSMMC_BROKEN_MULTIBLOCK_READ: Multiple-block read transfers
+ *    don't work correctly on some MMC controller instances on some
+ *    OMAP3 SoCs; this flag should be set if this is the case.  See
+ *    for example Advisory 2.1.1.128 "MMC: Multiple Block Read
+ *    Operation Issue" in _OMAP3530/3525/3515/3503 Silicon Errata_
+ *    Revision F (October 2010) (SPRZ278F).
+ */
+#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT		BIT(0)
+#define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ	BIT(1)
+
+struct mmc_card;
+
+struct omap_mmc_dev_attr {
+	u8 flags;
+};
+
+struct omap_mmc_platform_data {
+	/* back-link to device */
+	struct device *dev;
+
+	/* number of slots per controller */
+	unsigned nr_slots:2;
+
+	/* set if your board has components or wiring that limits the
+	 * maximum frequency on the MMC bus */
+	unsigned int max_freq;
+
+	/* switch the bus to a new slot */
+	int (*switch_slot)(struct device *dev, int slot);
+	/* initialize board-specific MMC functionality, can be NULL if
+	 * not supported */
+	int (*init)(struct device *dev);
+	void (*cleanup)(struct device *dev);
+	void (*shutdown)(struct device *dev);
+
+	/* To handle board related suspend/resume functionality for MMC */
+	int (*suspend)(struct device *dev, int slot);
+	int (*resume)(struct device *dev, int slot);
+
+	/* Return context loss count due to PM states changing */
+	int (*get_context_loss_count)(struct device *dev);
+
+	/* Integrating attributes from the omap_hwmod layer */
+	u8 controller_flags;
+
+	/* Register offset deviation */
+	u16 reg_offset;
+
+	struct omap_mmc_slot_data {
+
+		/*
+		 * 4/8 wires and any additional host capabilities
+		 * need to OR'd all capabilities (ref. linux/mmc/host.h)
+		 */
+		u8  wires;	/* Used for the MMC driver on omap1 and 2420 */
+		u32 caps;	/* Used for the MMC driver on 2430 and later */
+		u32 pm_caps;	/* PM capabilities of the mmc */
+
+		/*
+		 * nomux means "standard" muxing is wrong on this board, and
+		 * that board-specific code handled it before common init logic.
+		 */
+		unsigned nomux:1;
+
+		/* switch pin can be for card detect (default) or card cover */
+		unsigned cover:1;
+
+		/* use the internal clock */
+		unsigned internal_clock:1;
+
+		/* nonremovable e.g. eMMC */
+		unsigned nonremovable:1;
+
+		/* Try to sleep or power off when possible */
+		unsigned power_saving:1;
+
+		/* If using power_saving and the MMC power is not to go off */
+		unsigned no_off:1;
+
+		/* eMMC does not handle power off when not in sleep state */
+		unsigned no_regulator_off_init:1;
+
+		/* Regulator off remapped to sleep */
+		unsigned vcc_aux_disable_is_sleep:1;
+
+		/* we can put the features above into this variable */
+#define HSMMC_HAS_PBIAS		(1 << 0)
+#define HSMMC_HAS_UPDATED_RESET	(1 << 1)
+		unsigned features;
+
+		int switch_pin;			/* gpio (card detect) */
+		int gpio_wp;			/* gpio (write protect) */
+
+		int (*set_bus_mode)(struct device *dev, int slot, int bus_mode);
+		int (*set_power)(struct device *dev, int slot,
+				 int power_on, int vdd);
+		int (*get_ro)(struct device *dev, int slot);
+		void (*remux)(struct device *dev, int slot, int power_on);
+		/* Call back before enabling / disabling regulators */
+		void (*before_set_reg)(struct device *dev, int slot,
+				       int power_on, int vdd);
+		/* Call back after enabling / disabling regulators */
+		void (*after_set_reg)(struct device *dev, int slot,
+				      int power_on, int vdd);
+		/* if we have special card, init it using this callback */
+		void (*init_card)(struct mmc_card *card);
+
+		/* return MMC cover switch state, can be NULL if not supported.
+		 *
+		 * possible return values:
+		 *   0 - closed
+		 *   1 - open
+		 */
+		int (*get_cover_state)(struct device *dev, int slot);
+
+		const char *name;
+		u32 ocr_mask;
+
+		/* Card detection IRQs */
+		int card_detect_irq;
+		int (*card_detect)(struct device *dev, int slot);
+
+		unsigned int ban_openended:1;
+
+	} slots[OMAP_MMC_MAX_SLOTS];
+};
-- 
cgit v1.2.3-55-g7522


From 53db20d123f7a1bf44e46b727775403672655fde Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Mon, 15 Oct 2012 12:10:33 -0700
Subject: mmc: omap: Remove cpu_is_omap usage from the driver

This is needed for the ARM common zImage support.

We can use the existing slot features to pass omap1
specific options to the driver. For omap2 we don't
want to pass anything new as that will be eventually
moved to use device tree based init.

Note that this patch depends on earlier patch that
moves plat/mmc.h into include/linux/platform_data.

Cc: linux-mmc@vger.kernel.org
Cc: Venkatraman S <svenkatr@ti.com>
Acked-by: Chris Ball <cjb@laptop.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/devices.c          |  7 +++++++
 drivers/mmc/host/omap.c                | 23 +++++++++++++++--------
 include/linux/platform_data/mmc-omap.h |  3 +++
 3 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c
index f85836ae6691..d807d949913d 100644
--- a/arch/arm/mach-omap1/devices.c
+++ b/arch/arm/mach-omap1/devices.c
@@ -175,6 +175,13 @@ static int __init omap_mmc_add(const char *name, int id, unsigned long base,
 	res[3].name = "tx";
 	res[3].flags = IORESOURCE_DMA;
 
+	if (cpu_is_omap7xx())
+		data->slots[0].features = MMC_OMAP7XX;
+	if (cpu_is_omap15xx())
+		data->slots[0].features = MMC_OMAP15XX;
+	if (cpu_is_omap16xx())
+		data->slots[0].features = MMC_OMAP16XX;
+
 	ret = platform_device_add_resources(pdev, res, ARRAY_SIZE(res));
 	if (ret == 0)
 		ret = platform_device_add_data(pdev, data, sizeof(*data));
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index e7c61b9b044c..9f0e26fffaea 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -30,7 +30,6 @@
 #include <linux/slab.h>
 #include <linux/platform_data/mmc-omap.h>
 
-#include <plat/cpu.h>
 #include <plat/dma.h>
 
 #define	OMAP_MMC_REG_CMD	0x00
@@ -73,6 +72,13 @@
 #define	OMAP_MMC_STAT_CARD_BUSY		(1 <<  2)
 #define	OMAP_MMC_STAT_END_OF_CMD	(1 <<  0)
 
+#define mmc_omap7xx()	(host->features & MMC_OMAP7XX)
+#define mmc_omap15xx()	(host->features & MMC_OMAP15XX)
+#define mmc_omap16xx()	(host->features & MMC_OMAP16XX)
+#define MMC_OMAP1_MASK	(MMC_OMAP7XX | MMC_OMAP15XX | MMC_OMAP16XX)
+#define mmc_omap1()	(host->features & MMC_OMAP1_MASK)
+#define mmc_omap2()	(!mmc_omap1())
+
 #define OMAP_MMC_REG(host, reg)		(OMAP_MMC_REG_##reg << (host)->reg_shift)
 #define OMAP_MMC_READ(host, reg)	__raw_readw((host)->virt_base + OMAP_MMC_REG(host, reg))
 #define OMAP_MMC_WRITE(host, reg, val)	__raw_writew((val), (host)->virt_base + OMAP_MMC_REG(host, reg))
@@ -148,6 +154,7 @@ struct mmc_omap_host {
 	u32			buffer_bytes_left;
 	u32			total_bytes_left;
 
+	unsigned		features;
 	unsigned		use_dma:1;
 	unsigned		brs_received:1, dma_done:1;
 	unsigned		dma_in_use:1;
@@ -989,7 +996,7 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
 		 * blocksize is at least that large. Blocksize is
 		 * usually 512 bytes; but not for some SD reads.
 		 */
-		burst = cpu_is_omap15xx() ? 32 : 64;
+		burst = mmc_omap15xx() ? 32 : 64;
 		if (burst > data->blksz)
 			burst = data->blksz;
 
@@ -1105,8 +1112,7 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
 	if (slot->pdata->set_power != NULL)
 		slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
 					vdd);
-
-	if (cpu_is_omap24xx()) {
+	if (mmc_omap2()) {
 		u16 w;
 
 		if (power_on) {
@@ -1240,7 +1246,7 @@ static int __devinit mmc_omap_new_slot(struct mmc_omap_host *host, int id)
 	mmc->ops = &mmc_omap_ops;
 	mmc->f_min = 400000;
 
-	if (cpu_class_is_omap2())
+	if (mmc_omap2())
 		mmc->f_max = 48000000;
 	else
 		mmc->f_max = 24000000;
@@ -1360,6 +1366,7 @@ static int __devinit mmc_omap_probe(struct platform_device *pdev)
 	init_waitqueue_head(&host->slot_wq);
 
 	host->pdata = pdata;
+	host->features = host->pdata->slots[0].features;
 	host->dev = &pdev->dev;
 	platform_set_drvdata(pdev, host);
 
@@ -1392,7 +1399,7 @@ static int __devinit mmc_omap_probe(struct platform_device *pdev)
 	host->dma_tx_burst = -1;
 	host->dma_rx_burst = -1;
 
-	if (cpu_is_omap24xx())
+	if (mmc_omap2())
 		sig = host->id == 0 ? OMAP24XX_DMA_MMC1_TX : OMAP24XX_DMA_MMC2_TX;
 	else
 		sig = host->id == 0 ? OMAP_DMA_MMC_TX : OMAP_DMA_MMC2_TX;
@@ -1408,7 +1415,7 @@ static int __devinit mmc_omap_probe(struct platform_device *pdev)
 		dev_warn(host->dev, "unable to obtain TX DMA engine channel %u\n",
 			sig);
 #endif
-	if (cpu_is_omap24xx())
+	if (mmc_omap2())
 		sig = host->id == 0 ? OMAP24XX_DMA_MMC1_RX : OMAP24XX_DMA_MMC2_RX;
 	else
 		sig = host->id == 0 ? OMAP_DMA_MMC_RX : OMAP_DMA_MMC2_RX;
@@ -1436,7 +1443,7 @@ static int __devinit mmc_omap_probe(struct platform_device *pdev)
 	}
 
 	host->nr_slots = pdata->nr_slots;
-	host->reg_shift = (cpu_is_omap7xx() ? 1 : 2);
+	host->reg_shift = (mmc_omap7xx() ? 1 : 2);
 
 	host->mmc_omap_wq = alloc_workqueue("mmc_omap", 0, 0);
 	if (!host->mmc_omap_wq)
diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h
index 39a7abc5604c..2bf6ea82ff94 100644
--- a/include/linux/platform_data/mmc-omap.h
+++ b/include/linux/platform_data/mmc-omap.h
@@ -107,6 +107,9 @@ struct omap_mmc_platform_data {
 		/* we can put the features above into this variable */
 #define HSMMC_HAS_PBIAS		(1 << 0)
 #define HSMMC_HAS_UPDATED_RESET	(1 << 1)
+#define MMC_OMAP7XX		(1 << 2)
+#define MMC_OMAP15XX		(1 << 3)
+#define MMC_OMAP16XX		(1 << 4)
 		unsigned features;
 
 		int switch_pin;			/* gpio (card detect) */
-- 
cgit v1.2.3-55-g7522


From 1b61810d196b84bfdc14c289126d98de765b7f97 Mon Sep 17 00:00:00 2001
From: Chad Reese
Date: Mon, 8 Oct 2012 17:38:32 -0700
Subject: RapidIO: Fix comment error.

The resource index for the mailboxes was incorrect.

Signed-off-by: Chad Reese <kreese@caviumnetworks.com>
Acked-by: Alexandre Bounine <alexandre.bounine@idt.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/rio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rio.h b/include/linux/rio.h
index a90ebadd9da0..abddba9db130 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -62,7 +62,7 @@
  *
  *	0	RapidIO inbound doorbells
  *	1	RapidIO inbound mailboxes
- *	1	RapidIO outbound mailboxes
+ *	2	RapidIO outbound mailboxes
  */
 #define RIO_DOORBELL_RESOURCE	0
 #define RIO_INB_MBOX_RESOURCE	1
-- 
cgit v1.2.3-55-g7522


From 7bd3b61853da6d2fc579e27c7a1ac1e9fc968d1a Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Mon, 15 Oct 2012 13:53:41 -0700
Subject: ARM: OMAP2: Move plat/menelaus.h to linux/mfd/menelaus.h

We can move menelaus.h to live with other mfd headers to
get it out of plat for ARM common zImage support.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-h4.c             |  2 +-
 arch/arm/mach-omap2/board-n8x0.c           |  2 +-
 arch/arm/plat-omap/include/plat/menelaus.h | 49 ------------------------------
 drivers/mfd/menelaus.c                     |  2 +-
 include/linux/mfd/menelaus.h               | 47 ++++++++++++++++++++++++++++
 5 files changed, 50 insertions(+), 52 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/menelaus.h
 create mode 100644 include/linux/mfd/menelaus.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index 8d04bf851af4..3977a0276b4c 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -26,12 +26,12 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/input/matrix_keypad.h>
+#include <linux/mfd/menelaus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
-#include <plat/menelaus.h>
 #include <plat/dma.h>
 #include <plat/gpmc.h>
 #include "debug-devices.h"
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index d95f727ca39a..e3e8325054b2 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -22,13 +22,13 @@
 #include <linux/usb/musb.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/mtd-onenand-omap2.h>
+#include <linux/mfd/menelaus.h>
 #include <sound/tlv320aic3x.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
 
 #include "common.h"
-#include <plat/menelaus.h>
 #include <plat/mmc.h>
 
 #include "mux.h"
diff --git a/arch/arm/plat-omap/include/plat/menelaus.h b/arch/arm/plat-omap/include/plat/menelaus.h
deleted file mode 100644
index 4a970ec62dd1..000000000000
--- a/arch/arm/plat-omap/include/plat/menelaus.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * arch/arm/plat-omap/include/mach/menelaus.h
- *
- * Functions to access Menelaus power management chip
- */
-
-#ifndef __ASM_ARCH_MENELAUS_H
-#define __ASM_ARCH_MENELAUS_H
-
-struct device;
-
-struct menelaus_platform_data {
-	int (* late_init)(struct device *dev);
-};
-
-extern int menelaus_register_mmc_callback(void (*callback)(void *data, u8 card_mask),
-					  void *data);
-extern void menelaus_unregister_mmc_callback(void);
-extern int menelaus_set_mmc_opendrain(int slot, int enable);
-extern int menelaus_set_mmc_slot(int slot, int enable, int power, int cd_on);
-
-extern int menelaus_set_vmem(unsigned int mV);
-extern int menelaus_set_vio(unsigned int mV);
-extern int menelaus_set_vmmc(unsigned int mV);
-extern int menelaus_set_vaux(unsigned int mV);
-extern int menelaus_set_vdcdc(int dcdc, unsigned int mV);
-extern int menelaus_set_slot_sel(int enable);
-extern int menelaus_get_slot_pin_states(void);
-extern int menelaus_set_vcore_sw(unsigned int mV);
-extern int menelaus_set_vcore_hw(unsigned int roof_mV, unsigned int floor_mV);
-
-#define EN_VPLL_SLEEP	(1 << 7)
-#define EN_VMMC_SLEEP	(1 << 6)
-#define EN_VAUX_SLEEP	(1 << 5)
-#define EN_VIO_SLEEP	(1 << 4)
-#define EN_VMEM_SLEEP	(1 << 3)
-#define EN_DC3_SLEEP	(1 << 2)
-#define EN_DC2_SLEEP	(1 << 1)
-#define EN_VC_SLEEP	(1 << 0)
-
-extern int menelaus_set_regulator_sleep(int enable, u32 val);
-
-#if defined(CONFIG_ARCH_OMAP2) && defined(CONFIG_MENELAUS)
-#define omap_has_menelaus()	1
-#else
-#define omap_has_menelaus()	0
-#endif
-
-#endif
diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c
index 55d589981412..998ce8cb3065 100644
--- a/drivers/mfd/menelaus.c
+++ b/drivers/mfd/menelaus.c
@@ -41,11 +41,11 @@
 #include <linux/rtc.h>
 #include <linux/bcd.h>
 #include <linux/slab.h>
+#include <linux/mfd/menelaus.h>
 
 #include <asm/mach/irq.h>
 
 #include <asm/gpio.h>
-#include <plat/menelaus.h>
 
 #define DRIVER_NAME			"menelaus"
 
diff --git a/include/linux/mfd/menelaus.h b/include/linux/mfd/menelaus.h
new file mode 100644
index 000000000000..f097e89134cb
--- /dev/null
+++ b/include/linux/mfd/menelaus.h
@@ -0,0 +1,47 @@
+/*
+ * Functions to access Menelaus power management chip
+ */
+
+#ifndef __ASM_ARCH_MENELAUS_H
+#define __ASM_ARCH_MENELAUS_H
+
+struct device;
+
+struct menelaus_platform_data {
+	int (* late_init)(struct device *dev);
+};
+
+extern int menelaus_register_mmc_callback(void (*callback)(void *data, u8 card_mask),
+					  void *data);
+extern void menelaus_unregister_mmc_callback(void);
+extern int menelaus_set_mmc_opendrain(int slot, int enable);
+extern int menelaus_set_mmc_slot(int slot, int enable, int power, int cd_on);
+
+extern int menelaus_set_vmem(unsigned int mV);
+extern int menelaus_set_vio(unsigned int mV);
+extern int menelaus_set_vmmc(unsigned int mV);
+extern int menelaus_set_vaux(unsigned int mV);
+extern int menelaus_set_vdcdc(int dcdc, unsigned int mV);
+extern int menelaus_set_slot_sel(int enable);
+extern int menelaus_get_slot_pin_states(void);
+extern int menelaus_set_vcore_sw(unsigned int mV);
+extern int menelaus_set_vcore_hw(unsigned int roof_mV, unsigned int floor_mV);
+
+#define EN_VPLL_SLEEP	(1 << 7)
+#define EN_VMMC_SLEEP	(1 << 6)
+#define EN_VAUX_SLEEP	(1 << 5)
+#define EN_VIO_SLEEP	(1 << 4)
+#define EN_VMEM_SLEEP	(1 << 3)
+#define EN_DC3_SLEEP	(1 << 2)
+#define EN_DC2_SLEEP	(1 << 1)
+#define EN_VC_SLEEP	(1 << 0)
+
+extern int menelaus_set_regulator_sleep(int enable, u32 val);
+
+#if defined(CONFIG_ARCH_OMAP2) && defined(CONFIG_MENELAUS)
+#define omap_has_menelaus()	1
+#else
+#define omap_has_menelaus()	0
+#endif
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 98aed08e16c5f18d0c31fc07127bc163ccd0d04c Mon Sep 17 00:00:00 2001
From: Jean Pihet
Date: Thu, 4 Oct 2012 18:47:11 +0200
Subject: ARM: OMAP: SmartReflex: pass device dependent data via platform data

Remove the device dependent code (ex. cpu_is_xxx()) and settings
from the driver code and instead pass them via the platform
data. This allows a clean separation of the driver code and the platform
code, as required by the move of the platform header files to
include/linux/platform_data.

Note about the smartreflex functional clocks: the smartreflex fclks
are derived from sys_clk and have the same name as the main_clk from
the hwmod entry, in order for the SmartReflex driver to request the
fclk (using clk_get(dev, "fck")).

Signed-off-by: Jean Pihet <j-pihet@ti.com>
Signed-off-by: Kevin Hilman <khilman@ti.com>
---
 arch/arm/mach-omap2/sr_device.c   | 13 ++++++++++
 drivers/power/avs/smartreflex.c   | 54 ++++++++++++---------------------------
 include/linux/power/smartreflex.h | 14 ++++++++--
 3 files changed, 42 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/sr_device.c b/arch/arm/mach-omap2/sr_device.c
index f8217a5a4a26..a04bc25a1d26 100644
--- a/arch/arm/mach-omap2/sr_device.c
+++ b/arch/arm/mach-omap2/sr_device.c
@@ -121,6 +121,19 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user)
 	sr_data->senn_mod = 0x1;
 	sr_data->senp_mod = 0x1;
 
+	if (cpu_is_omap34xx() || cpu_is_omap44xx()) {
+		sr_data->err_weight = OMAP3430_SR_ERRWEIGHT;
+		sr_data->err_maxlimit = OMAP3430_SR_ERRMAXLIMIT;
+		sr_data->accum_data = OMAP3430_SR_ACCUMDATA;
+		if (!(strcmp(sr_data->name, "smartreflex_mpu"))) {
+			sr_data->senn_avgweight = OMAP3430_SR1_SENNAVGWEIGHT;
+			sr_data->senp_avgweight = OMAP3430_SR1_SENPAVGWEIGHT;
+		} else {
+			sr_data->senn_avgweight = OMAP3430_SR2_SENNAVGWEIGHT;
+			sr_data->senp_avgweight = OMAP3430_SR2_SENPAVGWEIGHT;
+		}
+	}
+
 	sr_data->voltdm = voltdm_lookup(sr_dev_attr->sensor_voltdm_name);
 	if (!sr_data->voltdm) {
 		pr_err("%s: Unable to get voltage domain pointer for VDD %s\n",
diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c
index 24768a27e1d8..4c4519e59be4 100644
--- a/drivers/power/avs/smartreflex.c
+++ b/drivers/power/avs/smartreflex.c
@@ -130,24 +130,21 @@ static irqreturn_t sr_interrupt(int irq, void *data)
 
 static void sr_set_clk_length(struct omap_sr *sr)
 {
-	struct clk *sys_ck;
-	u32 sys_clk_speed;
+	struct clk *fck;
+	u32 fclk_speed;
 
-	if (cpu_is_omap34xx())
-		sys_ck = clk_get(NULL, "sys_ck");
-	else
-		sys_ck = clk_get(NULL, "sys_clkin_ck");
+	fck = clk_get(&sr->pdev->dev, "fck");
 
-	if (IS_ERR(sys_ck)) {
-		dev_err(&sr->pdev->dev, "%s: unable to get sys clk\n",
-			__func__);
+	if (IS_ERR(fck)) {
+		dev_err(&sr->pdev->dev, "%s: unable to get fck for device %s\n",
+				__func__, dev_name(&sr->pdev->dev));
 		return;
 	}
 
-	sys_clk_speed = clk_get_rate(sys_ck);
-	clk_put(sys_ck);
+	fclk_speed = clk_get_rate(fck);
+	clk_put(fck);
 
-	switch (sys_clk_speed) {
+	switch (fclk_speed) {
 	case 12000000:
 		sr->clk_length = SRCLKLENGTH_12MHZ_SYSCLK;
 		break;
@@ -164,34 +161,12 @@ static void sr_set_clk_length(struct omap_sr *sr)
 		sr->clk_length = SRCLKLENGTH_38MHZ_SYSCLK;
 		break;
 	default:
-		dev_err(&sr->pdev->dev, "%s: Invalid sysclk value: %d\n",
-			__func__, sys_clk_speed);
+		dev_err(&sr->pdev->dev, "%s: Invalid fclk rate: %d\n",
+			__func__, fclk_speed);
 		break;
 	}
 }
 
-static void sr_set_regfields(struct omap_sr *sr)
-{
-	/*
-	 * For time being these values are defined in smartreflex.h
-	 * and populated during init. May be they can be moved to board
-	 * file or pmic specific data structure. In that case these structure
-	 * fields will have to be populated using the pdata or pmic structure.
-	 */
-	if (cpu_is_omap34xx() || cpu_is_omap44xx()) {
-		sr->err_weight = OMAP3430_SR_ERRWEIGHT;
-		sr->err_maxlimit = OMAP3430_SR_ERRMAXLIMIT;
-		sr->accum_data = OMAP3430_SR_ACCUMDATA;
-		if (!(strcmp(sr->name, "smartreflex_mpu_iva"))) {
-			sr->senn_avgweight = OMAP3430_SR1_SENNAVGWEIGHT;
-			sr->senp_avgweight = OMAP3430_SR1_SENPAVGWEIGHT;
-		} else {
-			sr->senn_avgweight = OMAP3430_SR2_SENNAVGWEIGHT;
-			sr->senp_avgweight = OMAP3430_SR2_SENPAVGWEIGHT;
-		}
-	}
-}
-
 static void sr_start_vddautocomp(struct omap_sr *sr)
 {
 	if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) {
@@ -924,8 +899,14 @@ static int __init omap_sr_probe(struct platform_device *pdev)
 	sr_info->nvalue_count = pdata->nvalue_count;
 	sr_info->senn_mod = pdata->senn_mod;
 	sr_info->senp_mod = pdata->senp_mod;
+	sr_info->err_weight = pdata->err_weight;
+	sr_info->err_maxlimit = pdata->err_maxlimit;
+	sr_info->accum_data = pdata->accum_data;
+	sr_info->senn_avgweight = pdata->senn_avgweight;
+	sr_info->senp_avgweight = pdata->senp_avgweight;
 	sr_info->autocomp_active = false;
 	sr_info->ip_type = pdata->ip_type;
+
 	sr_info->base = ioremap(mem->start, resource_size(mem));
 	if (!sr_info->base) {
 		dev_err(&pdev->dev, "%s: ioremap fail\n", __func__);
@@ -937,7 +918,6 @@ static int __init omap_sr_probe(struct platform_device *pdev)
 		sr_info->irq = irq->start;
 
 	sr_set_clk_length(sr_info);
-	sr_set_regfields(sr_info);
 
 	list_add(&sr_info->node, &sr_list);
 
diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h
index 4a496ebc7d73..c0f44c2b006d 100644
--- a/include/linux/power/smartreflex.h
+++ b/include/linux/power/smartreflex.h
@@ -260,8 +260,13 @@ struct omap_sr_nvalue_table {
  *
  * @name:		instance name
  * @ip_type:		Smartreflex IP type.
- * @senp_mod:		SENPENABLE value for the sr
- * @senn_mod:		SENNENABLE value for sr
+ * @senp_mod:		SENPENABLE value of the sr CONFIG register
+ * @senn_mod:		SENNENABLE value for sr CONFIG register
+ * @err_weight		ERRWEIGHT value of the sr ERRCONFIG register
+ * @err_maxlimit	ERRMAXLIMIT value of the sr ERRCONFIG register
+ * @accum_data		ACCUMDATA value of the sr CONFIG register
+ * @senn_avgweight	SENNAVGWEIGHT value of the sr AVGWEIGHT register
+ * @senp_avgweight	SENPAVGWEIGHT value of the sr AVGWEIGHT register
  * @nvalue_count:	Number of distinct nvalues in the nvalue table
  * @enable_on_init:	whether this sr module needs to enabled at
  *			boot up or not.
@@ -274,6 +279,11 @@ struct omap_sr_data {
 	int				ip_type;
 	u32				senp_mod;
 	u32				senn_mod;
+	u32				err_weight;
+	u32				err_maxlimit;
+	u32				accum_data;
+	u32				senn_avgweight;
+	u32				senp_avgweight;
 	int				nvalue_count;
 	bool				enable_on_init;
 	struct omap_sr_nvalue_table	*nvalue_table;
-- 
cgit v1.2.3-55-g7522


From 5edee61edeaaebafe584f8fb7074c1ef4658596b Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 16 Oct 2012 15:03:14 -0700
Subject: cgroup: cgroup_subsys->fork() should be called after the task is
 added to css_set

cgroup core has a bug which violates a basic rule about event
notifications - when a new entity needs to be added, you add that to
the notification list first and then make the new entity conform to
the current state.  If done in the reverse order, an event happening
inbetween will be lost.

cgroup_subsys->fork() is invoked way before the new task is added to
the css_set.  Currently, cgroup_freezer is the only user of ->fork()
and uses it to make new tasks conform to the current state of the
freezer.  If FROZEN state is requested while fork is in progress
between cgroup_fork_callbacks() and cgroup_post_fork(), the child
could escape freezing - the cgroup isn't frozen when ->fork() is
called and the freezer couldn't see the new task on the css_set.

This patch moves cgroup_subsys->fork() invocation to
cgroup_post_fork() after the new task is added to the css_set.
cgroup_fork_callbacks() is removed.

Because now a task may be migrated during cgroup_subsys->fork(),
freezer_fork() is updated so that it adheres to the usual RCU locking
and the rather pointless comment on why locking can be different there
is removed (if it doesn't make anything simpler, why even bother?).

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@vger.kernel.org
---
 include/linux/cgroup.h  |  1 -
 kernel/cgroup.c         | 62 ++++++++++++++++++++++++-------------------------
 kernel/cgroup_freezer.c | 13 ++++-------
 kernel/fork.c           |  9 +------
 4 files changed, 35 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f8a030ced0c7..4cd1d0fd2542 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -34,7 +34,6 @@ extern int cgroup_lock_is_held(void);
 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
 extern void cgroup_fork(struct task_struct *p);
-extern void cgroup_fork_callbacks(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
 extern int cgroupstats_build(struct cgroupstats *stats,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13774b3b39aa..b7a0171067ea 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4843,45 +4843,20 @@ void cgroup_fork(struct task_struct *child)
 	INIT_LIST_HEAD(&child->cg_list);
 }
 
-/**
- * cgroup_fork_callbacks - run fork callbacks
- * @child: the new task
- *
- * Called on a new task very soon before adding it to the
- * tasklist. No need to take any locks since no-one can
- * be operating on this task.
- */
-void cgroup_fork_callbacks(struct task_struct *child)
-{
-	if (need_forkexit_callback) {
-		int i;
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-			struct cgroup_subsys *ss = subsys[i];
-
-			/*
-			 * forkexit callbacks are only supported for
-			 * builtin subsystems.
-			 */
-			if (!ss || ss->module)
-				continue;
-
-			if (ss->fork)
-				ss->fork(child);
-		}
-	}
-}
-
 /**
  * cgroup_post_fork - called on a new task after adding it to the task list
  * @child: the task in question
  *
- * Adds the task to the list running through its css_set if necessary.
- * Has to be after the task is visible on the task list in case we race
- * with the first call to cgroup_iter_start() - to guarantee that the
- * new task ends up on its list.
+ * Adds the task to the list running through its css_set if necessary and
+ * call the subsystem fork() callbacks.  Has to be after the task is
+ * visible on the task list in case we race with the first call to
+ * cgroup_iter_start() - to guarantee that the new task ends up on its
+ * list.
  */
 void cgroup_post_fork(struct task_struct *child)
 {
+	int i;
+
 	/*
 	 * use_task_css_set_links is set to 1 before we walk the tasklist
 	 * under the tasklist_lock and we read it here after we added the child
@@ -4910,7 +4885,30 @@ void cgroup_post_fork(struct task_struct *child)
 		}
 		write_unlock(&css_set_lock);
 	}
+
+	/*
+	 * Call ss->fork().  This must happen after @child is linked on
+	 * css_set; otherwise, @child might change state between ->fork()
+	 * and addition to css_set.
+	 */
+	if (need_forkexit_callback) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+
+			/*
+			 * fork/exit callbacks are supported only for
+			 * builtin subsystems and we don't need further
+			 * synchronization as they never go away.
+			 */
+			if (!ss || ss->module)
+				continue;
+
+			if (ss->fork)
+				ss->fork(child);
+		}
+	}
 }
+
 /**
  * cgroup_exit - detach cgroup from exiting task
  * @tsk: pointer to task_struct of exiting process
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index b1724ce98981..12bfedb598c2 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -186,23 +186,15 @@ static void freezer_fork(struct task_struct *task)
 {
 	struct freezer *freezer;
 
-	/*
-	 * No lock is needed, since the task isn't on tasklist yet,
-	 * so it can't be moved to another cgroup, which means the
-	 * freezer won't be removed and will be valid during this
-	 * function call.  Nevertheless, apply RCU read-side critical
-	 * section to suppress RCU lockdep false positives.
-	 */
 	rcu_read_lock();
 	freezer = task_freezer(task);
-	rcu_read_unlock();
 
 	/*
 	 * The root cgroup is non-freezable, so we can skip the
 	 * following check.
 	 */
 	if (!freezer->css.cgroup->parent)
-		return;
+		goto out;
 
 	spin_lock_irq(&freezer->lock);
 	BUG_ON(freezer->state == CGROUP_FROZEN);
@@ -210,7 +202,10 @@ static void freezer_fork(struct task_struct *task)
 	/* Locking avoids race with FREEZING -> THAWED transitions. */
 	if (freezer->state == CGROUP_FREEZING)
 		freeze_task(task);
+
 	spin_unlock_irq(&freezer->lock);
+out:
+	rcu_read_unlock();
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..acc4cb62f32f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1135,7 +1135,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 {
 	int retval;
 	struct task_struct *p;
-	int cgroup_callbacks_done = 0;
 
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1393,12 +1392,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
 
-	/* Now that the task is set up, run cgroup callbacks if
-	 * necessary. We need to run them before the task is visible
-	 * on the tasklist. */
-	cgroup_fork_callbacks(p);
-	cgroup_callbacks_done = 1;
-
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
@@ -1503,7 +1496,7 @@ bad_fork_cleanup_cgroup:
 #endif
 	if (clone_flags & CLONE_THREAD)
 		threadgroup_change_end(current);
-	cgroup_exit(p, cgroup_callbacks_done);
+	cgroup_exit(p, 0);
 	delayacct_tsk_free(p);
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
-- 
cgit v1.2.3-55-g7522


From dd67d32dbc5de299d70cc9e10c6c1e29ffa56b92 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 16 Oct 2012 15:03:14 -0700
Subject: freezer: add missing mb's to freezer_count() and
 freezer_should_skip()

A task is considered frozen enough between freezer_do_not_count() and
freezer_count() and freezers use freezer_should_skip() to test this
condition.  This supposedly works because freezer_count() always calls
try_to_freezer() after clearing %PF_FREEZER_SKIP.

However, there currently is nothing which guarantees that
freezer_count() sees %true freezing() after clearing %PF_FREEZER_SKIP
when freezing is in progress, and vice-versa.  A task can escape the
freezing condition in effect by freezer_count() seeing !freezing() and
freezer_should_skip() seeing %PF_FREEZER_SKIP.

This patch adds smp_mb()'s to freezer_count() and
freezer_should_skip() such that either %true freezing() is visible to
freezer_count() or !PF_FREEZER_SKIP is visible to
freezer_should_skip().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@vger.kernel.org
---
 include/linux/freezer.h | 50 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index d09af4b67cf1..ee899329e65a 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -75,28 +75,62 @@ static inline bool cgroup_freezing(struct task_struct *task)
  */
 
 
-/* Tell the freezer not to count the current task as freezable. */
+/**
+ * freezer_do_not_count - tell freezer to ignore %current
+ *
+ * Tell freezers to ignore the current task when determining whether the
+ * target frozen state is reached.  IOW, the current task will be
+ * considered frozen enough by freezers.
+ *
+ * The caller shouldn't do anything which isn't allowed for a frozen task
+ * until freezer_cont() is called.  Usually, freezer[_do_not]_count() pair
+ * wrap a scheduling operation and nothing much else.
+ */
 static inline void freezer_do_not_count(void)
 {
 	current->flags |= PF_FREEZER_SKIP;
 }
 
-/*
- * Tell the freezer to count the current task as freezable again and try to
- * freeze it.
+/**
+ * freezer_count - tell freezer to stop ignoring %current
+ *
+ * Undo freezer_do_not_count().  It tells freezers that %current should be
+ * considered again and tries to freeze if freezing condition is already in
+ * effect.
  */
 static inline void freezer_count(void)
 {
 	current->flags &= ~PF_FREEZER_SKIP;
+	/*
+	 * If freezing is in progress, the following paired with smp_mb()
+	 * in freezer_should_skip() ensures that either we see %true
+	 * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP.
+	 */
+	smp_mb();
 	try_to_freeze();
 }
 
-/*
- * Check if the task should be counted as freezable by the freezer
+/**
+ * freezer_should_skip - whether to skip a task when determining frozen
+ *			 state is reached
+ * @p: task in quesion
+ *
+ * This function is used by freezers after establishing %true freezing() to
+ * test whether a task should be skipped when determining the target frozen
+ * state is reached.  IOW, if this function returns %true, @p is considered
+ * frozen enough.
  */
-static inline int freezer_should_skip(struct task_struct *p)
+static inline bool freezer_should_skip(struct task_struct *p)
 {
-	return !!(p->flags & PF_FREEZER_SKIP);
+	/*
+	 * The following smp_mb() paired with the one in freezer_count()
+	 * ensures that either freezer_count() sees %true freezing() or we
+	 * see cleared %PF_FREEZER_SKIP and return %false.  This makes it
+	 * impossible for a task to slip frozen state testing after
+	 * clearing %PF_FREEZER_SKIP.
+	 */
+	smp_mb();
+	return p->flags & PF_FREEZER_SKIP;
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From 0384e90b853357d24935c65ba0e1bdd27faa6e58 Mon Sep 17 00:00:00 2001
From: Daniel Mack
Date: Sun, 7 Oct 2012 18:19:44 +0200
Subject: spi/mcspi: allow configuration of pin directions

Allow D0 to be an input and D1 to be an output, configurable via
platform data and a new DT property.

Based on a patch from  Matus Ujhelyi <matus.ujhelyi@streamunlimited.com>

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 Documentation/devicetree/bindings/spi/omap-spi.txt |  4 +++-
 drivers/spi/spi-omap2-mcspi.c                      | 25 ++++++++++++++++------
 include/linux/platform_data/spi-omap2-mcspi.h      |  4 ++++
 3 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
index 81df374adbb9..2ef0a6b85653 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.txt
+++ b/Documentation/devicetree/bindings/spi/omap-spi.txt
@@ -6,7 +6,9 @@ Required properties:
   - "ti,omap4-spi" for OMAP4+.
 - ti,spi-num-cs : Number of chipselect supported  by the instance.
 - ti,hwmods: Name of the hwmod associated to the McSPI
-
+- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as
+			  output. The default is D0 as output and
+			  D1 as input.
 
 Example:
 
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 3542fdc664b1..51046332677c 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -130,6 +130,7 @@ struct omap2_mcspi {
 	struct omap2_mcspi_dma	*dma_channels;
 	struct device		*dev;
 	struct omap2_mcspi_regs ctx;
+	unsigned int		pin_dir:1;
 };
 
 struct omap2_mcspi_cs {
@@ -765,8 +766,15 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	/* standard 4-wire master mode:  SCK, MOSI/out, MISO/in, nCS
 	 * REVISIT: this controller could support SPI_3WIRE mode.
 	 */
-	l &= ~(OMAP2_MCSPI_CHCONF_IS|OMAP2_MCSPI_CHCONF_DPE1);
-	l |= OMAP2_MCSPI_CHCONF_DPE0;
+	if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) {
+		l &= ~OMAP2_MCSPI_CHCONF_IS;
+		l &= ~OMAP2_MCSPI_CHCONF_DPE1;
+		l |= OMAP2_MCSPI_CHCONF_DPE0;
+	} else {
+		l |= OMAP2_MCSPI_CHCONF_IS;
+		l |= OMAP2_MCSPI_CHCONF_DPE1;
+		l &= ~OMAP2_MCSPI_CHCONF_DPE0;
+	}
 
 	/* wordlength */
 	l &= ~OMAP2_MCSPI_CHCONF_WL_MASK;
@@ -1167,6 +1175,11 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev)
 	master->cleanup = omap2_mcspi_cleanup;
 	master->dev.of_node = node;
 
+	dev_set_drvdata(&pdev->dev, master);
+
+	mcspi = spi_master_get_devdata(master);
+	mcspi->master = master;
+
 	match = of_match_device(omap_mcspi_of_match, &pdev->dev);
 	if (match) {
 		u32 num_cs = 1; /* default number of chipselect */
@@ -1175,19 +1188,17 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev)
 		of_property_read_u32(node, "ti,spi-num-cs", &num_cs);
 		master->num_chipselect = num_cs;
 		master->bus_num = bus_num++;
+		if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL))
+			mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT;
 	} else {
 		pdata = pdev->dev.platform_data;
 		master->num_chipselect = pdata->num_cs;
 		if (pdev->id != -1)
 			master->bus_num = pdev->id;
+		mcspi->pin_dir = pdata->pin_dir;
 	}
 	regs_offset = pdata->regs_offset;
 
-	dev_set_drvdata(&pdev->dev, master);
-
-	mcspi = spi_master_get_devdata(master);
-	mcspi->master = master;
-
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL) {
 		status = -ENODEV;
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index a357eb26bd25..ce70f7b5a8e1 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -7,9 +7,13 @@
 
 #define OMAP4_MCSPI_REG_OFFSET 0x100
 
+#define MCSPI_PINDIR_D0_OUT_D1_IN	0
+#define MCSPI_PINDIR_D0_IN_D1_OUT	1
+
 struct omap2_mcspi_platform_config {
 	unsigned short	num_cs;
 	unsigned int regs_offset;
+	unsigned int pin_dir:1;
 };
 
 struct omap2_mcspi_dev_attr {
-- 
cgit v1.2.3-55-g7522


From 0d4529c534c1c664f25088eb5f5b4d7ce0ee2510 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Tue, 16 Oct 2012 15:56:59 +0200
Subject: regmap: make lock/unlock functions customizable

It is sometimes convenient for a regmap user to override the standard
regmap lock/unlock functions with custom functions.
For instance this can be useful in case an already existing spinlock
or mutex has to be used for locking a set of registers instead of the
internal regmap spinlock/mutex.
Note that the fast_io field of struct regmap_bus is ignored in case
custom locking functions are used.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  4 +--
 drivers/base/regmap/regmap.c   | 65 ++++++++++++++++++++++++------------------
 include/linux/regmap.h         | 16 ++++++++++-
 3 files changed, 54 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 80f9ab9c3aa4..b1ee824e2a5f 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -31,14 +31,12 @@ struct regmap_format {
 	unsigned int (*parse_val)(void *buf);
 };
 
-typedef void (*regmap_lock)(struct regmap *map);
-typedef void (*regmap_unlock)(struct regmap *map);
-
 struct regmap {
 	struct mutex mutex;
 	spinlock_t spinlock;
 	regmap_lock lock;
 	regmap_unlock unlock;
+	void *lock_arg; /* This is passed to lock/unlock functions */
 
 	struct device *dev; /* Device we do I/O on */
 	void *work_buf;     /* Scratch buffer used to format I/O */
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 52069d29ff12..5d8b75255b50 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -214,23 +214,27 @@ static unsigned int regmap_parse_32_native(void *buf)
 	return *(u32 *)buf;
 }
 
-static void regmap_lock_mutex(struct regmap *map)
+static void regmap_lock_mutex(void *__map)
 {
+	struct regmap *map = __map;
 	mutex_lock(&map->mutex);
 }
 
-static void regmap_unlock_mutex(struct regmap *map)
+static void regmap_unlock_mutex(void *__map)
 {
+	struct regmap *map = __map;
 	mutex_unlock(&map->mutex);
 }
 
-static void regmap_lock_spinlock(struct regmap *map)
+static void regmap_lock_spinlock(void *__map)
 {
+	struct regmap *map = __map;
 	spin_lock(&map->spinlock);
 }
 
-static void regmap_unlock_spinlock(struct regmap *map)
+static void regmap_unlock_spinlock(void *__map)
 {
+	struct regmap *map = __map;
 	spin_unlock(&map->spinlock);
 }
 
@@ -335,14 +339,21 @@ struct regmap *regmap_init(struct device *dev,
 		goto err;
 	}
 
-	if (bus->fast_io) {
-		spin_lock_init(&map->spinlock);
-		map->lock = regmap_lock_spinlock;
-		map->unlock = regmap_unlock_spinlock;
+	if (config->lock && config->unlock) {
+		map->lock = config->lock;
+		map->unlock = config->unlock;
+		map->lock_arg = config->lock_arg;
 	} else {
-		mutex_init(&map->mutex);
-		map->lock = regmap_lock_mutex;
-		map->unlock = regmap_unlock_mutex;
+		if (bus->fast_io) {
+			spin_lock_init(&map->spinlock);
+			map->lock = regmap_lock_spinlock;
+			map->unlock = regmap_unlock_spinlock;
+		} else {
+			mutex_init(&map->mutex);
+			map->lock = regmap_lock_mutex;
+			map->unlock = regmap_unlock_mutex;
+		}
+		map->lock_arg = map;
 	}
 	map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
 	map->format.pad_bytes = config->pad_bits / 8;
@@ -939,11 +950,11 @@ int regmap_write(struct regmap *map, unsigned int reg, unsigned int val)
 	if (reg % map->reg_stride)
 		return -EINVAL;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 
 	ret = _regmap_write(map, reg, val);
 
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 
 	return ret;
 }
@@ -975,11 +986,11 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 	if (reg % map->reg_stride)
 		return -EINVAL;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 
 	ret = _regmap_raw_write(map, reg, val, val_len);
 
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 
 	return ret;
 }
@@ -1011,7 +1022,7 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 	if (reg % map->reg_stride)
 		return -EINVAL;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 
 	/* No formatting is require if val_byte is 1 */
 	if (val_bytes == 1) {
@@ -1047,7 +1058,7 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 		kfree(wval);
 
 out:
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(regmap_bulk_write);
@@ -1137,11 +1148,11 @@ int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val)
 	if (reg % map->reg_stride)
 		return -EINVAL;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 
 	ret = _regmap_read(map, reg, val);
 
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 
 	return ret;
 }
@@ -1171,7 +1182,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	if (reg % map->reg_stride)
 		return -EINVAL;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 
 	if (regmap_volatile_range(map, reg, val_count) || map->cache_bypass ||
 	    map->cache_type == REGCACHE_NONE) {
@@ -1193,7 +1204,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	}
 
  out:
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 
 	return ret;
 }
@@ -1300,9 +1311,9 @@ int regmap_update_bits(struct regmap *map, unsigned int reg,
 	bool change;
 	int ret;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 	ret = _regmap_update_bits(map, reg, mask, val, &change);
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 
 	return ret;
 }
@@ -1326,9 +1337,9 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 {
 	int ret;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 	ret = _regmap_update_bits(map, reg, mask, val, change);
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(regmap_update_bits_check);
@@ -1357,7 +1368,7 @@ int regmap_register_patch(struct regmap *map, const struct reg_default *regs,
 	if (map->patch)
 		return -EBUSY;
 
-	map->lock(map);
+	map->lock(map->lock_arg);
 
 	bypass = map->cache_bypass;
 
@@ -1385,7 +1396,7 @@ int regmap_register_patch(struct regmap *map, const struct reg_default *regs,
 out:
 	map->cache_bypass = bypass;
 
-	map->unlock(map);
+	map->unlock(map->lock_arg);
 
 	return ret;
 }
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index e3bcc3f4dcb8..5d243786f254 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -53,6 +53,9 @@ enum regmap_endian {
 	REGMAP_ENDIAN_NATIVE,
 };
 
+typedef void (*regmap_lock)(void *);
+typedef void (*regmap_unlock)(void *);
+
 /**
  * Configuration for the register map of a device.
  *
@@ -75,6 +78,12 @@ enum regmap_endian {
  * @precious_reg: Optional callback returning true if the rgister
  *                should not be read outside of a call from the driver
  *                (eg, a clear on read interrupt status register).
+ * @lock:         Optional lock callback (overrides regmap's default lock
+ *                function, based on spinlock or mutex).
+ * @unlock:       As above for unlocking.
+ * @lock_arg:     this field is passed as the only argument of lock/unlock
+ *                functions (ignored in case regular lock/unlock functions
+ *                are not overridden).
  *
  * @max_register: Optional, specifies the maximum valid register index.
  * @reg_defaults: Power on reset values for registers (for use with
@@ -116,6 +125,9 @@ struct regmap_config {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	regmap_lock lock;
+	regmap_unlock unlock;
+	void *lock_arg;
 
 	unsigned int max_register;
 	const struct reg_default *reg_defaults;
@@ -181,7 +193,9 @@ typedef void (*regmap_hw_free_context)(void *context);
  * Description of a hardware bus for the register map infrastructure.
  *
  * @fast_io: Register IO is fast. Use a spinlock instead of a mutex
- *           to perform locking.
+ *	     to perform locking. This field is ignored if custom lock/unlock
+ *	     functions are used (see fields lock/unlock of
+ *	     struct regmap_config).
  * @write: Write operation.
  * @gather_write: Write operation with split register/value, return -ENOTSUPP
  *                if not implemented  on a given device.
-- 
cgit v1.2.3-55-g7522


From d4950281d72d8845225e3a39dbeb366c40c824c9 Mon Sep 17 00:00:00 2001
From: Mahesh Palivela
Date: Wed, 10 Oct 2012 11:25:40 +0000
Subject: ieee80211: Rename VHT cap struct

Rename struct ieee80211_vht_capabilities to ieee80211_vht_cap
and renamed its member vht_capabilities_info to vht_cap_info.

Signed-off-by: Mahesh Palivela <maheshp@posedge.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 45 +++++++++++++++++++++++++++++++--------------
 net/mac80211/main.c       |  2 +-
 net/mac80211/mlme.c       |  4 ++--
 net/mac80211/util.c       |  4 ++--
 4 files changed, 36 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 2385119f8bb0..8c803f0e4925 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1107,20 +1107,6 @@ struct ieee80211_ht_operation {
 #define WLAN_HT_SMPS_CONTROL_STATIC	1
 #define WLAN_HT_SMPS_CONTROL_DYNAMIC	3
 
-#define VHT_MCS_SUPPORTED_SET_SIZE      8
-
-struct ieee80211_vht_capabilities {
-	__le32 vht_capabilities_info;
-	u8 vht_supported_mcs_set[VHT_MCS_SUPPORTED_SET_SIZE];
-} __packed;
-
-struct ieee80211_vht_operation {
-	u8 vht_op_info_chwidth;
-	u8 vht_op_info_chan_center_freq_seg1_idx;
-	u8 vht_op_info_chan_center_freq_seg2_idx;
-	__le16 vht_basic_mcs_set;
-} __packed;
-
 /**
  * struct ieee80211_vht_mcs_info - VHT MCS information
  * @rx_mcs_map: RX MCS map 2 bits for each stream, total 8 streams
@@ -1141,6 +1127,37 @@ struct ieee80211_vht_mcs_info {
 	__le16 tx_highest;
 } __packed;
 
+/**
+ * struct ieee80211_vht_cap - VHT capabilities
+ *
+ * This structure is the "VHT capabilities element" as
+ * described in 802.11ac D3.0 8.4.2.160
+ * @vht_cap_info: VHT capability info
+ * @supp_mcs: VHT MCS supported rates
+ */
+struct ieee80211_vht_cap {
+	__le32 vht_cap_info;
+	struct ieee80211_vht_mcs_info supp_mcs;
+} __packed;
+
+/**
+ * struct ieee80211_vht_operation - VHT operation IE
+ *
+ * This structure is the "VHT operation element" as
+ * described in 802.11ac D3.0 8.4.2.161
+ * @chan_width: Operating channel width
+ * @center_freq_seg1_idx: center freq segment 1 index
+ * @center_freq_seg2_idx: center freq segment 2 index
+ * @basic_mcs_set: VHT Basic MCS rate set
+ */
+struct ieee80211_vht_operation {
+	u8 chan_width;
+	u8 center_freq_seg1_idx;
+	u8 center_freq_seg2_idx;
+	__le16 basic_mcs_set;
+} __packed;
+
+
 #define IEEE80211_VHT_MCS_ZERO_TO_SEVEN_SUPPORT 0
 #define IEEE80211_VHT_MCS_ZERO_TO_EIGHT_SUPPORT 1
 #define IEEE80211_VHT_MCS_ZERO_TO_NINE_SUPPORT  2
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 473b755b349f..620f427069c8 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -876,7 +876,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	if (supp_vht)
 		local->scan_ies_len +=
-			2 + sizeof(struct ieee80211_vht_capabilities);
+			2 + sizeof(struct ieee80211_vht_cap);
 
 	if (!local->ops->hw_scan) {
 		/* For hw_scan, driver needs to set these up. */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f24884a60614..4af5a3eb892e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -353,7 +353,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
 	cap = vht_cap.cap;
 
 	/* reserve and fill IE */
-	pos = skb_put(skb, sizeof(struct ieee80211_vht_capabilities) + 2);
+	pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
 	ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
 }
 
@@ -412,7 +412,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			4 + /* power capability */
 			2 + 2 * sband->n_channels + /* supported channels */
 			2 + sizeof(struct ieee80211_ht_cap) + /* HT */
-			2 + sizeof(struct ieee80211_vht_capabilities) + /* VHT */
+			2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
 			assoc_data->ie_len + /* extra IEs */
 			9, /* WMM */
 			GFP_KERNEL);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 558412d75ac3..3dca9827624a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1775,8 +1775,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 	__le32 tmp;
 
 	*pos++ = WLAN_EID_VHT_CAPABILITY;
-	*pos++ = sizeof(struct ieee80211_vht_capabilities);
-	memset(pos, 0, sizeof(struct ieee80211_vht_capabilities));
+	*pos++ = sizeof(struct ieee80211_vht_cap);
+	memset(pos, 0, sizeof(struct ieee80211_vht_cap));
 
 	/* capability flags */
 	tmp = cpu_to_le32(cap);
-- 
cgit v1.2.3-55-g7522


From 24282a1ca33b4a2cdfb907fb7a3ba4d0f6e93311 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 9 Oct 2012 15:18:59 +0530
Subject: regulator: tps65090: Register all regulators in single probe call

MFD driver registers the regulator driver once per device and
hence it is require to register all regulators in single probe
call.
Following are details of changes done to achieve this:
- Move the regulator enums to mfd header and remove the
  tps65090-regulator.h as it does not contain more info.
- Add max regulator and register all regulators even if there
  is no regulator init data from platform.
- Convert regulator init data to pointer type in platform data.
- Add input supply name in regulator desc to provide input supply.
- Separate desc information from driver information.
- Disable external control bit to have control through register write.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65090-regulator.c       | 195 +++++++++++++++++++--------
 include/linux/mfd/tps65090.h                 |  28 ++++
 include/linux/regulator/tps65090-regulator.h |  50 -------
 3 files changed, 165 insertions(+), 108 deletions(-)
 delete mode 100644 include/linux/regulator/tps65090-regulator.h

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65090-regulator.c b/drivers/regulator/tps65090-regulator.c
index 001ad554ac62..5f7f9311024e 100644
--- a/drivers/regulator/tps65090-regulator.c
+++ b/drivers/regulator/tps65090-regulator.c
@@ -24,15 +24,11 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/mfd/tps65090.h>
-#include <linux/regulator/tps65090-regulator.h>
 
 struct tps65090_regulator {
-	int		id;
-	/* used by regulator core */
-	struct regulator_desc	desc;
-
-	/* Device */
 	struct device		*dev;
+	struct regulator_desc	*desc;
+	struct regulator_dev	*rdev;
 };
 
 static struct regulator_ops tps65090_ops = {
@@ -41,44 +37,80 @@ static struct regulator_ops tps65090_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 };
 
-#define tps65090_REG(_id)				\
+#define tps65090_REG_DESC(_id, _sname, _en_reg, _ops)	\
 {							\
-	.id		= TPS65090_ID_##_id,		\
-	.desc = {					\
-		.name = tps65090_rails(_id),		\
-		.id = TPS65090_ID_##_id,		\
-		.ops = &tps65090_ops,			\
-		.type = REGULATOR_VOLTAGE,		\
-		.owner = THIS_MODULE,			\
-		.enable_reg = (TPS65090_ID_##_id) + 12,	\
-		.enable_mask = BIT(0),			\
-	},						\
+	.name = "TPS65090_RAILS"#_id,			\
+	.supply_name = _sname,				\
+	.id = TPS65090_ID_##_id,			\
+	.ops = &_ops,					\
+	.enable_reg = _en_reg,				\
+	.enable_mask = BIT(0),				\
+	.type = REGULATOR_VOLTAGE,			\
+	.owner = THIS_MODULE,				\
 }
 
-static struct tps65090_regulator TPS65090_regulator[] = {
-	tps65090_REG(DCDC1),
-	tps65090_REG(DCDC2),
-	tps65090_REG(DCDC3),
-	tps65090_REG(FET1),
-	tps65090_REG(FET2),
-	tps65090_REG(FET3),
-	tps65090_REG(FET4),
-	tps65090_REG(FET5),
-	tps65090_REG(FET6),
-	tps65090_REG(FET7),
+static struct regulator_desc tps65090_regulator_desc[] = {
+	tps65090_REG_DESC(DCDC1, "vsys1",   0x0C, tps65090_ops),
+	tps65090_REG_DESC(DCDC2, "vsys2",   0x0D, tps65090_ops),
+	tps65090_REG_DESC(DCDC3, "vsys3",   0x0E, tps65090_ops),
+	tps65090_REG_DESC(FET1,  "infet1",  0x0F, tps65090_ops),
+	tps65090_REG_DESC(FET2,  "infet2",  0x10, tps65090_ops),
+	tps65090_REG_DESC(FET3,  "infet3",  0x11, tps65090_ops),
+	tps65090_REG_DESC(FET4,  "infet4",  0x12, tps65090_ops),
+	tps65090_REG_DESC(FET5,  "infet5",  0x13, tps65090_ops),
+	tps65090_REG_DESC(FET6,  "infet6",  0x14, tps65090_ops),
+	tps65090_REG_DESC(FET7,  "infet7",  0x15, tps65090_ops),
 };
 
-static inline struct tps65090_regulator *find_regulator_info(int id)
+static inline bool is_dcdc(int id)
 {
-	struct tps65090_regulator *ri;
-	int i;
+	switch (id) {
+	case TPS65090_ID_DCDC1:
+	case TPS65090_ID_DCDC2:
+	case TPS65090_ID_DCDC3:
+		return true;
+	default:
+		return false;
+	}
+}
 
-	for (i = 0; i < ARRAY_SIZE(TPS65090_regulator); i++) {
-		ri = &TPS65090_regulator[i];
-		if (ri->desc.id == id)
-			return ri;
+static int __devinit tps65090_config_ext_control(
+	struct tps65090_regulator *ri, bool enable)
+{
+	int ret;
+	struct device *parent = ri->dev->parent;
+	unsigned int reg_en_reg = ri->desc->enable_reg;
+
+	if (enable)
+		ret = tps65090_set_bits(parent, reg_en_reg, 1);
+	else
+		ret =  tps65090_clr_bits(parent, reg_en_reg, 1);
+	if (ret < 0)
+		dev_err(ri->dev, "Error in updating reg 0x%x\n", reg_en_reg);
+	return ret;
+}
+
+static int __devinit tps65090_regulator_disable_ext_control(
+		struct tps65090_regulator *ri,
+		struct tps65090_regulator_plat_data *tps_pdata)
+{
+	int ret = 0;
+	struct device *parent = ri->dev->parent;
+	unsigned int reg_en_reg = ri->desc->enable_reg;
+
+	/*
+	 * First enable output for internal control if require.
+	 * And then disable external control.
+	 */
+	if (tps_pdata->reg_init_data->constraints.always_on ||
+			tps_pdata->reg_init_data->constraints.boot_on) {
+		ret =  tps65090_set_bits(parent, reg_en_reg, 0);
+		if (ret < 0) {
+			dev_err(ri->dev, "Error in set reg 0x%x\n", reg_en_reg);
+			return ret;
+		}
 	}
-	return NULL;
+	return tps65090_config_ext_control(ri, false);
 }
 
 static int __devinit tps65090_regulator_probe(struct platform_device *pdev)
@@ -87,40 +119,87 @@ static int __devinit tps65090_regulator_probe(struct platform_device *pdev)
 	struct tps65090_regulator *ri = NULL;
 	struct regulator_config config = { };
 	struct regulator_dev *rdev;
-	struct tps65090_regulator_platform_data *tps_pdata;
-	int id = pdev->id;
+	struct tps65090_regulator_plat_data *tps_pdata;
+	struct tps65090_regulator *pmic;
+	struct tps65090_platform_data *tps65090_pdata;
+	int num;
+	int ret;
 
-	dev_dbg(&pdev->dev, "Probing regulator %d\n", id);
+	dev_dbg(&pdev->dev, "Probing regulator\n");
 
-	ri = find_regulator_info(id);
-	if (ri == NULL) {
-		dev_err(&pdev->dev, "invalid regulator ID specified\n");
+	tps65090_pdata = dev_get_platdata(pdev->dev.parent);
+	if (!tps65090_pdata) {
+		dev_err(&pdev->dev, "Platform data missing\n");
 		return -EINVAL;
 	}
-	tps_pdata = pdev->dev.platform_data;
-	ri->dev = &pdev->dev;
-
-	config.dev = &pdev->dev;
-	config.init_data = &tps_pdata->regulator;
-	config.driver_data = ri;
-	config.regmap = tps65090_mfd->rmap;
-
-	rdev = regulator_register(&ri->desc, &config);
-	if (IS_ERR(rdev)) {
-		dev_err(&pdev->dev, "failed to register regulator %s\n",
-				ri->desc.name);
-		return PTR_ERR(rdev);
+
+	pmic = devm_kzalloc(&pdev->dev, TPS65090_ID_MAX * sizeof(*pmic),
+			GFP_KERNEL);
+	if (!pmic) {
+		dev_err(&pdev->dev, "mem alloc for pmic failed\n");
+		return -ENOMEM;
+	}
+
+	for (num = 0; num < TPS65090_ID_MAX; num++) {
+		tps_pdata = tps65090_pdata->reg_pdata[num];
+
+		ri = &pmic[num];
+		ri->dev = &pdev->dev;
+		ri->desc = &tps65090_regulator_desc[num];
+
+		/*
+		 * TPS5090 DCDC support the control from external digital input.
+		 * It may be possible that during boot, the external control is
+		 * enabled. Disabling external control for DCDC.
+		 */
+		if (tps_pdata && is_dcdc(num) && tps_pdata->reg_init_data) {
+			ret = tps65090_regulator_disable_ext_control(
+						ri, tps_pdata);
+			if (ret < 0) {
+				dev_err(&pdev->dev,
+						"failed disable ext control\n");
+				goto scrub;
+			}
+		}
+		config.dev = &pdev->dev;
+		config.driver_data = ri;
+		config.regmap = tps65090_mfd->rmap;
+		if (tps_pdata)
+			config.init_data = tps_pdata->reg_init_data;
+		else
+			config.init_data = NULL;
+
+		rdev = regulator_register(ri->desc, &config);
+		if (IS_ERR(rdev)) {
+			dev_err(&pdev->dev, "failed to register regulator %s\n",
+				ri->desc->name);
+			ret = PTR_ERR(rdev);
+			goto scrub;
+		}
+		ri->rdev = rdev;
 	}
 
-	platform_set_drvdata(pdev, rdev);
+	platform_set_drvdata(pdev, pmic);
 	return 0;
+
+scrub:
+	while (--num >= 0) {
+		ri = &pmic[num];
+		regulator_unregister(ri->rdev);
+	}
+	return ret;
 }
 
 static int __devexit tps65090_regulator_remove(struct platform_device *pdev)
 {
-	struct regulator_dev *rdev = platform_get_drvdata(pdev);
+	struct tps65090_regulator *pmic = platform_get_drvdata(pdev);
+	struct tps65090_regulator *ri;
+	int num;
 
-	regulator_unregister(rdev);
+	for (num = 0; num < TPS65090_ID_MAX; ++num) {
+		ri = &pmic[num];
+		regulator_unregister(ri->rdev);
+	}
 	return 0;
 }
 
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 6bc31d854626..d06c63335a2a 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -24,6 +24,23 @@
 
 #include <linux/irq.h>
 
+/* TPS65090 Regulator ID */
+enum {
+	TPS65090_ID_DCDC1,
+	TPS65090_ID_DCDC2,
+	TPS65090_ID_DCDC3,
+	TPS65090_ID_FET1,
+	TPS65090_ID_FET2,
+	TPS65090_ID_FET3,
+	TPS65090_ID_FET4,
+	TPS65090_ID_FET5,
+	TPS65090_ID_FET6,
+	TPS65090_ID_FET7,
+
+	/* Last entry for maximum ID */
+	TPS65090_ID_MAX,
+};
+
 struct tps65090 {
 	struct mutex		lock;
 	struct device		*dev;
@@ -41,10 +58,21 @@ struct tps65090_subdev_info {
 	void		*platform_data;
 };
 
+/*
+ * struct tps65090_regulator_plat_data
+ *
+ * @reg_init_data: The regulator init data.
+ */
+
+struct tps65090_regulator_plat_data {
+	struct regulator_init_data *reg_init_data;
+};
+
 struct tps65090_platform_data {
 	int irq_base;
 	int num_subdevs;
 	struct tps65090_subdev_info *subdevs;
+	struct tps65090_regulator_plat_data *reg_pdata[TPS65090_ID_MAX];
 };
 
 /*
diff --git a/include/linux/regulator/tps65090-regulator.h b/include/linux/regulator/tps65090-regulator.h
deleted file mode 100644
index 0fa04b64db3e..000000000000
--- a/include/linux/regulator/tps65090-regulator.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Regulator driver interface for TI TPS65090 PMIC family
- *
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
-
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
-
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
-
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __REGULATOR_TPS65090_H
-#define __REGULATOR_TPS65090_H
-
-#include <linux/regulator/machine.h>
-
-#define tps65090_rails(_name) "tps65090_"#_name
-
-enum {
-	TPS65090_ID_DCDC1,
-	TPS65090_ID_DCDC2,
-	TPS65090_ID_DCDC3,
-	TPS65090_ID_FET1,
-	TPS65090_ID_FET2,
-	TPS65090_ID_FET3,
-	TPS65090_ID_FET4,
-	TPS65090_ID_FET5,
-	TPS65090_ID_FET6,
-	TPS65090_ID_FET7,
-};
-
-/*
- * struct tps65090_regulator_platform_data
- *
- * @regulator: The regulator init data.
- * @slew_rate_uV_per_us: Slew rate microvolt per microsec.
- */
-
-struct tps65090_regulator_platform_data {
-	struct regulator_init_data regulator;
-};
-
-#endif	/* __REGULATOR_TPS65090_H */
-- 
cgit v1.2.3-55-g7522


From 8620ca9f77b71a0069a6151e859b988117ef1fa5 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 9 Oct 2012 15:19:00 +0530
Subject: regulator: tps65090: rename driver name and regulator name

To make the names proper and more appropriate:
Rename the driver name from tps65090-regulator to tps65090-pmic.
Rename the regulators from TPS65090_ID_* to TPS65090_REGULATOR_*

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65090-regulator.c | 16 ++++++++--------
 include/linux/mfd/tps65090.h           | 24 ++++++++++++------------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65090-regulator.c b/drivers/regulator/tps65090-regulator.c
index 5f7f9311024e..584a185f89e1 100644
--- a/drivers/regulator/tps65090-regulator.c
+++ b/drivers/regulator/tps65090-regulator.c
@@ -41,7 +41,7 @@ static struct regulator_ops tps65090_ops = {
 {							\
 	.name = "TPS65090_RAILS"#_id,			\
 	.supply_name = _sname,				\
-	.id = TPS65090_ID_##_id,			\
+	.id = TPS65090_REGULATOR_##_id,			\
 	.ops = &_ops,					\
 	.enable_reg = _en_reg,				\
 	.enable_mask = BIT(0),				\
@@ -65,9 +65,9 @@ static struct regulator_desc tps65090_regulator_desc[] = {
 static inline bool is_dcdc(int id)
 {
 	switch (id) {
-	case TPS65090_ID_DCDC1:
-	case TPS65090_ID_DCDC2:
-	case TPS65090_ID_DCDC3:
+	case TPS65090_REGULATOR_DCDC1:
+	case TPS65090_REGULATOR_DCDC2:
+	case TPS65090_REGULATOR_DCDC3:
 		return true;
 	default:
 		return false;
@@ -133,14 +133,14 @@ static int __devinit tps65090_regulator_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	pmic = devm_kzalloc(&pdev->dev, TPS65090_ID_MAX * sizeof(*pmic),
+	pmic = devm_kzalloc(&pdev->dev, TPS65090_REGULATOR_MAX * sizeof(*pmic),
 			GFP_KERNEL);
 	if (!pmic) {
 		dev_err(&pdev->dev, "mem alloc for pmic failed\n");
 		return -ENOMEM;
 	}
 
-	for (num = 0; num < TPS65090_ID_MAX; num++) {
+	for (num = 0; num < TPS65090_REGULATOR_MAX; num++) {
 		tps_pdata = tps65090_pdata->reg_pdata[num];
 
 		ri = &pmic[num];
@@ -196,7 +196,7 @@ static int __devexit tps65090_regulator_remove(struct platform_device *pdev)
 	struct tps65090_regulator *ri;
 	int num;
 
-	for (num = 0; num < TPS65090_ID_MAX; ++num) {
+	for (num = 0; num < TPS65090_REGULATOR_MAX; ++num) {
 		ri = &pmic[num];
 		regulator_unregister(ri->rdev);
 	}
@@ -205,7 +205,7 @@ static int __devexit tps65090_regulator_remove(struct platform_device *pdev)
 
 static struct platform_driver tps65090_regulator_driver = {
 	.driver	= {
-		.name	= "tps65090-regulator",
+		.name	= "tps65090-pmic",
 		.owner	= THIS_MODULE,
 	},
 	.probe		= tps65090_regulator_probe,
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index d06c63335a2a..958bf155097c 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -26,19 +26,19 @@
 
 /* TPS65090 Regulator ID */
 enum {
-	TPS65090_ID_DCDC1,
-	TPS65090_ID_DCDC2,
-	TPS65090_ID_DCDC3,
-	TPS65090_ID_FET1,
-	TPS65090_ID_FET2,
-	TPS65090_ID_FET3,
-	TPS65090_ID_FET4,
-	TPS65090_ID_FET5,
-	TPS65090_ID_FET6,
-	TPS65090_ID_FET7,
+	TPS65090_REGULATOR_DCDC1,
+	TPS65090_REGULATOR_DCDC2,
+	TPS65090_REGULATOR_DCDC3,
+	TPS65090_REGULATOR_FET1,
+	TPS65090_REGULATOR_FET2,
+	TPS65090_REGULATOR_FET3,
+	TPS65090_REGULATOR_FET4,
+	TPS65090_REGULATOR_FET5,
+	TPS65090_REGULATOR_FET6,
+	TPS65090_REGULATOR_FET7,
 
 	/* Last entry for maximum ID */
-	TPS65090_ID_MAX,
+	TPS65090_REGULATOR_MAX,
 };
 
 struct tps65090 {
@@ -72,7 +72,7 @@ struct tps65090_platform_data {
 	int irq_base;
 	int num_subdevs;
 	struct tps65090_subdev_info *subdevs;
-	struct tps65090_regulator_plat_data *reg_pdata[TPS65090_ID_MAX];
+	struct tps65090_regulator_plat_data *reg_pdata[TPS65090_REGULATOR_MAX];
 };
 
 /*
-- 
cgit v1.2.3-55-g7522


From 3a81ef8c27cea5c749a45765da4e06a7af75be2b Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 9 Oct 2012 15:19:01 +0530
Subject: regulator: tps65090: Add support for LDO regulators

TPS65090 supports the two LDOs, LDO1 and LDO2. These are
always ON regulators. The output on these LDOs are available
once the input voltage available for these LDOs.
Add support for these LDOs regulators.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65090-regulator.c | 5 +++++
 include/linux/mfd/tps65090.h           | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65090-regulator.c b/drivers/regulator/tps65090-regulator.c
index 584a185f89e1..0732d9ba3f40 100644
--- a/drivers/regulator/tps65090-regulator.c
+++ b/drivers/regulator/tps65090-regulator.c
@@ -37,6 +37,9 @@ static struct regulator_ops tps65090_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 };
 
+static struct regulator_ops tps65090_ldo_ops = {
+};
+
 #define tps65090_REG_DESC(_id, _sname, _en_reg, _ops)	\
 {							\
 	.name = "TPS65090_RAILS"#_id,			\
@@ -60,6 +63,8 @@ static struct regulator_desc tps65090_regulator_desc[] = {
 	tps65090_REG_DESC(FET5,  "infet5",  0x13, tps65090_ops),
 	tps65090_REG_DESC(FET6,  "infet6",  0x14, tps65090_ops),
 	tps65090_REG_DESC(FET7,  "infet7",  0x15, tps65090_ops),
+	tps65090_REG_DESC(LDO1,  "vsys_l1", 0,    tps65090_ldo_ops),
+	tps65090_REG_DESC(LDO2,  "vsys_l2", 0,    tps65090_ldo_ops),
 };
 
 static inline bool is_dcdc(int id)
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 958bf155097c..598921221ff0 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -36,6 +36,8 @@ enum {
 	TPS65090_REGULATOR_FET5,
 	TPS65090_REGULATOR_FET6,
 	TPS65090_REGULATOR_FET7,
+	TPS65090_REGULATOR_LDO1,
+	TPS65090_REGULATOR_LDO2,
 
 	/* Last entry for maximum ID */
 	TPS65090_REGULATOR_MAX,
-- 
cgit v1.2.3-55-g7522


From f329b1755b475f64f0472cda1ae9602e092f6f05 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 9 Oct 2012 15:19:02 +0530
Subject: regulator: tps65090: add external control support for DCDC

The TPS65090's DCDC output can also be enable/disable through the
external digital input signal. Add support for enable/disable
either through register access via I2C or through external
control inputs. The external control inputs can be driven through
GPIOs also and hence adding support for passing the GPIO number.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65090-regulator.c | 77 +++++++++++++++++++++++++---------
 include/linux/mfd/tps65090.h           |  7 +++-
 2 files changed, 63 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65090-regulator.c b/drivers/regulator/tps65090-regulator.c
index 0732d9ba3f40..412413838e12 100644
--- a/drivers/regulator/tps65090-regulator.c
+++ b/drivers/regulator/tps65090-regulator.c
@@ -18,6 +18,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
@@ -31,10 +32,13 @@ struct tps65090_regulator {
 	struct regulator_dev	*rdev;
 };
 
-static struct regulator_ops tps65090_ops = {
-	.enable = regulator_enable_regmap,
-	.disable = regulator_disable_regmap,
-	.is_enabled = regulator_is_enabled_regmap,
+static struct regulator_ops tps65090_ext_control_ops = {
+};
+
+static struct regulator_ops tps65090_reg_contol_ops = {
+	.enable		= regulator_enable_regmap,
+	.disable	= regulator_disable_regmap,
+	.is_enabled	= regulator_is_enabled_regmap,
 };
 
 static struct regulator_ops tps65090_ldo_ops = {
@@ -53,16 +57,16 @@ static struct regulator_ops tps65090_ldo_ops = {
 }
 
 static struct regulator_desc tps65090_regulator_desc[] = {
-	tps65090_REG_DESC(DCDC1, "vsys1",   0x0C, tps65090_ops),
-	tps65090_REG_DESC(DCDC2, "vsys2",   0x0D, tps65090_ops),
-	tps65090_REG_DESC(DCDC3, "vsys3",   0x0E, tps65090_ops),
-	tps65090_REG_DESC(FET1,  "infet1",  0x0F, tps65090_ops),
-	tps65090_REG_DESC(FET2,  "infet2",  0x10, tps65090_ops),
-	tps65090_REG_DESC(FET3,  "infet3",  0x11, tps65090_ops),
-	tps65090_REG_DESC(FET4,  "infet4",  0x12, tps65090_ops),
-	tps65090_REG_DESC(FET5,  "infet5",  0x13, tps65090_ops),
-	tps65090_REG_DESC(FET6,  "infet6",  0x14, tps65090_ops),
-	tps65090_REG_DESC(FET7,  "infet7",  0x15, tps65090_ops),
+	tps65090_REG_DESC(DCDC1, "vsys1",   0x0C, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(DCDC2, "vsys2",   0x0D, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(DCDC3, "vsys3",   0x0E, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET1,  "infet1",  0x0F, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET2,  "infet2",  0x10, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET3,  "infet3",  0x11, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET4,  "infet4",  0x12, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET5,  "infet5",  0x13, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET6,  "infet6",  0x14, tps65090_reg_contol_ops),
+	tps65090_REG_DESC(FET7,  "infet7",  0x15, tps65090_reg_contol_ops),
 	tps65090_REG_DESC(LDO1,  "vsys_l1", 0,    tps65090_ldo_ops),
 	tps65090_REG_DESC(LDO2,  "vsys_l2", 0,    tps65090_ldo_ops),
 };
@@ -118,6 +122,22 @@ static int __devinit tps65090_regulator_disable_ext_control(
 	return tps65090_config_ext_control(ri, false);
 }
 
+static void __devinit tps65090_configure_regulator_config(
+		struct tps65090_regulator_plat_data *tps_pdata,
+		struct regulator_config *config)
+{
+	if (gpio_is_valid(tps_pdata->gpio)) {
+		int gpio_flag = GPIOF_OUT_INIT_LOW;
+
+		if (tps_pdata->reg_init_data->constraints.always_on ||
+				tps_pdata->reg_init_data->constraints.boot_on)
+			gpio_flag = GPIOF_OUT_INIT_HIGH;
+
+		config->ena_gpio = tps_pdata->gpio;
+		config->ena_gpio_flags = gpio_flag;
+	}
+}
+
 static int __devinit tps65090_regulator_probe(struct platform_device *pdev)
 {
 	struct tps65090 *tps65090_mfd = dev_get_drvdata(pdev->dev.parent);
@@ -154,18 +174,24 @@ static int __devinit tps65090_regulator_probe(struct platform_device *pdev)
 
 		/*
 		 * TPS5090 DCDC support the control from external digital input.
-		 * It may be possible that during boot, the external control is
-		 * enabled. Disabling external control for DCDC.
+		 * Configure it as per platform data.
 		 */
 		if (tps_pdata && is_dcdc(num) && tps_pdata->reg_init_data) {
-			ret = tps65090_regulator_disable_ext_control(
+			if (tps_pdata->enable_ext_control) {
+				tps65090_configure_regulator_config(
+						tps_pdata, &config);
+				ri->desc->ops = &tps65090_ext_control_ops;
+			} else {
+				ret = tps65090_regulator_disable_ext_control(
 						ri, tps_pdata);
-			if (ret < 0) {
-				dev_err(&pdev->dev,
+				if (ret < 0) {
+					dev_err(&pdev->dev,
 						"failed disable ext control\n");
-				goto scrub;
+					goto scrub;
+				}
 			}
 		}
+
 		config.dev = &pdev->dev;
 		config.driver_data = ri;
 		config.regmap = tps65090_mfd->rmap;
@@ -182,6 +208,17 @@ static int __devinit tps65090_regulator_probe(struct platform_device *pdev)
 			goto scrub;
 		}
 		ri->rdev = rdev;
+
+		/* Enable external control if it is require */
+		if (tps_pdata && is_dcdc(num) && tps_pdata->reg_init_data &&
+				tps_pdata->enable_ext_control) {
+			ret = tps65090_config_ext_control(ri, true);
+			if (ret < 0) {
+				/* Increment num to get unregister rdev */
+				num++;
+				goto scrub;
+			}
+		}
 	}
 
 	platform_set_drvdata(pdev, pmic);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 598921221ff0..804e280c1e1d 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -64,10 +64,15 @@ struct tps65090_subdev_info {
  * struct tps65090_regulator_plat_data
  *
  * @reg_init_data: The regulator init data.
+ * @enable_ext_control: Enable extrenal control or not. Only available for
+ *     DCDC1, DCDC2 and DCDC3.
+ * @gpio: Gpio number if external control is enabled and controlled through
+ *     gpio.
  */
-
 struct tps65090_regulator_plat_data {
 	struct regulator_init_data *reg_init_data;
+	bool enable_ext_control;
+	int gpio;
 };
 
 struct tps65090_platform_data {
-- 
cgit v1.2.3-55-g7522


From a8f7445c7b0d2b7a523e521e2d14974804910ad0 Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Tue, 2 Oct 2012 14:12:31 -0700
Subject: ARM: OMAP: Move plat/led.h to include/linux/platform_data

We need to move this away from plat for ARM common
zImage support.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-palmtt.c      |  2 +-
 arch/arm/mach-omap2/board-apollon.c     |  2 +-
 arch/arm/plat-omap/include/plat/led.h   | 24 ------------------------
 include/linux/platform_data/leds-omap.h | 22 ++++++++++++++++++++++
 4 files changed, 24 insertions(+), 26 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/led.h
 create mode 100644 include/linux/platform_data/leds-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/board-palmtt.c b/arch/arm/mach-omap1/board-palmtt.c
index b1d2e802fec0..09c2418e947d 100644
--- a/arch/arm/mach-omap1/board-palmtt.c
+++ b/arch/arm/mach-omap1/board-palmtt.c
@@ -28,12 +28,12 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 #include <linux/platform_data/omap1_bl.h>
+#include <linux/platform_data/leds-omap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
-#include <plat/led.h>
 #include <mach/flash.h>
 #include <mach/mux.h>
 #include <plat-omap/dma-omap.h>
diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c
index 8cdd18699de6..64cf1bde0f3b 100644
--- a/arch/arm/mach-omap2/board-apollon.c
+++ b/arch/arm/mach-omap2/board-apollon.c
@@ -28,12 +28,12 @@
 #include <linux/clk.h>
 #include <linux/smc91x.h>
 #include <linux/gpio.h>
+#include <linux/platform_data/leds-omap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 
-#include <plat/led.h>
 #include "common.h"
 #include "gpmc.h"
 
diff --git a/arch/arm/plat-omap/include/plat/led.h b/arch/arm/plat-omap/include/plat/led.h
deleted file mode 100644
index 25e451e7e2fd..000000000000
--- a/arch/arm/plat-omap/include/plat/led.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *  arch/arm/plat-omap/include/mach/led.h
- *
- *  Copyright (C) 2006 Samsung Electronics
- *  Kyungmin Park <kyungmin.park@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef ASMARM_ARCH_LED_H
-#define ASMARM_ARCH_LED_H
-
-struct omap_led_config {
-	struct led_classdev	cdev;
-	s16			gpio;
-};
-
-struct omap_led_platform_data {
-	s16			nr_leds;
-	struct omap_led_config	*leds;
-};
-
-#endif
diff --git a/include/linux/platform_data/leds-omap.h b/include/linux/platform_data/leds-omap.h
new file mode 100644
index 000000000000..56c9b2a0ada5
--- /dev/null
+++ b/include/linux/platform_data/leds-omap.h
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (C) 2006 Samsung Electronics
+ *  Kyungmin Park <kyungmin.park@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef ASMARM_ARCH_LED_H
+#define ASMARM_ARCH_LED_H
+
+struct omap_led_config {
+	struct led_classdev	cdev;
+	s16			gpio;
+};
+
+struct omap_led_platform_data {
+	s16			nr_leds;
+	struct omap_led_config	*leds;
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 3821b4247b40d6b95a59a2895ea6e9bd3f983f04 Mon Sep 17 00:00:00 2001
From: Arend van Spriel
Date: Fri, 12 Oct 2012 12:28:16 +0200
Subject: wireless: remove duplicate enum ieee80211_eid definitions

WLAN_EID_WPA and WLAN_EID_GENERIC mapped to the same value
as WLAN_EID_VENDOR_SPECIFIC. The last one being more in line
with the standard specification. Removing WLAN_EID_WPA and
WLAN_EID_GENERIC as there are no longer drivers using these.

Cc: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8c803f0e4925..85764a900731 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1457,8 +1457,6 @@ enum ieee80211_eid {
 
 	WLAN_EID_RSN = 48,
 	WLAN_EID_MMIE = 76,
-	WLAN_EID_WPA = 221,
-	WLAN_EID_GENERIC = 221,
 	WLAN_EID_VENDOR_SPECIFIC = 221,
 	WLAN_EID_QOS_PARAMETER = 222,
 
-- 
cgit v1.2.3-55-g7522


From b9bf6882c1f9451ce1e80aaed32107673a735613 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong
Date: Wed, 17 Oct 2012 13:46:52 +0800
Subject: KVM: VMX: report internal error for the unhandleable event

VM exits during Event Delivery is really unexpected if it is not caused
by Exceptions/EPT-VIOLATION/TASK_SWITCH, we'd better to report an internal
and freeze the guest, the VMM has the chance to check the guest

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c  | 19 +++++++++++++++----
 include/linux/kvm.h |  8 ++++++--
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad6b1dd06f8b..b8a0841dfe7d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5979,13 +5979,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
+	/*
+	 * Note:
+	 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
+	 * delivery event since it indicates guest is accessing MMIO.
+	 * The vm-exit can be triggered again after return to guest that
+	 * will cause infinite loop.
+	 */
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
 			exit_reason != EXIT_REASON_EPT_VIOLATION &&
-			exit_reason != EXIT_REASON_TASK_SWITCH))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
-		       "(0x%x) and exit reason is 0x%x\n",
-		       __func__, vectoring_info, exit_reason);
+			exit_reason != EXIT_REASON_TASK_SWITCH)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.data[0] = vectoring_info;
+		vcpu->run->internal.data[1] = exit_reason;
+		return 0;
+	}
 
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
 	    !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 65ad5c624c70..494a84c37c3e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -170,8 +170,12 @@ struct kvm_pit_config {
 #define KVM_EXIT_WATCHDOG         21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
-#define KVM_INTERNAL_ERROR_EMULATION 1
-#define KVM_INTERNAL_ERROR_SIMUL_EX 2
+/* Emulate instruction failed. */
+#define KVM_INTERNAL_ERROR_EMULATION	1
+/* Encounter unexpected simultaneous exceptions. */
+#define KVM_INTERNAL_ERROR_SIMUL_EX	2
+/* Encounter unexpected vm-exit due to delivery event. */
+#define KVM_INTERNAL_ERROR_DELIVERY_EV	3
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
-- 
cgit v1.2.3-55-g7522


From b2409b65d9401f7e3c572fc59db6a920fc7a163e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab
Date: Fri, 19 Oct 2012 07:29:17 -0300
Subject: [media] remove include/linux/dvb/dmx.h

The only reason for this header is to make sure that include
linux/time.h were added before uapi/*/dmx.h. Just push down the
time.h header on the few places where this is used, and drop
this new header.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb-core/dmxdev.h    |  1 +
 drivers/media/firewire/firedtv.h   |  1 +
 drivers/media/pci/ttpci/av7110.h   |  1 +
 drivers/media/usb/tlg2300/pd-dvb.c |  1 +
 include/linux/dvb/dmx.h            | 29 -----------------------------
 5 files changed, 4 insertions(+), 29 deletions(-)
 delete mode 100644 include/linux/dvb/dmx.h

(limited to 'include/linux')

diff --git a/drivers/media/dvb-core/dmxdev.h b/drivers/media/dvb-core/dmxdev.h
index 02ebe28f830d..48c6cf92ab99 100644
--- a/drivers/media/dvb-core/dmxdev.h
+++ b/drivers/media/dvb-core/dmxdev.h
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
+#include <linux/time.h>
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/fs.h>
diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h
index 4fdcd8cb7530..c2ba085e0d20 100644
--- a/drivers/media/firewire/firedtv.h
+++ b/drivers/media/firewire/firedtv.h
@@ -13,6 +13,7 @@
 #ifndef _FIREDTV_H
 #define _FIREDTV_H
 
+#include <linux/time.h>
 #include <linux/dvb/dmx.h>
 #include <linux/dvb/frontend.h>
 #include <linux/list.h>
diff --git a/drivers/media/pci/ttpci/av7110.h b/drivers/media/pci/ttpci/av7110.h
index 88b3b2d6cc0e..a378662b1dcf 100644
--- a/drivers/media/pci/ttpci/av7110.h
+++ b/drivers/media/pci/ttpci/av7110.h
@@ -6,6 +6,7 @@
 #include <linux/netdevice.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
+#include <linux/time.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
diff --git a/drivers/media/usb/tlg2300/pd-dvb.c b/drivers/media/usb/tlg2300/pd-dvb.c
index 30fcb117e898..ca4994a5190c 100644
--- a/drivers/media/usb/tlg2300/pd-dvb.c
+++ b/drivers/media/usb/tlg2300/pd-dvb.c
@@ -1,6 +1,7 @@
 #include "pd-common.h"
 #include <linux/kernel.h>
 #include <linux/usb.h>
+#include <linux/time.h>
 #include <linux/dvb/dmx.h>
 #include <linux/delay.h>
 #include <linux/gfp.h>
diff --git a/include/linux/dvb/dmx.h b/include/linux/dvb/dmx.h
deleted file mode 100644
index 0be6d8f2b52b..000000000000
--- a/include/linux/dvb/dmx.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * dmx.h
- *
- * Copyright (C) 2000 Marcus Metzler <marcus@convergence.de>
- *                  & Ralph  Metzler <ralph@convergence.de>
- *                    for convergence integrated media GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- */
-#ifndef _DVBDMX_H_
-#define _DVBDMX_H_
-
-#include <linux/time.h>
-#include <uapi/linux/dvb/dmx.h>
-
-#endif /*_DVBDMX_H_*/
-- 
cgit v1.2.3-55-g7522


From 74df06daf632ce2d321d01cb046004768352efc4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab
Date: Fri, 19 Oct 2012 07:41:50 -0300
Subject: [media] Remove include/linux/dvb/ stuff

The only file left there is include/linux/dvb/video.h. The
only function for it is to include linux/compiler.h, but this
is already indirectly included. So, get rid of it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/Kbuild  |  0
 include/linux/dvb/video.h | 29 -----------------------------
 2 files changed, 29 deletions(-)
 delete mode 100644 include/linux/dvb/Kbuild
 delete mode 100644 include/linux/dvb/video.h

(limited to 'include/linux')

diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
deleted file mode 100644
index 85c20d925696..000000000000
--- a/include/linux/dvb/video.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * video.h
- *
- * Copyright (C) 2000 Marcus Metzler <marcus@convergence.de>
- *                  & Ralph  Metzler <ralph@convergence.de>
- *                    for convergence integrated media GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- */
-#ifndef _DVBVIDEO_H_
-#define _DVBVIDEO_H_
-
-#include <linux/compiler.h>
-#include <uapi/linux/dvb/video.h>
-
-#endif /*_DVBVIDEO_H_*/
-- 
cgit v1.2.3-55-g7522


From 103d9fb907058e4eb052f4f7302d1b07eb6a7792 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 16 Oct 2012 17:29:00 +0100
Subject: iio: Add a logarithmic fractional value type

For ADCs or DACs the denominator for fractional types often is a power of two.
In this case we can use a shift operation instead of the rather expensive 64 bit
division. This patch adds a new fractional type which expects the denominator to
be specified as the log2 of the actual denominator. E.g. for ADCs and DACs this
will usually be the number of significant bits.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 5 +++++
 drivers/iio/inkern.c            | 3 +++
 include/linux/iio/types.h       | 1 +
 3 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 6eb24dbc081e..37650a72b31f 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -397,6 +397,11 @@ static ssize_t iio_read_channel_info(struct device *dev,
 		val2 = do_div(tmp, 1000000000LL);
 		val = tmp;
 		return sprintf(buf, "%d.%09u\n", val, val2);
+	case IIO_VAL_FRACTIONAL_LOG2:
+		tmp = (s64)val * 1000000000LL >> val2;
+		val2 = do_div(tmp, 1000000000LL);
+		val = tmp;
+		return sprintf(buf, "%d.%09u\n", val, val2);
 	default:
 		return 0;
 	}
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 5230a33886c0..b394621d362c 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -314,6 +314,9 @@ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan,
 		*processed = div_s64(raw64 * (s64)scale_val * scale,
 				     scale_val2);
 		break;
+	case IIO_VAL_FRACTIONAL_LOG2:
+		*processed = (raw64 * (s64)scale_val * scale) >> scale_val2;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 5c647ecfd5ba..87b196a2d698 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -58,5 +58,6 @@ enum iio_modifier {
 #define IIO_VAL_INT_PLUS_NANO 3
 #define IIO_VAL_INT_PLUS_MICRO_DB 4
 #define IIO_VAL_FRACTIONAL 10
+#define IIO_VAL_FRACTIONAL_LOG2 11
 
 #endif /* _IIO_TYPES_H_ */
-- 
cgit v1.2.3-55-g7522


From 8341dc04dfb33fbae71727ae648e20c51abc40e3 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 16 Oct 2012 17:29:00 +0100
Subject: iio:dac: Add support for the ad5449

This patch adds support for the AD5415, AD5426, AD5429, AD5432, AD5439, AD5443
and AD5449 single and dual channel DACs.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/dac/Kconfig              |  10 +
 drivers/iio/dac/Makefile             |   1 +
 drivers/iio/dac/ad5449.c             | 375 +++++++++++++++++++++++++++++++++++
 include/linux/platform_data/ad5449.h |  40 ++++
 4 files changed, 426 insertions(+)
 create mode 100644 drivers/iio/dac/ad5449.c
 create mode 100644 include/linux/platform_data/ad5449.h

(limited to 'include/linux')

diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index b1c0ee5294ca..f68756e6dd62 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -67,6 +67,16 @@ config AD5446
 	  To compile this driver as a module, choose M here: the
 	  module will be called ad5446.
 
+config AD5449
+	tristate "Analog Device AD5449 and similar DACs driver"
+	depends on SPI_MASTER
+	help
+	  Say yes here to build support for Analog Devices AD5415, AD5426, AD5429,
+	  AD5432, AD5439, AD5443, AD5449 Digital to Analog Converters.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad5449.
+
 config AD5504
 	tristate "Analog Devices AD5504/AD5501 DAC SPI driver"
 	depends on SPI
diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile
index c0d333b23ba3..5b528ebb3343 100644
--- a/drivers/iio/dac/Makefile
+++ b/drivers/iio/dac/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_AD5624R_SPI) += ad5624r_spi.o
 obj-$(CONFIG_AD5064) += ad5064.o
 obj-$(CONFIG_AD5504) += ad5504.o
 obj-$(CONFIG_AD5446) += ad5446.o
+obj-$(CONFIG_AD5449) += ad5449.o
 obj-$(CONFIG_AD5755) += ad5755.o
 obj-$(CONFIG_AD5764) += ad5764.o
 obj-$(CONFIG_AD5791) += ad5791.o
diff --git a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c
new file mode 100644
index 000000000000..5b43030fe6e3
--- /dev/null
+++ b/drivers/iio/dac/ad5449.c
@@ -0,0 +1,375 @@
+/*
+ * AD5415, AD5426, AD5429, AD5432, AD5439, AD5443, AD5449 Digital to Analog
+ * Converter driver.
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/regulator/consumer.h>
+#include <asm/unaligned.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+
+#include <linux/platform_data/ad5449.h>
+
+#define AD5449_MAX_CHANNELS		2
+#define AD5449_MAX_VREFS		2
+
+#define AD5449_CMD_NOOP			0x0
+#define AD5449_CMD_LOAD_AND_UPDATE(x)	(0x1 + (x) * 3)
+#define AD5449_CMD_READ(x)		(0x2 + (x) * 3)
+#define AD5449_CMD_LOAD(x)		(0x3 + (x) * 3)
+#define AD5449_CMD_CTRL			13
+
+#define AD5449_CTRL_SDO_OFFSET		10
+#define AD5449_CTRL_DAISY_CHAIN		BIT(9)
+#define AD5449_CTRL_HCLR_TO_MIDSCALE	BIT(8)
+#define AD5449_CTRL_SAMPLE_RISING	BIT(7)
+
+/**
+ * struct ad5449_chip_info - chip specific information
+ * @channels:		Channel specification
+ * @num_channels:	Number of channels
+ * @has_ctrl:		Chip has a control register
+ */
+struct ad5449_chip_info {
+	const struct iio_chan_spec *channels;
+	unsigned int num_channels;
+	bool has_ctrl;
+};
+
+/**
+ * struct ad5449 - driver instance specific data
+ * @spi:		the SPI device for this driver instance
+ * @chip_info:		chip model specific constants, available modes etc
+ * @vref_reg:		vref supply regulators
+ * @has_sdo:		whether the SDO line is connected
+ * @dac_cache:		Cache for the DAC values
+ * @data:		spi transfer buffers
+ */
+struct ad5449 {
+	struct spi_device		*spi;
+	const struct ad5449_chip_info	*chip_info;
+	struct regulator_bulk_data	vref_reg[AD5449_MAX_VREFS];
+
+	bool has_sdo;
+	uint16_t dac_cache[AD5449_MAX_CHANNELS];
+
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	__be16 data[2] ____cacheline_aligned;
+};
+
+enum ad5449_type {
+	ID_AD5426,
+	ID_AD5429,
+	ID_AD5432,
+	ID_AD5439,
+	ID_AD5443,
+	ID_AD5449,
+};
+
+static int ad5449_write(struct iio_dev *indio_dev, unsigned int addr,
+	unsigned int val)
+{
+	struct ad5449 *st = iio_priv(indio_dev);
+	int ret;
+
+	mutex_lock(&indio_dev->mlock);
+	st->data[0] = cpu_to_be16((addr << 12) | val);
+	ret = spi_write(st->spi, st->data, 2);
+	mutex_unlock(&indio_dev->mlock);
+
+	return ret;
+}
+
+static int ad5449_read(struct iio_dev *indio_dev, unsigned int addr,
+	unsigned int *val)
+{
+	struct ad5449 *st = iio_priv(indio_dev);
+	int ret;
+	struct spi_message msg;
+	struct spi_transfer t[] = {
+		{
+			.tx_buf = &st->data[0],
+			.len = 2,
+			.cs_change = 1,
+		}, {
+			.tx_buf = &st->data[1],
+			.rx_buf = &st->data[1],
+			.len = 2,
+		},
+	};
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&t[0], &msg);
+	spi_message_add_tail(&t[1], &msg);
+
+	mutex_lock(&indio_dev->mlock);
+	st->data[0] = cpu_to_be16(addr << 12);
+	st->data[1] = cpu_to_be16(AD5449_CMD_NOOP);
+
+	ret = spi_sync(st->spi, &msg);
+	if (ret < 0)
+		return ret;
+
+	*val = be16_to_cpu(st->data[1]);
+	mutex_unlock(&indio_dev->mlock);
+
+	return 0;
+}
+
+static int ad5449_read_raw(struct iio_dev *indio_dev,
+	struct iio_chan_spec const *chan, int *val, int *val2, long info)
+{
+	struct ad5449 *st = iio_priv(indio_dev);
+	struct regulator_bulk_data *reg;
+	int scale_uv;
+	int ret;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		if (st->has_sdo) {
+			ret = ad5449_read(indio_dev,
+				AD5449_CMD_READ(chan->address), val);
+			if (ret)
+				return ret;
+			*val &= 0xfff;
+		} else {
+			*val = st->dac_cache[chan->address];
+		}
+
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SCALE:
+		reg = &st->vref_reg[chan->channel];
+		scale_uv = regulator_get_voltage(reg->consumer);
+		if (scale_uv < 0)
+			return scale_uv;
+
+		*val = scale_uv / 1000;
+		*val2 = chan->scan_type.realbits;
+
+		return IIO_VAL_FRACTIONAL_LOG2;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int ad5449_write_raw(struct iio_dev *indio_dev,
+	struct iio_chan_spec const *chan, int val, int val2, long info)
+{
+	struct ad5449 *st = iio_priv(indio_dev);
+	int ret;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		if (val < 0 || val >= (1 << chan->scan_type.realbits))
+			return -EINVAL;
+
+		ret = ad5449_write(indio_dev,
+			AD5449_CMD_LOAD_AND_UPDATE(chan->address),
+			val << chan->scan_type.shift);
+		if (ret == 0)
+			st->dac_cache[chan->address] = val;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct iio_info ad5449_info = {
+	.read_raw = ad5449_read_raw,
+	.write_raw = ad5449_write_raw,
+	.driver_module = THIS_MODULE,
+};
+
+#define AD5449_CHANNEL(chan, bits) {				\
+	.type = IIO_VOLTAGE,					\
+	.indexed = 1,						\
+	.output = 1,						\
+	.channel = (chan),					\
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |		\
+		IIO_CHAN_INFO_SCALE_SEPARATE_BIT,		\
+	.address = (chan),					\
+	.scan_type = IIO_ST('u', (bits), 16, 12 - (bits)),	\
+}
+
+#define DECLARE_AD5449_CHANNELS(name, bits) \
+const struct iio_chan_spec name[] = { \
+	AD5449_CHANNEL(0, bits), \
+	AD5449_CHANNEL(1, bits), \
+}
+
+static DECLARE_AD5449_CHANNELS(ad5429_channels, 8);
+static DECLARE_AD5449_CHANNELS(ad5439_channels, 10);
+static DECLARE_AD5449_CHANNELS(ad5449_channels, 12);
+
+static const struct ad5449_chip_info ad5449_chip_info[] = {
+	[ID_AD5426] = {
+		.channels = ad5429_channels,
+		.num_channels = 1,
+		.has_ctrl = false,
+	},
+	[ID_AD5429] = {
+		.channels = ad5429_channels,
+		.num_channels = 2,
+		.has_ctrl = true,
+	},
+	[ID_AD5432] = {
+		.channels = ad5439_channels,
+		.num_channels = 1,
+		.has_ctrl = false,
+	},
+	[ID_AD5439] = {
+		.channels = ad5439_channels,
+		.num_channels = 2,
+		.has_ctrl = true,
+	},
+	[ID_AD5443] = {
+		.channels = ad5449_channels,
+		.num_channels = 1,
+		.has_ctrl = false,
+	},
+	[ID_AD5449] = {
+		.channels = ad5449_channels,
+		.num_channels = 2,
+		.has_ctrl = true,
+	},
+};
+
+static const char *ad5449_vref_name(struct ad5449 *st, int n)
+{
+	if (st->chip_info->num_channels == 1)
+		return "VREF";
+
+	if (n == 0)
+		return "VREFA";
+	else
+		return "VREFB";
+}
+
+static int __devinit ad5449_spi_probe(struct spi_device *spi)
+{
+	struct ad5449_platform_data *pdata = spi->dev.platform_data;
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	struct iio_dev *indio_dev;
+	struct ad5449 *st;
+	unsigned int i;
+	int ret;
+
+	indio_dev = iio_device_alloc(sizeof(*st));
+	if (indio_dev == NULL)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+	spi_set_drvdata(spi, indio_dev);
+
+	st->chip_info = &ad5449_chip_info[id->driver_data];
+	st->spi = spi;
+
+	for (i = 0; i < st->chip_info->num_channels; ++i)
+		st->vref_reg[i].supply = ad5449_vref_name(st, i);
+
+	ret = regulator_bulk_get(&spi->dev, st->chip_info->num_channels,
+				st->vref_reg);
+	if (ret)
+		goto error_free;
+
+	ret = regulator_bulk_enable(st->chip_info->num_channels, st->vref_reg);
+	if (ret)
+		goto error_free_reg;
+
+	indio_dev->dev.parent = &spi->dev;
+	indio_dev->name = id->name;
+	indio_dev->info = &ad5449_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = st->chip_info->channels;
+	indio_dev->num_channels = st->chip_info->num_channels;
+
+	if (st->chip_info->has_ctrl) {
+		unsigned int ctrl = 0x00;
+		if (pdata) {
+			if (pdata->hardware_clear_to_midscale)
+				ctrl |= AD5449_CTRL_HCLR_TO_MIDSCALE;
+			ctrl |= pdata->sdo_mode << AD5449_CTRL_SDO_OFFSET;
+			st->has_sdo = pdata->sdo_mode != AD5449_SDO_DISABLED;
+		} else {
+			st->has_sdo = true;
+		}
+		ad5449_write(indio_dev, AD5449_CMD_CTRL, ctrl);
+	}
+
+	ret = iio_device_register(indio_dev);
+	if (ret)
+		goto error_disable_reg;
+
+	return 0;
+
+error_disable_reg:
+	regulator_bulk_disable(st->chip_info->num_channels, st->vref_reg);
+error_free_reg:
+	regulator_bulk_free(st->chip_info->num_channels, st->vref_reg);
+error_free:
+	iio_device_free(indio_dev);
+
+	return ret;
+}
+
+static int __devexit ad5449_spi_remove(struct spi_device *spi)
+{
+	struct iio_dev *indio_dev = spi_get_drvdata(spi);
+	struct ad5449 *st = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+
+	regulator_bulk_disable(st->chip_info->num_channels, st->vref_reg);
+	regulator_bulk_free(st->chip_info->num_channels, st->vref_reg);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static const struct spi_device_id ad5449_spi_ids[] = {
+	{ "ad5415", ID_AD5449 },
+	{ "ad5426", ID_AD5426 },
+	{ "ad5429", ID_AD5429 },
+	{ "ad5432", ID_AD5432 },
+	{ "ad5439", ID_AD5439 },
+	{ "ad5443", ID_AD5443 },
+	{ "ad5449", ID_AD5449 },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad5449_spi_ids);
+
+static struct spi_driver ad5449_spi_driver = {
+	.driver = {
+		.name = "ad5449",
+		.owner = THIS_MODULE,
+	},
+	.probe = ad5449_spi_probe,
+	.remove = __devexit_p(ad5449_spi_remove),
+	.id_table = ad5449_spi_ids,
+};
+module_spi_driver(ad5449_spi_driver);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("Analog Devices AD5449 and similar DACs");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/ad5449.h b/include/linux/platform_data/ad5449.h
new file mode 100644
index 000000000000..bd712bd4b94e
--- /dev/null
+++ b/include/linux/platform_data/ad5449.h
@@ -0,0 +1,40 @@
+/*
+ * AD5415, AD5426, AD5429, AD5432, AD5439, AD5443, AD5449 Digital to Analog
+ * Converter driver.
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_AD5449_H__
+#define __LINUX_PLATFORM_DATA_AD5449_H__
+
+/**
+ * enum ad5449_sdo_mode - AD5449 SDO pin configuration
+ * @AD5449_SDO_DRIVE_FULL: Drive the SDO pin with full strength.
+ * @AD5449_SDO_DRIVE_WEAK: Drive the SDO pin with not full strength.
+ * @AD5449_SDO_OPEN_DRAIN: Operate the SDO pin in open-drain mode.
+ * @AD5449_SDO_DISABLED: Disable the SDO pin, in this mode it is not possible to
+ *			read back from the device.
+ */
+enum ad5449_sdo_mode {
+	AD5449_SDO_DRIVE_FULL = 0x0,
+	AD5449_SDO_DRIVE_WEAK = 0x1,
+	AD5449_SDO_OPEN_DRAIN = 0x2,
+	AD5449_SDO_DISABLED = 0x3,
+};
+
+/**
+ * struct ad5449_platform_data - Platform data for the ad5449 DAC driver
+ * @sdo_mode: SDO pin mode
+ * @hardware_clear_to_midscale: Whether asserting the hardware CLR pin sets the
+ *			outputs to midscale (true) or to zero scale(false).
+ */
+struct ad5449_platform_data {
+	enum ad5449_sdo_mode sdo_mode;
+	bool hardware_clear_to_midscale;
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 49655bb8a51565f0375a4f783334c9de78134be5 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:29:49 +0200
Subject: bcma: just do the necessary things in early register on SoCs

Some parts of the initialization for chip common and the pcie core are
accessing the sprom struct, but it is not initialized at that stage.
Just do the necessary thing in the early register on SoCs and not the
complete initialization to read out the nvram from the flash chip.
After it is possible to read out the nvram, the sprom should be parsed
from it and the full initialization of the cores should be run.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_chipcommon.c            | 23 ++++++++++++++++++-----
 drivers/bcma/driver_chipcommon_pmu.c        |  5 ++++-
 drivers/bcma/driver_mips.c                  | 26 +++++++++++++++++++-------
 drivers/bcma/main.c                         |  8 ++++----
 include/linux/bcma/bcma_driver_chipcommon.h |  3 +++
 include/linux/bcma/bcma_driver_mips.h       |  3 +++
 6 files changed, 51 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index a4c3ebcc4c86..ffd74e51f02d 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -22,12 +22,9 @@ static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset,
 	return value;
 }
 
-void bcma_core_chipcommon_init(struct bcma_drv_cc *cc)
+void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc)
 {
-	u32 leddc_on = 10;
-	u32 leddc_off = 90;
-
-	if (cc->setup_done)
+	if (cc->early_setup_done)
 		return;
 
 	if (cc->core->id.rev >= 11)
@@ -36,6 +33,22 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc)
 	if (cc->core->id.rev >= 35)
 		cc->capabilities_ext = bcma_cc_read32(cc, BCMA_CC_CAP_EXT);
 
+	if (cc->capabilities & BCMA_CC_CAP_PMU)
+		bcma_pmu_early_init(cc);
+
+	cc->early_setup_done = true;
+}
+
+void bcma_core_chipcommon_init(struct bcma_drv_cc *cc)
+{
+	u32 leddc_on = 10;
+	u32 leddc_off = 90;
+
+	if (cc->setup_done)
+		return;
+
+	bcma_core_chipcommon_early_init(cc);
+
 	if (cc->core->id.rev >= 20) {
 		bcma_cc_write32(cc, BCMA_CC_GPIOPULLUP, 0);
 		bcma_cc_write32(cc, BCMA_CC_GPIOPULLDOWN, 0);
diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index 201faf106b3f..a63ddd9c70eb 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -144,7 +144,7 @@ static void bcma_pmu_workarounds(struct bcma_drv_cc *cc)
 	}
 }
 
-void bcma_pmu_init(struct bcma_drv_cc *cc)
+void bcma_pmu_early_init(struct bcma_drv_cc *cc)
 {
 	u32 pmucap;
 
@@ -153,7 +153,10 @@ void bcma_pmu_init(struct bcma_drv_cc *cc)
 
 	bcma_debug(cc->core->bus, "Found rev %u PMU (capabilities 0x%08X)\n",
 		   cc->pmu.rev, pmucap);
+}
 
+void bcma_pmu_init(struct bcma_drv_cc *cc)
+{
 	if (cc->pmu.rev == 1)
 		bcma_cc_mask32(cc, BCMA_CC_PMU_CTL,
 			      ~BCMA_CC_PMU_CTL_NOILPONW);
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index cc65b45b4368..f44f1fb67011 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -212,16 +212,33 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore)
 	}
 }
 
+void bcma_core_mips_early_init(struct bcma_drv_mips *mcore)
+{
+	struct bcma_bus *bus = mcore->core->bus;
+
+	if (mcore->early_setup_done)
+		return;
+
+	bcma_chipco_serial_init(&bus->drv_cc);
+	bcma_core_mips_flash_detect(mcore);
+
+	mcore->early_setup_done = true;
+}
+
 void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 {
 	struct bcma_bus *bus;
 	struct bcma_device *core;
 	bus = mcore->core->bus;
 
+	if (mcore->setup_done)
+		return;
+
 	bcma_info(bus, "Initializing MIPS core...\n");
 
-	if (!mcore->setup_done)
-		mcore->assigned_irqs = 1;
+	bcma_core_mips_early_init(mcore);
+
+	mcore->assigned_irqs = 1;
 
 	/* Assign IRQs to all cores on the bus */
 	list_for_each_entry(core, &bus->cores, list) {
@@ -256,10 +273,5 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 	bcma_info(bus, "IRQ reconfiguration done\n");
 	bcma_core_mips_dump_irq(bus);
 
-	if (mcore->setup_done)
-		return;
-
-	bcma_chipco_serial_init(&bus->drv_cc);
-	bcma_core_mips_flash_detect(mcore);
 	mcore->setup_done = true;
 }
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 432aeeedfd5e..bea2d7cfa6c2 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -274,18 +274,18 @@ int __init bcma_bus_early_register(struct bcma_bus *bus,
 		return -1;
 	}
 
-	/* Init CC core */
+	/* Early init CC core */
 	core = bcma_find_core(bus, bcma_cc_core_id(bus));
 	if (core) {
 		bus->drv_cc.core = core;
-		bcma_core_chipcommon_init(&bus->drv_cc);
+		bcma_core_chipcommon_early_init(&bus->drv_cc);
 	}
 
-	/* Init MIPS core */
+	/* Early init MIPS core */
 	core = bcma_find_core(bus, BCMA_CORE_MIPS_74K);
 	if (core) {
 		bus->drv_mips.core = core;
-		bcma_core_mips_init(&bus->drv_mips);
+		bcma_core_mips_early_init(&bus->drv_mips);
 	}
 
 	bcma_info(bus, "Early bus registered\n");
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 1cf1749440ac..fbde7cbd2d7d 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -552,6 +552,7 @@ struct bcma_drv_cc {
 	u32 capabilities;
 	u32 capabilities_ext;
 	u8 setup_done:1;
+	u8 early_setup_done:1;
 	/* Fast Powerup Delay constant */
 	u16 fast_pwrup_delay;
 	struct bcma_chipcommon_pmu pmu;
@@ -583,6 +584,7 @@ struct bcma_drv_cc {
 	bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set))
 
 extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc);
+extern void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc);
 
 extern void bcma_chipco_suspend(struct bcma_drv_cc *cc);
 extern void bcma_chipco_resume(struct bcma_drv_cc *cc);
@@ -606,6 +608,7 @@ u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value);
 
 /* PMU support */
 extern void bcma_pmu_init(struct bcma_drv_cc *cc);
+extern void bcma_pmu_early_init(struct bcma_drv_cc *cc);
 
 extern void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset,
 				  u32 value);
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index c0043645cdcb..0baf8a56b794 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -35,13 +35,16 @@ struct bcma_device;
 struct bcma_drv_mips {
 	struct bcma_device *core;
 	u8 setup_done:1;
+	u8 early_setup_done:1;
 	unsigned int assigned_irqs;
 };
 
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 extern void bcma_core_mips_init(struct bcma_drv_mips *mcore);
+extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore);
 #else
 static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { }
+static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { }
 #endif
 
 extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore);
-- 
cgit v1.2.3-55-g7522


From 360dc31e9cc3f74d0d91a9d19b154c12592f4ffc Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:33:49 +0200
Subject: bcma: mark pflash as present when available

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_mips.c                  | 1 +
 include/linux/bcma/bcma_driver_chipcommon.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index f44f1fb67011..c0fb1d45c6db 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -190,6 +190,7 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore)
 		break;
 	case BCMA_CC_FLASHT_PARA:
 		bcma_debug(bus, "Found parallel flash\n");
+		bus->drv_cc.pflash.present = true;
 		bus->drv_cc.pflash.window = 0x1c000000;
 		bus->drv_cc.pflash.window_size = 0x02000000;
 
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index fbde7cbd2d7d..79993969953a 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -510,6 +510,7 @@ struct bcma_chipcommon_pmu {
 
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 struct bcma_pflash {
+	bool present;
 	u8 buswidth;
 	u32 window;
 	u32 window_size;
-- 
cgit v1.2.3-55-g7522


From cc787081bcab5a83051c2a936927634d066e7284 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:33:50 +0200
Subject: bcma: add and use constants for the flash windows

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_chipcommon_sflash.c | 4 ++--
 drivers/bcma/driver_mips.c              | 4 ++--
 include/linux/bcma/bcma_regs.h          | 5 ++++-
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon_sflash.c b/drivers/bcma/driver_chipcommon_sflash.c
index 2c4eec2ca5a0..a0993fcd13c2 100644
--- a/drivers/bcma/driver_chipcommon_sflash.c
+++ b/drivers/bcma/driver_chipcommon_sflash.c
@@ -12,7 +12,7 @@
 
 static struct resource bcma_sflash_resource = {
 	.name	= "bcma_sflash",
-	.start	= BCMA_SFLASH,
+	.start	= BCMA_SOC_FLASH2,
 	.end	= 0,
 	.flags  = IORESOURCE_MEM | IORESOURCE_READONLY,
 };
@@ -116,7 +116,7 @@ int bcma_sflash_init(struct bcma_drv_cc *cc)
 		return -ENOTSUPP;
 	}
 
-	sflash->window = BCMA_SFLASH;
+	sflash->window = BCMA_SOC_FLASH2;
 	sflash->blocksize = e->blocksize;
 	sflash->numblocks = e->numblocks;
 	sflash->size = sflash->blocksize * sflash->numblocks;
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index c0fb1d45c6db..d8e2fba482e6 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -191,8 +191,8 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore)
 	case BCMA_CC_FLASHT_PARA:
 		bcma_debug(bus, "Found parallel flash\n");
 		bus->drv_cc.pflash.present = true;
-		bus->drv_cc.pflash.window = 0x1c000000;
-		bus->drv_cc.pflash.window_size = 0x02000000;
+		bus->drv_cc.pflash.window = BCMA_SOC_FLASH2;
+		bus->drv_cc.pflash.window_size = BCMA_SOC_FLASH2_SZ;
 
 		if ((bcma_read32(bus->drv_cc.core, BCMA_CC_FLASH_CFG) &
 		     BCMA_CC_FLASH_CFG_DS) == 0)
diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h
index 6c9cb93ae3de..7e8104bb7a7e 100644
--- a/include/linux/bcma/bcma_regs.h
+++ b/include/linux/bcma/bcma_regs.h
@@ -85,6 +85,9 @@
 							 * (2 ZettaBytes), high 32 bits
 							 */
 
-#define BCMA_SFLASH			0x1c000000
+#define BCMA_SOC_FLASH1			0x1fc00000	/* MIPS Flash Region 1 */
+#define BCMA_SOC_FLASH1_SZ		0x00400000	/* MIPS Size of Flash Region 1 */
+#define BCMA_SOC_FLASH2			0x1c000000	/* Flash Region 2 (region 1 shadowed here) */
+#define BCMA_SOC_FLASH2_SZ		0x02000000	/* Size of Flash Region 2 */
 
 #endif /* LINUX_BCMA_REGS_H_ */
-- 
cgit v1.2.3-55-g7522


From e661b75a44cc811426ea005c3cb858e45bd73d57 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:33:51 +0200
Subject: bcma: mark nflash if it is the boot flash

There are some devices which are able to boot from nand flash and other
are using a serial flash for booting. Add a bool to indicate that the
device is booted from that flash chip and not from some other chip also
connected to the SoC. This is needed to find the nvram, as it is stored
on the flash the devices booted from.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_chipcommon_nflash.c     | 3 +++
 include/linux/bcma/bcma_driver_chipcommon.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon_nflash.c b/drivers/bcma/driver_chipcommon_nflash.c
index 9042781edec3..dbda91e4dff5 100644
--- a/drivers/bcma/driver_chipcommon_nflash.c
+++ b/drivers/bcma/driver_chipcommon_nflash.c
@@ -32,6 +32,9 @@ int bcma_nflash_init(struct bcma_drv_cc *cc)
 	}
 
 	cc->nflash.present = true;
+	if (cc->core->id.rev == 38 &&
+	    (cc->status & BCMA_CC_CHIPST_5357_NAND_BOOT))
+		cc->nflash.boot = true;
 
 	/* Prepare platform device, but don't register it yet. It's too early,
 	 * malloc (required by device_private_init) is not available yet. */
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 79993969953a..145f3c56227f 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -533,6 +533,7 @@ struct mtd_info;
 
 struct bcma_nflash {
 	bool present;
+	bool boot;		/* This is the flash the SoC boots from */
 
 	struct mtd_info *mtd;
 };
-- 
cgit v1.2.3-55-g7522


From 54c974984e8840c9e20390ce16e3d9f4ea674499 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:36:17 +0200
Subject: ssb: move parallel flash config into an own struct

This is a preparing step for adding serial flash support.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/mips/bcm47xx/nvram.c           |  4 ++--
 arch/mips/bcm47xx/wgt634u.c         |  8 ++++----
 drivers/ssb/driver_mipscore.c       | 14 +++++++-------
 include/linux/ssb/ssb_driver_mips.h |  9 ++++++---
 4 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index d43ceff5be47..48a4c70b3842 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -43,8 +43,8 @@ static void early_nvram_init(void)
 #ifdef CONFIG_BCM47XX_SSB
 	case BCM47XX_BUS_TYPE_SSB:
 		mcore_ssb = &bcm47xx_bus.ssb.mipscore;
-		base = mcore_ssb->flash_window;
-		lim = mcore_ssb->flash_window_size;
+		base = mcore_ssb->pflash.window;
+		lim = mcore_ssb->pflash.window_size;
 		break;
 #endif
 #ifdef CONFIG_BCM47XX_BCMA
diff --git a/arch/mips/bcm47xx/wgt634u.c b/arch/mips/bcm47xx/wgt634u.c
index e9f9ec8d443b..e80d585731aa 100644
--- a/arch/mips/bcm47xx/wgt634u.c
+++ b/arch/mips/bcm47xx/wgt634u.c
@@ -156,10 +156,10 @@ static int __init wgt634u_init(void)
 					    SSB_CHIPCO_IRQ_GPIO);
 		}
 
-		wgt634u_flash_data.width = mcore->flash_buswidth;
-		wgt634u_flash_resource.start = mcore->flash_window;
-		wgt634u_flash_resource.end = mcore->flash_window
-					   + mcore->flash_window_size
+		wgt634u_flash_data.width = mcore->pflash.buswidth;
+		wgt634u_flash_resource.start = mcore->pflash.window;
+		wgt634u_flash_resource.end = mcore->pflash.window
+					   + mcore->pflash.window_size
 					   - 1;
 		return platform_add_devices(wgt634u_devices,
 					    ARRAY_SIZE(wgt634u_devices));
diff --git a/drivers/ssb/driver_mipscore.c b/drivers/ssb/driver_mipscore.c
index c6250867a95d..dcad2c40836e 100644
--- a/drivers/ssb/driver_mipscore.c
+++ b/drivers/ssb/driver_mipscore.c
@@ -192,9 +192,9 @@ static void ssb_mips_flash_detect(struct ssb_mipscore *mcore)
 
 	/* When there is no chipcommon on the bus there is 4MB flash */
 	if (!bus->chipco.dev) {
-		mcore->flash_buswidth = 2;
-		mcore->flash_window = SSB_FLASH1;
-		mcore->flash_window_size = SSB_FLASH1_SZ;
+		mcore->pflash.buswidth = 2;
+		mcore->pflash.window = SSB_FLASH1;
+		mcore->pflash.window_size = SSB_FLASH1_SZ;
 		return;
 	}
 
@@ -206,13 +206,13 @@ static void ssb_mips_flash_detect(struct ssb_mipscore *mcore)
 		break;
 	case SSB_CHIPCO_FLASHT_PARA:
 		pr_debug("Found parallel flash\n");
-		mcore->flash_window = SSB_FLASH2;
-		mcore->flash_window_size = SSB_FLASH2_SZ;
+		mcore->pflash.window = SSB_FLASH2;
+		mcore->pflash.window_size = SSB_FLASH2_SZ;
 		if ((ssb_read32(bus->chipco.dev, SSB_CHIPCO_FLASH_CFG)
 		               & SSB_CHIPCO_CFG_DS16) == 0)
-			mcore->flash_buswidth = 1;
+			mcore->pflash.buswidth = 1;
 		else
-			mcore->flash_buswidth = 2;
+			mcore->pflash.buswidth = 2;
 		break;
 	}
 }
diff --git a/include/linux/ssb/ssb_driver_mips.h b/include/linux/ssb/ssb_driver_mips.h
index 5f44e9740cd2..5d057db53071 100644
--- a/include/linux/ssb/ssb_driver_mips.h
+++ b/include/linux/ssb/ssb_driver_mips.h
@@ -13,6 +13,11 @@ struct ssb_serial_port {
 	unsigned int reg_shift;
 };
 
+struct ssb_pflash {
+	u8 buswidth;
+	u32 window;
+	u32 window_size;
+};
 
 struct ssb_mipscore {
 	struct ssb_device *dev;
@@ -20,9 +25,7 @@ struct ssb_mipscore {
 	int nr_serial_ports;
 	struct ssb_serial_port serial_ports[4];
 
-	u8 flash_buswidth;
-	u32 flash_window;
-	u32 flash_window_size;
+	struct ssb_pflash pflash;
 };
 
 extern void ssb_mipscore_init(struct ssb_mipscore *mcore);
-- 
cgit v1.2.3-55-g7522


From d954162c54ea81cee12acec1ee5fa19214d0de96 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:36:18 +0200
Subject: ssb: add attribute to indicate a parallel flash is available

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_mipscore.c       | 2 ++
 include/linux/ssb/ssb_driver_mips.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_mipscore.c b/drivers/ssb/driver_mipscore.c
index dcad2c40836e..b918ba922306 100644
--- a/drivers/ssb/driver_mipscore.c
+++ b/drivers/ssb/driver_mipscore.c
@@ -192,6 +192,7 @@ static void ssb_mips_flash_detect(struct ssb_mipscore *mcore)
 
 	/* When there is no chipcommon on the bus there is 4MB flash */
 	if (!bus->chipco.dev) {
+		mcore->pflash.present = true;
 		mcore->pflash.buswidth = 2;
 		mcore->pflash.window = SSB_FLASH1;
 		mcore->pflash.window_size = SSB_FLASH1_SZ;
@@ -206,6 +207,7 @@ static void ssb_mips_flash_detect(struct ssb_mipscore *mcore)
 		break;
 	case SSB_CHIPCO_FLASHT_PARA:
 		pr_debug("Found parallel flash\n");
+		mcore->pflash.present = true;
 		mcore->pflash.window = SSB_FLASH2;
 		mcore->pflash.window_size = SSB_FLASH2_SZ;
 		if ((ssb_read32(bus->chipco.dev, SSB_CHIPCO_FLASH_CFG)
diff --git a/include/linux/ssb/ssb_driver_mips.h b/include/linux/ssb/ssb_driver_mips.h
index 5d057db53071..07a9c7a2e088 100644
--- a/include/linux/ssb/ssb_driver_mips.h
+++ b/include/linux/ssb/ssb_driver_mips.h
@@ -14,6 +14,7 @@ struct ssb_serial_port {
 };
 
 struct ssb_pflash {
+	bool present;
 	u8 buswidth;
 	u32 window;
 	u32 window_size;
-- 
cgit v1.2.3-55-g7522


From dfae714361ba75323914da19eb411aaae53d6af0 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 29 Sep 2012 20:40:18 +0200
Subject: bcma: add an extra pcie core struct

The BCM4706 has two PCIe host controller on the bcma bus. For PCIe
client mode it is assumed that there is only one PCIe controller so the
PCIe driver, like b43 and brcmsmac are accessing the first PCIe
controller when they want to issue a operation on the host controller.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_pci_host.c                    |  4 ++++
 drivers/bcma/main.c                               | 25 ++++++++++++++++++++---
 drivers/net/wireless/b43/main.c                   |  2 +-
 drivers/net/wireless/brcm80211/brcmsmac/aiutils.c |  4 ++--
 drivers/net/wireless/brcm80211/brcmsmac/main.c    |  2 +-
 include/linux/bcma/bcma.h                         |  2 +-
 6 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index 9baf886e82df..63172c9f871a 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -452,6 +452,8 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 			pc_host->mem_resource.start = BCMA_SOC_PCI_MEM;
 			pc_host->mem_resource.end = BCMA_SOC_PCI_MEM +
 						    BCMA_SOC_PCI_MEM_SZ - 1;
+			pc_host->io_resource.start = 0x100;
+			pc_host->io_resource.end = 0x47F;
 			pci_membase_1G = BCMA_SOC_PCIE_DMA_H32;
 			pcicore_write32(pc, BCMA_CORE_PCI_SBTOPCI0,
 					tmp | BCMA_SOC_PCI_MEM);
@@ -459,6 +461,8 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 			pc_host->mem_resource.start = BCMA_SOC_PCI1_MEM;
 			pc_host->mem_resource.end = BCMA_SOC_PCI1_MEM +
 						    BCMA_SOC_PCI_MEM_SZ - 1;
+			pc_host->io_resource.start = 0x480;
+			pc_host->io_resource.end = 0x7FF;
 			pci_membase_1G = BCMA_SOC_PCIE1_DMA_H32;
 			pc_host->host_cfg_addr = BCMA_SOC_PCI1_CFG;
 			pcicore_write32(pc, BCMA_CORE_PCI_SBTOPCI0,
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index d6b5c4ca4c43..7f473cf0469e 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -81,6 +81,18 @@ struct bcma_device *bcma_find_core(struct bcma_bus *bus, u16 coreid)
 }
 EXPORT_SYMBOL_GPL(bcma_find_core);
 
+static struct bcma_device *bcma_find_core_unit(struct bcma_bus *bus, u16 coreid,
+					       u8 unit)
+{
+	struct bcma_device *core;
+
+	list_for_each_entry(core, &bus->cores, list) {
+		if (core->id.id == coreid && core->core_unit == unit)
+			return core;
+	}
+	return NULL;
+}
+
 static void bcma_release_core_dev(struct device *dev)
 {
 	struct bcma_device *core = container_of(dev, struct bcma_device, dev);
@@ -211,10 +223,17 @@ int __devinit bcma_bus_register(struct bcma_bus *bus)
 	}
 
 	/* Init PCIE core */
-	core = bcma_find_core(bus, BCMA_CORE_PCIE);
+	core = bcma_find_core_unit(bus, BCMA_CORE_PCIE, 0);
+	if (core) {
+		bus->drv_pci[0].core = core;
+		bcma_core_pci_init(&bus->drv_pci[0]);
+	}
+
+	/* Init PCIE core */
+	core = bcma_find_core_unit(bus, BCMA_CORE_PCIE, 1);
 	if (core) {
-		bus->drv_pci.core = core;
-		bcma_core_pci_init(&bus->drv_pci);
+		bus->drv_pci[1].core = core;
+		bcma_core_pci_init(&bus->drv_pci[1]);
 	}
 
 	/* Init GBIT MAC COMMON core */
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 73730e94e0ac..7358ea2eb576 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4652,7 +4652,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
 	switch (dev->dev->bus_type) {
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
-		bcma_core_pci_irq_ctl(&dev->dev->bdev->bus->drv_pci,
+		bcma_core_pci_irq_ctl(&dev->dev->bdev->bus->drv_pci[0],
 				      dev->dev->bdev, true);
 		break;
 #endif
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/aiutils.c b/drivers/net/wireless/brcm80211/brcmsmac/aiutils.c
index b89f1272b93f..de96290f5ccd 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/aiutils.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/aiutils.c
@@ -692,7 +692,7 @@ void ai_pci_up(struct si_pub *sih)
 	sii = container_of(sih, struct si_info, pub);
 
 	if (sii->icbus->hosttype == BCMA_HOSTTYPE_PCI)
-		bcma_core_pci_extend_L1timer(&sii->icbus->drv_pci, true);
+		bcma_core_pci_extend_L1timer(&sii->icbus->drv_pci[0], true);
 }
 
 /* Unconfigure and/or apply various WARs when going down */
@@ -703,7 +703,7 @@ void ai_pci_down(struct si_pub *sih)
 	sii = container_of(sih, struct si_info, pub);
 
 	if (sii->icbus->hosttype == BCMA_HOSTTYPE_PCI)
-		bcma_core_pci_extend_L1timer(&sii->icbus->drv_pci, false);
+		bcma_core_pci_extend_L1timer(&sii->icbus->drv_pci[0], false);
 }
 
 /* Enable BT-COEX & Ex-PA for 4313 */
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index 75086b37c817..565c15abbed5 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -5077,7 +5077,7 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw)
 	 * Configure pci/pcmcia here instead of in brcms_c_attach()
 	 * to allow mfg hotswap:  down, hotswap (chip power cycle), up.
 	 */
-	bcma_core_pci_irq_ctl(&wlc_hw->d11core->bus->drv_pci, wlc_hw->d11core,
+	bcma_core_pci_irq_ctl(&wlc_hw->d11core->bus->drv_pci[0], wlc_hw->d11core,
 			      true);
 
 	/*
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4180eb78d575..fd15d9829705 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -251,7 +251,7 @@ struct bcma_bus {
 	u8 num;
 
 	struct bcma_drv_cc drv_cc;
-	struct bcma_drv_pci drv_pci;
+	struct bcma_drv_pci drv_pci[2];
 	struct bcma_drv_mips drv_mips;
 	struct bcma_drv_gmac_cmn drv_gmac_cmn;
 
-- 
cgit v1.2.3-55-g7522


From 733deca197143857f938b41d671cd7ce9c53c4bc Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 20 Oct 2012 13:13:11 -0400
Subject: Drop struct pt_regs * argument in compat_sys_execve()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/tile/include/asm/compat.h    | 6 ------
 arch/tile/include/asm/processor.h | 3 +++
 arch/tile/kernel/compat.c         | 1 -
 arch/tile/kernel/intvec_64.S      | 1 -
 arch/tile/kernel/process.c        | 6 +++---
 include/linux/compat.h            | 2 --
 6 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 3063e6fc8daa..3bcf1b94b563 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -275,9 +275,6 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
 struct compat_sigaction;
 struct compat_siginfo;
 struct compat_sigaltstack;
-long compat_sys_execve(const char __user *path,
-		       compat_uptr_t __user *argv,
-		       compat_uptr_t __user *envp, struct pt_regs *);
 long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
 			     struct compat_sigaction __user *oact,
 			     size_t sigsetsize);
@@ -304,9 +301,6 @@ long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 				      struct compat_timespec __user *interval);
 
 /* These are the intvec_64.S trampolines. */
-long _compat_sys_execve(const char __user *path,
-			const compat_uptr_t __user *argv,
-			const compat_uptr_t __user *envp);
 long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
 			    struct compat_sigaltstack __user *uoss_ptr);
 long _compat_sys_rt_sigreturn(void);
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 8c4dd9ff91eb..9a83e531e843 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -239,6 +239,9 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_TOP(task)	(task_ksp0(task) - STACK_TOP_DELTA)
 #define task_pt_regs(task) \
   ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define current_pt_regs()                                   \
+  ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
+                      (KSTK_PTREGS_GAP - 1)) - 1)
 #define task_sp(task)	(task_pt_regs(task)->sp)
 #define task_pc(task)	(task_pt_regs(task)->pc)
 /* Aliases for pc and sp (used in fs/proc/array.c) */
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index d67459b9ac2a..a8e5a847037c 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -103,7 +103,6 @@ long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 #define compat_sys_readahead sys32_readahead
 
 /* Call the trampolines to manage pt_regs where necessary. */
-#define compat_sys_execve _compat_sys_execve
 #define compat_sys_sigaltstack _compat_sys_sigaltstack
 #define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn
 #define sys_clone _sys_clone
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 7c06d597ffd0..73f6c0a648c4 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1194,7 +1194,6 @@ PTREGS_SYSCALL(sys_execve, r3)
 PTREGS_SYSCALL(sys_sigaltstack, r2)
 PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
 #ifdef CONFIG_COMPAT
-PTREGS_SYSCALL(compat_sys_execve, r3)
 PTREGS_SYSCALL(compat_sys_sigaltstack, r2)
 PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0)
 #endif
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 307d010696c9..9dc139127f86 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -614,8 +614,7 @@ out:
 #ifdef CONFIG_COMPAT
 long compat_sys_execve(const char __user *path,
 		       compat_uptr_t __user *argv,
-		       compat_uptr_t __user *envp,
-		       struct pt_regs *regs)
+		       compat_uptr_t __user *envp)
 {
 	long error;
 	struct filename *filename;
@@ -624,7 +623,8 @@ long compat_sys_execve(const char __user *path,
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
-	error = compat_do_execve(filename->name, argv, envp, regs);
+	error = compat_do_execve(filename->name, argv, envp,
+				 current_pt_regs());
 	putname(filename);
 	if (error == 0)
 		single_step_execve();
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d0ced1011f2f..d2db71077d93 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -286,10 +286,8 @@ asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
 
 int compat_do_execve(const char *filename, const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp, struct pt_regs *regs);
-#ifdef __ARCH_WANT_SYS_EXECVE
 asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp);
-#endif
 
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-- 
cgit v1.2.3-55-g7522


From 7e05484f02e1ea05a3aae0724d4df1e8a5a1920f Mon Sep 17 00:00:00 2001
From: Bryan Venteicher
Date: Tue, 16 Oct 2012 23:55:54 +1030
Subject: virtio-scsi: Add real 2-clause BSD license to header

This is analogous to commit a1b383870a made by Rusty Russell to all
the VirtIO headers at the time. This eases the use of the header as
is by other OSes.

Signed-off-by: Bryan Venteicher <bryanv@daemoninthecloset.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_scsi.h | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index d6b4440387b7..4195b97a3def 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -1,7 +1,31 @@
+/*
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
 #ifndef _LINUX_VIRTIO_SCSI_H
 #define _LINUX_VIRTIO_SCSI_H
-/* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
 
 #define VIRTIO_SCSI_CDB_SIZE   32
 #define VIRTIO_SCSI_SENSE_SIZE 96
-- 
cgit v1.2.3-55-g7522


From b8ec56d8eabc6382bd6586024138d9f1d1121ceb Mon Sep 17 00:00:00 2001
From: Matt Porter
Date: Wed, 17 Oct 2012 16:08:03 +0200
Subject: ASoC: davinci: replace private sram api with genalloc

Removes the DaVinci private SRAM API and replaces it with
the genalloc API. The SRAM gen_pool is passed in pdata since
DaVinci is in the early stages of DT conversion.

[zonque@gmail.com: stub out gen_pool functions for
		   !CONFIG_GENERIC_ALLOCATOR]

Signed-off-by: Matt Porter <mporter@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/platform_data/davinci_asp.h |  3 +++
 sound/soc/davinci/davinci-mcasp.c         |  2 ++
 sound/soc/davinci/davinci-pcm.c           | 37 ++++++++++++++++++++++++++-----
 sound/soc/davinci/davinci-pcm.h           |  2 ++
 4 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h
index d0c5825876f8..f3d6e4f20962 100644
--- a/include/linux/platform_data/davinci_asp.h
+++ b/include/linux/platform_data/davinci_asp.h
@@ -16,6 +16,8 @@
 #ifndef __DAVINCI_ASP_H
 #define __DAVINCI_ASP_H
 
+#include <linux/genalloc.h>
+
 struct snd_platform_data {
 	u32 tx_dma_offset;
 	u32 rx_dma_offset;
@@ -30,6 +32,7 @@ struct snd_platform_data {
 	unsigned enable_channel_combine:1;
 	unsigned sram_size_playback;
 	unsigned sram_size_capture;
+	struct gen_pool *sram_pool;
 
 	/*
 	 * If McBSP peripheral gets the clock from an external pin,
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 4fd149bb1ae7..571559501b0b 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -1167,6 +1167,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 	dma_data = &dev->dma_params[SNDRV_PCM_STREAM_PLAYBACK];
 	dma_data->asp_chan_q = pdata->asp_chan_q;
 	dma_data->ram_chan_q = pdata->ram_chan_q;
+	dma_data->sram_pool = pdata->sram_pool;
 	dma_data->sram_size = pdata->sram_size_playback;
 	dma_data->dma_addr = (dma_addr_t) (pdata->tx_dma_offset +
 							mem->start);
@@ -1184,6 +1185,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 	dma_data = &dev->dma_params[SNDRV_PCM_STREAM_CAPTURE];
 	dma_data->asp_chan_q = pdata->asp_chan_q;
 	dma_data->ram_chan_q = pdata->ram_chan_q;
+	dma_data->sram_pool = pdata->sram_pool;
 	dma_data->sram_size = pdata->sram_size_capture;
 	dma_data->dma_addr = (dma_addr_t)(pdata->rx_dma_offset +
 							mem->start);
diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c
index 6ebbc8885518..afab81f844ae 100644
--- a/sound/soc/davinci/davinci-pcm.c
+++ b/sound/soc/davinci/davinci-pcm.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
+#include <linux/genalloc.h>
 
 #include <sound/core.h>
 #include <sound/pcm.h>
@@ -23,7 +24,6 @@
 #include <sound/soc.h>
 
 #include <asm/dma.h>
-#include <mach/sram.h>
 
 #include "davinci-pcm.h"
 
@@ -251,7 +251,9 @@ static void davinci_pcm_dma_irq(unsigned link, u16 ch_status, void *data)
 	}
 }
 
-static int allocate_sram(struct snd_pcm_substream *substream, unsigned size,
+#ifdef CONFIG_GENERIC_ALLOCATOR
+static int allocate_sram(struct snd_pcm_substream *substream,
+		struct gen_pool *sram_pool, unsigned size,
 		struct snd_pcm_hardware *ppcm)
 {
 	struct snd_dma_buffer *buf = &substream->dma_buffer;
@@ -263,9 +265,10 @@ static int allocate_sram(struct snd_pcm_substream *substream, unsigned size,
 		return 0;
 
 	ppcm->period_bytes_max = size;
-	iram_virt = sram_alloc(size, &iram_phys);
+	iram_virt = (void *)gen_pool_alloc(sram_pool, size);
 	if (!iram_virt)
 		goto exit1;
+	iram_phys = gen_pool_virt_to_phys(sram_pool, (unsigned)iram_virt);
 	iram_dma = kzalloc(sizeof(*iram_dma), GFP_KERNEL);
 	if (!iram_dma)
 		goto exit2;
@@ -277,11 +280,33 @@ static int allocate_sram(struct snd_pcm_substream *substream, unsigned size,
 	return 0;
 exit2:
 	if (iram_virt)
-		sram_free(iram_virt, size);
+		gen_pool_free(sram_pool, (unsigned)iram_virt, size);
 exit1:
 	return -ENOMEM;
 }
 
+static void davinci_free_sram(struct snd_pcm_substream *substream,
+			      struct snd_dma_buffer *iram_dma)
+{
+	struct davinci_runtime_data *prtd = substream->runtime->private_data;
+	struct gen_pool *sram_pool = prtd->params->sram_pool;
+
+	gen_pool_free(sram_pool, (unsigned) iram_dma->area, iram_dma->bytes);
+}
+#else
+static int allocate_sram(struct snd_pcm_substream *substream,
+		struct gen_pool *sram_pool, unsigned size,
+		struct snd_pcm_hardware *ppcm)
+{
+	return 0;
+}
+
+static void davinci_free_sram(struct snd_pcm_substream *substream,
+			      struct snd_dma_buffer *iram_dma)
+{
+}
+#endif
+
 /*
  * Only used with ping/pong.
  * This is called after runtime->dma_addr, period_bytes and data_type are valid
@@ -668,7 +693,7 @@ static int davinci_pcm_open(struct snd_pcm_substream *substream)
 
 	ppcm = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
 			&pcm_hardware_playback : &pcm_hardware_capture;
-	allocate_sram(substream, params->sram_size, ppcm);
+	allocate_sram(substream, params->sram_pool, params->sram_size, ppcm);
 	snd_soc_set_runtime_hwparams(substream, ppcm);
 	/* ensure that buffer size is a multiple of period size */
 	ret = snd_pcm_hw_constraint_integer(runtime,
@@ -811,7 +836,7 @@ static void davinci_pcm_free(struct snd_pcm *pcm)
 		buf->area = NULL;
 		iram_dma = buf->private_data;
 		if (iram_dma) {
-			sram_free(iram_dma->area, iram_dma->bytes);
+			davinci_free_sram(substream, iram_dma);
 			kfree(iram_dma);
 		}
 	}
diff --git a/sound/soc/davinci/davinci-pcm.h b/sound/soc/davinci/davinci-pcm.h
index fc4d01cdd8c9..b6ef7039dd09 100644
--- a/sound/soc/davinci/davinci-pcm.h
+++ b/sound/soc/davinci/davinci-pcm.h
@@ -12,6 +12,7 @@
 #ifndef _DAVINCI_PCM_H
 #define _DAVINCI_PCM_H
 
+#include <linux/genalloc.h>
 #include <linux/platform_data/davinci_asp.h>
 #include <mach/edma.h>
 
@@ -20,6 +21,7 @@ struct davinci_pcm_dma_params {
 	unsigned short acnt;
 	dma_addr_t dma_addr;		/* device physical address for DMA */
 	unsigned sram_size;
+	struct gen_pool *sram_pool;	/* SRAM gen_pool for ping pong */
 	enum dma_event_q asp_chan_q;	/* event queue number for ASP channel */
 	enum dma_event_q ram_chan_q;	/* event queue number for RAM channel */
 	unsigned char data_type;	/* xfer data type */
-- 
cgit v1.2.3-55-g7522


From a03bede5c73a6876fa891cfe82a65460dc9f4698 Mon Sep 17 00:00:00 2001
From: Alan Stern
Date: Mon, 1 Oct 2012 10:31:53 -0400
Subject: USB: update documentation for URB_ISO_ASAP

This patch (as1611) updates the USB documentation and kerneldoc to
give a more precise meaning for the URB_ISO_ASAP flag and to explain
more of the details of scheduling for isochronous URBs.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/usb/error-codes.txt |  5 ++---
 drivers/usb/core/urb.c            | 22 +++++++++++++++++++---
 include/linux/usb.h               | 27 ++++++++++++++++-----------
 3 files changed, 37 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt
index b3f606b81a03..8d1e2a9ebbba 100644
--- a/Documentation/usb/error-codes.txt
+++ b/Documentation/usb/error-codes.txt
@@ -35,9 +35,8 @@ USB-specific:
 		d) ISO: number_of_packets is < 0
 		e) various other cases
 
--EAGAIN		a) specified ISO start frame too early
-		b) (using ISO-ASAP) too much scheduled for the future
-		   wait some time and try again.
+-EXDEV		ISO: URB_ISO_ASAP wasn't specified and all the frames
+		the URB would be scheduled in have already expired.
 
 -EFBIG		Host controller driver can't schedule that many ISO frames.
 
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 9d912bfdcffe..3662287e2f4f 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -214,9 +214,25 @@ EXPORT_SYMBOL_GPL(usb_unanchor_urb);
  * urb->interval is modified to reflect the actual transfer period used
  * (normally some power of two units).  And for isochronous urbs,
  * urb->start_frame is modified to reflect when the URB's transfers were
- * scheduled to start.  Not all isochronous transfer scheduling policies
- * will work, but most host controller drivers should easily handle ISO
- * queues going from now until 10-200 msec into the future.
+ * scheduled to start.
+ *
+ * Not all isochronous transfer scheduling policies will work, but most
+ * host controller drivers should easily handle ISO queues going from now
+ * until 10-200 msec into the future.  Drivers should try to keep at
+ * least one or two msec of data in the queue; many controllers require
+ * that new transfers start at least 1 msec in the future when they are
+ * added.  If the driver is unable to keep up and the queue empties out,
+ * the behavior for new submissions is governed by the URB_ISO_ASAP flag.
+ * If the flag is set, or if the queue is idle, then the URB is always
+ * assigned to the first available (and not yet expired) slot in the
+ * endpoint's schedule.  If the flag is not set and the queue is active
+ * then the URB is always assigned to the next slot in the schedule
+ * following the end of the endpoint's previous URB, even if that slot is
+ * in the past.  When a packet is assigned in this way to a slot that has
+ * already expired, the packet is not transmitted and the corresponding
+ * usb_iso_packet_descriptor's status field will return -EXDEV.  If this
+ * would happen to all the packets in the URB, submission fails with a
+ * -EXDEV error code.
  *
  * For control endpoints, the synchronous usb_control_msg() call is
  * often used (in non-interrupt context) instead of this call.
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 10278d18709c..f92cdf0c1457 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1129,8 +1129,8 @@ extern int usb_disabled(void);
  * Note: URB_DIR_IN/OUT is automatically set in usb_submit_urb().
  */
 #define URB_SHORT_NOT_OK	0x0001	/* report short reads as errors */
-#define URB_ISO_ASAP		0x0002	/* iso-only, urb->start_frame
-					 * ignored */
+#define URB_ISO_ASAP		0x0002	/* iso-only; use the first unexpired
+					 * slot in the schedule */
 #define URB_NO_TRANSFER_DMA_MAP	0x0004	/* urb->transfer_dma valid on submit */
 #define URB_NO_FSBR		0x0020	/* UHCI-specific */
 #define URB_ZERO_PACKET		0x0040	/* Finish bulk OUT with short packet */
@@ -1309,15 +1309,20 @@ typedef void (*usb_complete_t)(struct urb *);
  * the transfer interval in the endpoint descriptor is logarithmic.
  * Device drivers must convert that value to linear units themselves.)
  *
- * Isochronous URBs normally use the URB_ISO_ASAP transfer flag, telling
- * the host controller to schedule the transfer as soon as bandwidth
- * utilization allows, and then set start_frame to reflect the actual frame
- * selected during submission.  Otherwise drivers must specify the start_frame
- * and handle the case where the transfer can't begin then.  However, drivers
- * won't know how bandwidth is currently allocated, and while they can
- * find the current frame using usb_get_current_frame_number () they can't
- * know the range for that frame number.  (Ranges for frame counter values
- * are HC-specific, and can go from 256 to 65536 frames from "now".)
+ * If an isochronous endpoint queue isn't already running, the host
+ * controller will schedule a new URB to start as soon as bandwidth
+ * utilization allows.  If the queue is running then a new URB will be
+ * scheduled to start in the first transfer slot following the end of the
+ * preceding URB, if that slot has not already expired.  If the slot has
+ * expired (which can happen when IRQ delivery is delayed for a long time),
+ * the scheduling behavior depends on the URB_ISO_ASAP flag.  If the flag
+ * is clear then the URB will be scheduled to start in the expired slot,
+ * implying that some of its packets will not be transferred; if the flag
+ * is set then the URB will be scheduled in the first unexpired slot,
+ * breaking the queue's synchronization.  Upon URB completion, the
+ * start_frame field will be set to the (micro)frame number in which the
+ * transfer was scheduled.  Ranges for frame counter values are HC-specific
+ * and can go from as low as 256 to as high as 65536 frames.
  *
  * Isochronous URBs have a different data transfer model, in part because
  * the quality of service is only "best effort".  Callers provide specially
-- 
cgit v1.2.3-55-g7522


From 4534874a8720a361845dce47d310a98e9aac8aeb Mon Sep 17 00:00:00 2001
From: Florian Fainelli
Date: Mon, 8 Oct 2012 15:11:21 +0200
Subject: USB: EHCI: add no_io_watchdog platform_data parameter to
 ehci-platform

Enhance the ehci-platform driver to also accept no_io_watchdog as a platform
data parameter. When no_io_watchdog is set to 1, the ehci controller will set
ehci->need_io_watchdog to 0. Since most EHCI controllers do need the I/O
watchdog to be on, only let those which need it to turn the watchdog off.

Make sure that we change need_io_watchdog after the call to ehci_setup()
because ehci_setup() will unconditionnaly set need_io_watchdog to 1.

Signed-off-by: Florian Fainelli <florian@openwrt.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-platform.c | 2 ++
 include/linux/usb/ehci_pdriver.h | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index 764e0100b6f4..607adf9adb83 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -38,6 +38,8 @@ static int ehci_platform_reset(struct usb_hcd *hcd)
 	if (retval)
 		return retval;
 
+	if (pdata->no_io_watchdog)
+		ehci->need_io_watchdog = 0;
 	if (pdata->port_power_on)
 		ehci_port_power(ehci, 1);
 	if (pdata->port_power_off)
diff --git a/include/linux/usb/ehci_pdriver.h b/include/linux/usb/ehci_pdriver.h
index c9d09f8b7ff2..67ac74bde6d0 100644
--- a/include/linux/usb/ehci_pdriver.h
+++ b/include/linux/usb/ehci_pdriver.h
@@ -29,6 +29,8 @@
  *			initialization.
  * @port_power_off:	set to 1 if the controller needs to be powered down
  *			after initialization.
+ * @no_io_watchdog:	set to 1 if the controller does not need the I/O
+ *			watchdog to run.
  *
  * These are general configuration options for the EHCI controller. All of
  * these options are activating more or less workarounds for some hardware.
@@ -41,6 +43,7 @@ struct usb_ehci_pdata {
 	unsigned	big_endian_mmio:1;
 	unsigned	port_power_on:1;
 	unsigned	port_power_off:1;
+	unsigned	no_io_watchdog:1;
 
 	/* Turn on all power and clocks */
 	int (*power_on)(struct platform_device *pdev);
-- 
cgit v1.2.3-55-g7522


From 2b16e39ee0a431d6cf6e6ca33bb08ec7dc82073f Mon Sep 17 00:00:00 2001
From: Florian Fainelli
Date: Mon, 8 Oct 2012 15:11:26 +0200
Subject: USB: ohci: allow platform driver to specify the number of ports

This patch modifies the ohci platform driver to accept the num_ports
parameter to be set via platform_data. Setting the number of ports must be
done after the call to ohci_hcd_init().

Signed-off-by: Florian Fainelli <florian@openwrt.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ohci-platform.c | 4 ++++
 include/linux/usb/ohci_pdriver.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/host/ohci-platform.c b/drivers/usb/host/ohci-platform.c
index e24ec9f79164..1caaf657c5ea 100644
--- a/drivers/usb/host/ohci-platform.c
+++ b/drivers/usb/host/ohci-platform.c
@@ -31,6 +31,10 @@ static int ohci_platform_reset(struct usb_hcd *hcd)
 		ohci->flags |= OHCI_QUIRK_FRAME_NO;
 
 	ohci_hcd_init(ohci);
+
+	if (pdata->num_ports)
+		ohci->num_ports = pdata->num_ports;
+
 	err = ohci_init(ohci);
 
 	return err;
diff --git a/include/linux/usb/ohci_pdriver.h b/include/linux/usb/ohci_pdriver.h
index 74e7755168b7..012f2b7eb2b6 100644
--- a/include/linux/usb/ohci_pdriver.h
+++ b/include/linux/usb/ohci_pdriver.h
@@ -25,6 +25,7 @@
  * @big_endian_desc:	BE descriptors
  * @big_endian_mmio:	BE registers
  * @no_big_frame_no:	no big endian frame_no shift
+ * @num_ports:		number of ports
  *
  * These are general configuration options for the OHCI controller. All of
  * these options are activating more or less workarounds for some hardware.
@@ -33,6 +34,7 @@ struct usb_ohci_pdata {
 	unsigned	big_endian_desc:1;
 	unsigned	big_endian_mmio:1;
 	unsigned	no_big_frame_no:1;
+	unsigned int	num_ports;
 
 	/* Turn on all power and clocks */
 	int (*power_on)(struct platform_device *pdev);
-- 
cgit v1.2.3-55-g7522


From 5f986c590fcf4284924fcda991cf14ab32bff49f Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Tue, 23 Oct 2012 01:07:27 +0200
Subject: PM / QoS: Prepare device structure for adding more constraint types

Currently struct dev_pm_info contains only one PM QoS constraints
pointer reserved for latency requirements.  Since one more device
constraints type (i.e. flags) will be necessary, introduce a new
structure, struct dev_pm_qos, that eventually will contain all of
the available device PM QoS constraints and replace the "constraints"
pointer in struct dev_pm_info with a pointer to the new structure
called "qos".

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Jean Pihet <j-pihet@ti.com>
---
 drivers/base/power/qos.c | 42 ++++++++++++++++++++++--------------------
 include/linux/pm.h       |  2 +-
 include/linux/pm_qos.h   |  4 ++++
 3 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 74a67e0019a2..40ff1b02a7c5 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -55,9 +55,7 @@ static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers);
  */
 s32 __dev_pm_qos_read_value(struct device *dev)
 {
-	struct pm_qos_constraints *c = dev->power.constraints;
-
-	return c ? pm_qos_read_value(c) : 0;
+	return dev->power.qos ? pm_qos_read_value(&dev->power.qos->latency) : 0;
 }
 
 /**
@@ -91,12 +89,12 @@ static int apply_constraint(struct dev_pm_qos_request *req,
 {
 	int ret, curr_value;
 
-	ret = pm_qos_update_target(req->dev->power.constraints,
+	ret = pm_qos_update_target(&req->dev->power.qos->latency,
 				   &req->node, action, value);
 
 	if (ret) {
 		/* Call the global callbacks if needed */
-		curr_value = pm_qos_read_value(req->dev->power.constraints);
+		curr_value = pm_qos_read_value(&req->dev->power.qos->latency);
 		blocking_notifier_call_chain(&dev_pm_notifiers,
 					     (unsigned long)curr_value,
 					     req);
@@ -114,20 +112,22 @@ static int apply_constraint(struct dev_pm_qos_request *req,
  */
 static int dev_pm_qos_constraints_allocate(struct device *dev)
 {
+	struct dev_pm_qos *qos;
 	struct pm_qos_constraints *c;
 	struct blocking_notifier_head *n;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
-	if (!c)
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (!qos)
 		return -ENOMEM;
 
 	n = kzalloc(sizeof(*n), GFP_KERNEL);
 	if (!n) {
-		kfree(c);
+		kfree(qos);
 		return -ENOMEM;
 	}
 	BLOCKING_INIT_NOTIFIER_HEAD(n);
 
+	c = &qos->latency;
 	plist_head_init(&c->list);
 	c->target_value = PM_QOS_DEV_LAT_DEFAULT_VALUE;
 	c->default_value = PM_QOS_DEV_LAT_DEFAULT_VALUE;
@@ -135,7 +135,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev)
 	c->notifiers = n;
 
 	spin_lock_irq(&dev->power.lock);
-	dev->power.constraints = c;
+	dev->power.qos = qos;
 	spin_unlock_irq(&dev->power.lock);
 
 	return 0;
@@ -151,7 +151,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev)
 void dev_pm_qos_constraints_init(struct device *dev)
 {
 	mutex_lock(&dev_pm_qos_mtx);
-	dev->power.constraints = NULL;
+	dev->power.qos = NULL;
 	dev->power.power_state = PMSG_ON;
 	mutex_unlock(&dev_pm_qos_mtx);
 }
@@ -164,6 +164,7 @@ void dev_pm_qos_constraints_init(struct device *dev)
  */
 void dev_pm_qos_constraints_destroy(struct device *dev)
 {
+	struct dev_pm_qos *qos;
 	struct dev_pm_qos_request *req, *tmp;
 	struct pm_qos_constraints *c;
 
@@ -176,10 +177,11 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 	mutex_lock(&dev_pm_qos_mtx);
 
 	dev->power.power_state = PMSG_INVALID;
-	c = dev->power.constraints;
-	if (!c)
+	qos = dev->power.qos;
+	if (!qos)
 		goto out;
 
+	c = &qos->latency;
 	/* Flush the constraints list for the device */
 	plist_for_each_entry_safe(req, tmp, &c->list, node) {
 		/*
@@ -191,7 +193,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 	}
 
 	spin_lock_irq(&dev->power.lock);
-	dev->power.constraints = NULL;
+	dev->power.qos = NULL;
 	spin_unlock_irq(&dev->power.lock);
 
 	kfree(c->notifiers);
@@ -235,7 +237,7 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
 
 	mutex_lock(&dev_pm_qos_mtx);
 
-	if (!dev->power.constraints) {
+	if (!dev->power.qos) {
 		if (dev->power.power_state.event == PM_EVENT_INVALID) {
 			/* The device has been removed from the system. */
 			req->dev = NULL;
@@ -290,7 +292,7 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
 
 	mutex_lock(&dev_pm_qos_mtx);
 
-	if (req->dev->power.constraints) {
+	if (req->dev->power.qos) {
 		if (new_value != req->node.prio)
 			ret = apply_constraint(req, PM_QOS_UPDATE_REQ,
 					       new_value);
@@ -329,7 +331,7 @@ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req)
 
 	mutex_lock(&dev_pm_qos_mtx);
 
-	if (req->dev->power.constraints) {
+	if (req->dev->power.qos) {
 		ret = apply_constraint(req, PM_QOS_REMOVE_REQ,
 				       PM_QOS_DEFAULT_VALUE);
 		memset(req, 0, sizeof(*req));
@@ -362,13 +364,13 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier)
 
 	mutex_lock(&dev_pm_qos_mtx);
 
-	if (!dev->power.constraints)
+	if (!dev->power.qos)
 		ret = dev->power.power_state.event != PM_EVENT_INVALID ?
 			dev_pm_qos_constraints_allocate(dev) : -ENODEV;
 
 	if (!ret)
 		ret = blocking_notifier_chain_register(
-				dev->power.constraints->notifiers, notifier);
+				dev->power.qos->latency.notifiers, notifier);
 
 	mutex_unlock(&dev_pm_qos_mtx);
 	return ret;
@@ -393,9 +395,9 @@ int dev_pm_qos_remove_notifier(struct device *dev,
 	mutex_lock(&dev_pm_qos_mtx);
 
 	/* Silently return if the constraints object is not present. */
-	if (dev->power.constraints)
+	if (dev->power.qos)
 		retval = blocking_notifier_chain_unregister(
-				dev->power.constraints->notifiers,
+				dev->power.qos->latency.notifiers,
 				notifier);
 
 	mutex_unlock(&dev_pm_qos_mtx);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 007e687c4f69..0ce6df94221a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -549,7 +549,7 @@ struct dev_pm_info {
 	struct dev_pm_qos_request *pq_req;
 #endif
 	struct pm_subsys_data	*subsys_data;  /* Owned by the subsystem. */
-	struct pm_qos_constraints *constraints;
+	struct dev_pm_qos	*qos;
 };
 
 extern void update_pm_runtime_accounting(struct device *dev);
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 9924ea1f22e0..30e9ad72e797 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -57,6 +57,10 @@ struct pm_qos_constraints {
 	struct blocking_notifier_head *notifiers;
 };
 
+struct dev_pm_qos {
+	struct pm_qos_constraints latency;
+};
+
 /* Action requested to pm_qos_update_target */
 enum pm_qos_req_action {
 	PM_QOS_ADD_REQ,		/* Add a new request */
-- 
cgit v1.2.3-55-g7522


From 5efbe4279f959a3f5ed26adf5f05cb78dd1ffa7e Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Tue, 23 Oct 2012 01:07:46 +0200
Subject: PM / QoS: Introduce request and constraint data types for PM QoS
 flags

Introduce struct pm_qos_flags_request and struct pm_qos_flags
representing PM QoS flags request type and PM QoS flags constraint
type, respectively.  With these definitions the data structures
will be arranged so that the list member of a struct pm_qos_flags
object will contain the head of a list of struct pm_qos_flags_request
objects representing all of the "flags" requests present for the
given device.  Then, the effective_flags member of a struct
pm_qos_flags object will contain the bitwise OR of the flags members
of all the struct pm_qos_flags_request objects in the list.

Additionally, introduce helper function pm_qos_update_flags()
allowing the caller to manage the list of struct pm_qos_flags_request
pointed to by the list member of struct pm_qos_flags.

The flags are of type s32 so that the request's "value" field
is always of the same type regardless of what kind of request it
is (latency requests already have value fields of type s32).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Jean Pihet <j-pihet@ti.com>
Acked-by: mark gross <markgross@thegnar.org>
---
 include/linux/pm_qos.h | 17 ++++++++++++--
 kernel/power/qos.c     | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 30e9ad72e797..413ada3c7c97 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -33,6 +33,11 @@ struct pm_qos_request {
 	struct delayed_work work; /* for pm_qos_update_request_timeout */
 };
 
+struct pm_qos_flags_request {
+	struct list_head node;
+	s32 flags;	/* Do not change to 64 bit */
+};
+
 struct dev_pm_qos_request {
 	struct plist_node node;
 	struct device *dev;
@@ -45,8 +50,8 @@ enum pm_qos_type {
 };
 
 /*
- * Note: The lockless read path depends on the CPU accessing
- * target_value atomically.  Atomic access is only guaranteed on all CPU
+ * Note: The lockless read path depends on the CPU accessing target_value
+ * or effective_flags atomically.  Atomic access is only guaranteed on all CPU
  * types linux supports for 32 bit quantites
  */
 struct pm_qos_constraints {
@@ -57,6 +62,11 @@ struct pm_qos_constraints {
 	struct blocking_notifier_head *notifiers;
 };
 
+struct pm_qos_flags {
+	struct list_head list;
+	s32 effective_flags;	/* Do not change to 64 bit */
+};
+
 struct dev_pm_qos {
 	struct pm_qos_constraints latency;
 };
@@ -75,6 +85,9 @@ static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req)
 
 int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
 			 enum pm_qos_req_action action, int value);
+bool pm_qos_update_flags(struct pm_qos_flags *pqf,
+			 struct pm_qos_flags_request *req,
+			 enum pm_qos_req_action action, s32 val);
 void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class,
 			s32 value);
 void pm_qos_update_request(struct pm_qos_request *req,
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 846bd42c7ed1..2ab2819aee65 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -212,6 +212,69 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
 	}
 }
 
+/**
+ * pm_qos_flags_remove_req - Remove device PM QoS flags request.
+ * @pqf: Device PM QoS flags set to remove the request from.
+ * @req: Request to remove from the set.
+ */
+static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf,
+				    struct pm_qos_flags_request *req)
+{
+	s32 val = 0;
+
+	list_del(&req->node);
+	list_for_each_entry(req, &pqf->list, node)
+		val |= req->flags;
+
+	pqf->effective_flags = val;
+}
+
+/**
+ * pm_qos_update_flags - Update a set of PM QoS flags.
+ * @pqf: Set of flags to update.
+ * @req: Request to add to the set, to modify, or to remove from the set.
+ * @action: Action to take on the set.
+ * @val: Value of the request to add or modify.
+ *
+ * Update the given set of PM QoS flags and call notifiers if the aggregate
+ * value has changed.  Returns 1 if the aggregate constraint value has changed,
+ * 0 otherwise.
+ */
+bool pm_qos_update_flags(struct pm_qos_flags *pqf,
+			 struct pm_qos_flags_request *req,
+			 enum pm_qos_req_action action, s32 val)
+{
+	unsigned long irqflags;
+	s32 prev_value, curr_value;
+
+	spin_lock_irqsave(&pm_qos_lock, irqflags);
+
+	prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags;
+
+	switch (action) {
+	case PM_QOS_REMOVE_REQ:
+		pm_qos_flags_remove_req(pqf, req);
+		break;
+	case PM_QOS_UPDATE_REQ:
+		pm_qos_flags_remove_req(pqf, req);
+	case PM_QOS_ADD_REQ:
+		req->flags = val;
+		INIT_LIST_HEAD(&req->node);
+		list_add_tail(&req->node, &pqf->list);
+		pqf->effective_flags |= val;
+		break;
+	default:
+		/* no action */
+		;
+	}
+
+	curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags;
+
+	spin_unlock_irqrestore(&pm_qos_lock, irqflags);
+
+	return prev_value != curr_value;
+}
+
 /**
  * pm_qos_request - returns current system wide qos expectation
  * @pm_qos_class: identification of which qos value is requested
-- 
cgit v1.2.3-55-g7522


From 021c870ba4ab4bc9a23d5db4e324f50f26d8ab24 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Tue, 23 Oct 2012 01:09:00 +0200
Subject: PM / QoS: Prepare struct dev_pm_qos_request for more request types

The subsequent patches will use struct dev_pm_qos_request for
representing both latency requests and flags requests.  To make that
easier, put the node member of struct dev_pm_qos_request (under the
name "pnode") into a union called "data" that will represent the
request's  value and list node depending on its type.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: mark gross <markgross@thegnar.org>
---
 drivers/base/power/qos.c   | 6 +++---
 drivers/base/power/sysfs.c | 2 +-
 include/linux/pm_qos.h     | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 40ff1b02a7c5..96d27b821bb8 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -90,7 +90,7 @@ static int apply_constraint(struct dev_pm_qos_request *req,
 	int ret, curr_value;
 
 	ret = pm_qos_update_target(&req->dev->power.qos->latency,
-				   &req->node, action, value);
+				   &req->data.pnode, action, value);
 
 	if (ret) {
 		/* Call the global callbacks if needed */
@@ -183,7 +183,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 
 	c = &qos->latency;
 	/* Flush the constraints list for the device */
-	plist_for_each_entry_safe(req, tmp, &c->list, node) {
+	plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
 		/*
 		 * Update constraints list and call the notification
 		 * callbacks if needed
@@ -293,7 +293,7 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
 	mutex_lock(&dev_pm_qos_mtx);
 
 	if (req->dev->power.qos) {
-		if (new_value != req->node.prio)
+		if (new_value != req->data.pnode.prio)
 			ret = apply_constraint(req, PM_QOS_UPDATE_REQ,
 					       new_value);
 	} else {
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index b91dc6f1e914..54c61ffa2044 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -221,7 +221,7 @@ static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
 static ssize_t pm_qos_latency_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", dev->power.pq_req->node.prio);
+	return sprintf(buf, "%d\n", dev->power.pq_req->data.pnode.prio);
 }
 
 static ssize_t pm_qos_latency_store(struct device *dev,
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 413ada3c7c97..3b9d14964d2b 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -39,7 +39,9 @@ struct pm_qos_flags_request {
 };
 
 struct dev_pm_qos_request {
-	struct plist_node node;
+	union {
+		struct plist_node pnode;
+	} data;
 	struct device *dev;
 };
 
-- 
cgit v1.2.3-55-g7522


From ae0fb4b72c8db7e6c4ef32bc58a43a759ad414b9 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Tue, 23 Oct 2012 01:09:12 +0200
Subject: PM / QoS: Introduce PM QoS device flags support

Modify the device PM QoS core code to support PM QoS flags requests.

First, add a new field of type struct pm_qos_flags called "flags"
to struct dev_pm_qos for representing the list of PM QoS flags
requests for the given device.  Accordingly, add a new "type" field
to struct dev_pm_qos_request (along with an enum for representing
request types) and a new member called "flr" to its data union for
representig flags requests.

Second, modify dev_pm_qos_add_request(), dev_pm_qos_update_request(),
the internal routine apply_constraint() used by them and their
existing callers to cover flags requests as well as latency
requests.  In particular, dev_pm_qos_add_request() gets a new
argument called "type" for specifying the type of a request to be
added.

Finally, introduce two routines, __dev_pm_qos_flags() and
dev_pm_qos_flags(), allowing their callers to check which PM QoS
flags have been requested for the given device (the caller is
supposed to pass the mask of flags to check as the routine's
second argument and examine its return value for the result).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: mark gross <markgross@thegnar.org>
---
 Documentation/power/pm_qos_interface.txt |   2 +-
 drivers/base/power/qos.c                 | 124 ++++++++++++++++++++++++-------
 drivers/mtd/nand/sh_flctl.c              |   4 +-
 include/linux/pm_qos.h                   |  26 ++++++-
 4 files changed, 127 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index 17e130a80347..79a2a58425ee 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -99,7 +99,7 @@ reading the aggregated value does not require any locking mechanism.
 
 From kernel mode the use of this interface is the following:
 
-int dev_pm_qos_add_request(device, handle, value):
+int dev_pm_qos_add_request(device, handle, type, value):
 Will insert an element into the list for that identified device with the
 target value.  Upon change to this list the new target is recomputed and any
 registered notifiers are called only if the target value is now different.
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 96d27b821bb8..3c66f75d14b0 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -47,6 +47,50 @@ static DEFINE_MUTEX(dev_pm_qos_mtx);
 
 static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers);
 
+/**
+ * __dev_pm_qos_flags - Check PM QoS flags for a given device.
+ * @dev: Device to check the PM QoS flags for.
+ * @mask: Flags to check against.
+ *
+ * This routine must be called with dev->power.lock held.
+ */
+enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask)
+{
+	struct dev_pm_qos *qos = dev->power.qos;
+	struct pm_qos_flags *pqf;
+	s32 val;
+
+	if (!qos)
+		return PM_QOS_FLAGS_UNDEFINED;
+
+	pqf = &qos->flags;
+	if (list_empty(&pqf->list))
+		return PM_QOS_FLAGS_UNDEFINED;
+
+	val = pqf->effective_flags & mask;
+	if (val)
+		return (val == mask) ? PM_QOS_FLAGS_ALL : PM_QOS_FLAGS_SOME;
+
+	return PM_QOS_FLAGS_NONE;
+}
+
+/**
+ * dev_pm_qos_flags - Check PM QoS flags for a given device (locked).
+ * @dev: Device to check the PM QoS flags for.
+ * @mask: Flags to check against.
+ */
+enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask)
+{
+	unsigned long irqflags;
+	enum pm_qos_flags_status ret;
+
+	spin_lock_irqsave(&dev->power.lock, irqflags);
+	ret = __dev_pm_qos_flags(dev, mask);
+	spin_unlock_irqrestore(&dev->power.lock, irqflags);
+
+	return ret;
+}
+
 /**
  * __dev_pm_qos_read_value - Get PM QoS constraint for a given device.
  * @dev: Device to get the PM QoS constraint value for.
@@ -74,30 +118,39 @@ s32 dev_pm_qos_read_value(struct device *dev)
 	return ret;
 }
 
-/*
- * apply_constraint
- * @req: constraint request to apply
- * @action: action to perform add/update/remove, of type enum pm_qos_req_action
- * @value: defines the qos request
+/**
+ * apply_constraint - Add/modify/remove device PM QoS request.
+ * @req: Constraint request to apply
+ * @action: Action to perform (add/update/remove).
+ * @value: Value to assign to the QoS request.
  *
  * Internal function to update the constraints list using the PM QoS core
  * code and if needed call the per-device and the global notification
  * callbacks
  */
 static int apply_constraint(struct dev_pm_qos_request *req,
-			    enum pm_qos_req_action action, int value)
+			    enum pm_qos_req_action action, s32 value)
 {
-	int ret, curr_value;
-
-	ret = pm_qos_update_target(&req->dev->power.qos->latency,
-				   &req->data.pnode, action, value);
+	struct dev_pm_qos *qos = req->dev->power.qos;
+	int ret;
 
-	if (ret) {
-		/* Call the global callbacks if needed */
-		curr_value = pm_qos_read_value(&req->dev->power.qos->latency);
-		blocking_notifier_call_chain(&dev_pm_notifiers,
-					     (unsigned long)curr_value,
-					     req);
+	switch(req->type) {
+	case DEV_PM_QOS_LATENCY:
+		ret = pm_qos_update_target(&qos->latency, &req->data.pnode,
+					   action, value);
+		if (ret) {
+			value = pm_qos_read_value(&qos->latency);
+			blocking_notifier_call_chain(&dev_pm_notifiers,
+						     (unsigned long)value,
+						     req);
+		}
+		break;
+	case DEV_PM_QOS_FLAGS:
+		ret = pm_qos_update_flags(&qos->flags, &req->data.flr,
+					  action, value);
+		break;
+	default:
+		ret = -EINVAL;
 	}
 
 	return ret;
@@ -134,6 +187,8 @@ static int dev_pm_qos_constraints_allocate(struct device *dev)
 	c->type = PM_QOS_MIN;
 	c->notifiers = n;
 
+	INIT_LIST_HEAD(&qos->flags.list);
+
 	spin_lock_irq(&dev->power.lock);
 	dev->power.qos = qos;
 	spin_unlock_irq(&dev->power.lock);
@@ -207,6 +262,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
  * dev_pm_qos_add_request - inserts new qos request into the list
  * @dev: target device for the constraint
  * @req: pointer to a preallocated handle
+ * @type: type of the request
  * @value: defines the qos request
  *
  * This function inserts a new entry in the device constraints list of
@@ -222,7 +278,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
  * from the system.
  */
 int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
-			   s32 value)
+			   enum dev_pm_qos_req_type type, s32 value)
 {
 	int ret = 0;
 
@@ -253,8 +309,10 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
 		}
 	}
 
-	if (!ret)
+	if (!ret) {
+		req->type = type;
 		ret = apply_constraint(req, PM_QOS_ADD_REQ, value);
+	}
 
  out:
 	mutex_unlock(&dev_pm_qos_mtx);
@@ -281,6 +339,7 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_request);
 int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
 			      s32 new_value)
 {
+	s32 curr_value;
 	int ret = 0;
 
 	if (!req) /*guard against callers passing in null */
@@ -292,15 +351,27 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
 
 	mutex_lock(&dev_pm_qos_mtx);
 
-	if (req->dev->power.qos) {
-		if (new_value != req->data.pnode.prio)
-			ret = apply_constraint(req, PM_QOS_UPDATE_REQ,
-					       new_value);
-	} else {
-		/* Return if the device has been removed */
+	if (!req->dev->power.qos) {
 		ret = -ENODEV;
+		goto out;
 	}
 
+	switch(req->type) {
+	case DEV_PM_QOS_LATENCY:
+		curr_value = req->data.pnode.prio;
+		break;
+	case DEV_PM_QOS_FLAGS:
+		curr_value = req->data.flr.flags;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (curr_value != new_value)
+		ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value);
+
+ out:
 	mutex_unlock(&dev_pm_qos_mtx);
 	return ret;
 }
@@ -451,7 +522,8 @@ int dev_pm_qos_add_ancestor_request(struct device *dev,
 		ancestor = ancestor->parent;
 
 	if (ancestor)
-		error = dev_pm_qos_add_request(ancestor, req, value);
+		error = dev_pm_qos_add_request(ancestor, req,
+					       DEV_PM_QOS_LATENCY, value);
 
 	if (error)
 		req->dev = NULL;
@@ -487,7 +559,7 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
 	if (!req)
 		return -ENOMEM;
 
-	ret = dev_pm_qos_add_request(dev, req, value);
+	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY, value);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 4fbfe96e37a1..f48ac5d80bbf 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -727,7 +727,9 @@ static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
 
 		if (!flctl->qos_request) {
 			ret = dev_pm_qos_add_request(&flctl->pdev->dev,
-							&flctl->pm_qos, 100);
+							&flctl->pm_qos,
+							DEV_PM_QOS_LATENCY,
+							100);
 			if (ret < 0)
 				dev_err(&flctl->pdev->dev,
 					"PM QoS request failed: %d\n", ret);
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 3b9d14964d2b..3af7d8573c23 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -20,6 +20,13 @@ enum {
 	PM_QOS_NUM_CLASSES,
 };
 
+enum pm_qos_flags_status {
+	PM_QOS_FLAGS_UNDEFINED = -1,
+	PM_QOS_FLAGS_NONE,
+	PM_QOS_FLAGS_SOME,
+	PM_QOS_FLAGS_ALL,
+};
+
 #define PM_QOS_DEFAULT_VALUE -1
 
 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
@@ -38,9 +45,16 @@ struct pm_qos_flags_request {
 	s32 flags;	/* Do not change to 64 bit */
 };
 
+enum dev_pm_qos_req_type {
+	DEV_PM_QOS_LATENCY = 1,
+	DEV_PM_QOS_FLAGS,
+};
+
 struct dev_pm_qos_request {
+	enum dev_pm_qos_req_type type;
 	union {
 		struct plist_node pnode;
+		struct pm_qos_flags_request flr;
 	} data;
 	struct device *dev;
 };
@@ -71,6 +85,7 @@ struct pm_qos_flags {
 
 struct dev_pm_qos {
 	struct pm_qos_constraints latency;
+	struct pm_qos_flags flags;
 };
 
 /* Action requested to pm_qos_update_target */
@@ -105,10 +120,12 @@ int pm_qos_request_active(struct pm_qos_request *req);
 s32 pm_qos_read_value(struct pm_qos_constraints *c);
 
 #ifdef CONFIG_PM
+enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask);
+enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask);
 s32 __dev_pm_qos_read_value(struct device *dev);
 s32 dev_pm_qos_read_value(struct device *dev);
 int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
-			   s32 value);
+			   enum dev_pm_qos_req_type type, s32 value);
 int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value);
 int dev_pm_qos_remove_request(struct dev_pm_qos_request *req);
 int dev_pm_qos_add_notifier(struct device *dev,
@@ -122,12 +139,19 @@ void dev_pm_qos_constraints_destroy(struct device *dev);
 int dev_pm_qos_add_ancestor_request(struct device *dev,
 				    struct dev_pm_qos_request *req, s32 value);
 #else
+static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev,
+							  s32 mask)
+			{ return PM_QOS_FLAGS_UNDEFINED; }
+static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev,
+							s32 mask)
+			{ return PM_QOS_FLAGS_UNDEFINED; }
 static inline s32 __dev_pm_qos_read_value(struct device *dev)
 			{ return 0; }
 static inline s32 dev_pm_qos_read_value(struct device *dev)
 			{ return 0; }
 static inline int dev_pm_qos_add_request(struct device *dev,
 					 struct dev_pm_qos_request *req,
+					 enum dev_pm_qos_req_type type,
 					 s32 value)
 			{ return 0; }
 static inline int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
-- 
cgit v1.2.3-55-g7522


From 8fcbaa2b7f5b70dba9ed1c7f91d0a270ce752e2c Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:27 +0200
Subject: TTY: devpts, don't care about TTY in devpts_get_tty

The goal is to stop setting and using tty->driver_data in devpts code.
It should be used solely by the driver's code, pty in this case.

First, here we remove TTY from devpts_get_tty and rename it to
devpts_get_priv. Note we do not remove type safety, we just shift the
[implicit] (void *) cast one layer up.

index was unused in devpts_get_tty, so remove that from the prototype
too.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c         | 2 +-
 fs/devpts/inode.c         | 9 ++++-----
 include/linux/devpts_fs.h | 7 +++----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index a82b39939a9c..65f767154d12 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -547,7 +547,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver,
 	struct tty_struct *tty;
 
 	mutex_lock(&devpts_mutex);
-	tty = devpts_get_tty(pts_inode, idx);
+	tty = devpts_get_priv(pts_inode);
 	mutex_unlock(&devpts_mutex);
 	/* Master must be open before slave */
 	if (!tty)
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 14afbabe6546..47965807884d 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -593,10 +593,10 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	return ret;
 }
 
-struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
+void *devpts_get_priv(struct inode *pts_inode)
 {
 	struct dentry *dentry;
-	struct tty_struct *tty;
+	void *priv = NULL;
 
 	BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
@@ -605,13 +605,12 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
 	if (!dentry)
 		return NULL;
 
-	tty = NULL;
 	if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
-		tty = (struct tty_struct *)pts_inode->i_private;
+		priv = pts_inode->i_private;
 
 	dput(dentry);
 
-	return tty;
+	return priv;
 }
 
 void devpts_pty_kill(struct tty_struct *tty)
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 5ce0e5fd712e..de635a5505ea 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -21,8 +21,8 @@ int devpts_new_index(struct inode *ptmx_inode);
 void devpts_kill_index(struct inode *ptmx_inode, int idx);
 /* mknod in devpts */
 int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty);
-/* get tty structure */
-struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number);
+/* get private structure */
+void *devpts_get_priv(struct inode *pts_inode);
 /* unlink */
 void devpts_pty_kill(struct tty_struct *tty);
 
@@ -36,8 +36,7 @@ static inline int devpts_pty_new(struct inode *ptmx_inode,
 {
 	return -EINVAL;
 }
-static inline struct tty_struct *devpts_get_tty(struct inode *pts_inode,
-		int number)
+static inline void *devpts_get_priv(struct inode *pts_inode)
 {
 	return NULL;
 }
-- 
cgit v1.2.3-55-g7522


From 162b97cfa21f816f39ede1944f2a4220e3cf8969 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:28 +0200
Subject: TTY: devpts, return created inode from devpts_pty_new

The goal is to stop setting and using tty->driver_data in devpts code.
It should be used solely by the driver's code, pty in this case.

For the cleanup of layering, we will need the inode created in
devpts_pty_new to be stored into slave's driver_data. So we convert
devpts_pty_new to return the inode or an ERR_PTR-encoded error in case
of failure.

The move of 'inode = new_inode(sb);' from declarators to the code is
only cosmetical, but it makes the code easier to read.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c         |  7 +++++--
 fs/devpts/inode.c         | 12 ++++++------
 include/linux/devpts_fs.h |  8 ++++----
 3 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 65f767154d12..9985b451e937 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -614,6 +614,7 @@ static const struct tty_operations pty_unix98_ops = {
 static int ptmx_open(struct inode *inode, struct file *filp)
 {
 	struct tty_struct *tty;
+	struct inode *slave_inode;
 	int retval;
 	int index;
 
@@ -650,9 +651,11 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 
 	tty_add_file(tty, filp);
 
-	retval = devpts_pty_new(inode, tty->link);
-	if (retval)
+	slave_inode = devpts_pty_new(inode, tty->link);
+	if (IS_ERR(slave_inode)) {
+		retval = PTR_ERR(slave_inode);
 		goto err_release;
+	}
 
 	retval = ptm_driver->ops->open(tty, filp);
 	if (retval)
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 47965807884d..ec3bab716c05 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -545,7 +545,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 	mutex_unlock(&allocated_ptys_lock);
 }
 
-int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
+struct inode *devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 {
 	/* tty layer puts index from devpts_new_index() in here */
 	int number = tty->index;
@@ -553,19 +553,19 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	dev_t device = MKDEV(driver->major, driver->minor_start+number);
 	struct dentry *dentry;
 	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-	struct inode *inode = new_inode(sb);
+	struct inode *inode;
 	struct dentry *root = sb->s_root;
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
-	int ret = 0;
 	char s[12];
 
 	/* We're supposed to be given the slave end of a pty */
 	BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY);
 	BUG_ON(driver->subtype != PTY_TYPE_SLAVE);
 
+	inode = new_inode(sb);
 	if (!inode)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	inode->i_ino = number + 3;
 	inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
@@ -585,12 +585,12 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 		fsnotify_create(root->d_inode, dentry);
 	} else {
 		iput(inode);
-		ret = -ENOMEM;
+		inode = ERR_PTR(-ENOMEM);
 	}
 
 	mutex_unlock(&root->d_inode->i_mutex);
 
-	return ret;
+	return inode;
 }
 
 void *devpts_get_priv(struct inode *pts_inode)
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index de635a5505ea..4ca846f16fe5 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -20,7 +20,7 @@
 int devpts_new_index(struct inode *ptmx_inode);
 void devpts_kill_index(struct inode *ptmx_inode, int idx);
 /* mknod in devpts */
-int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty);
+struct inode *devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty);
 /* get private structure */
 void *devpts_get_priv(struct inode *pts_inode);
 /* unlink */
@@ -31,10 +31,10 @@ void devpts_pty_kill(struct tty_struct *tty);
 /* Dummy stubs in the no-pty case */
 static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
 static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
-static inline int devpts_pty_new(struct inode *ptmx_inode,
-				struct tty_struct *tty)
+static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
+		struct tty_struct *tty)
 {
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 static inline void *devpts_get_priv(struct inode *pts_inode)
 {
-- 
cgit v1.2.3-55-g7522


From f11afb61247016162aa92225a337c1575556c9d9 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:29 +0200
Subject: TTY: devpts, do not set driver_data

The goal is to stop setting and using tty->driver_data in devpts code.
It should be used solely by the driver's code, pty in this case.

Now driver_data are managed only in the pty driver. devpts_pty_new is
switched to accept what we used to dig out of tty_struct, i.e. device
node number and index.

This also removes a note about driver_data being set outside of the
driver.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c         | 10 +++++-----
 fs/devpts/inode.c         | 21 ++++++---------------
 include/linux/devpts_fs.h |  9 +++++----
 3 files changed, 16 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 9985b451e937..559e5b27941a 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -4,9 +4,6 @@
  *  Added support for a Unix98-style ptmx device.
  *    -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
  *
- *  When reading this code see also fs/devpts. In particular note that the
- *  driver_data field is used by the devpts side as a binding to the devpts
- *  inode.
  */
 
 #include <linux/module.h>
@@ -59,7 +56,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
 #ifdef CONFIG_UNIX98_PTYS
 		if (tty->driver == ptm_driver) {
 		        mutex_lock(&devpts_mutex);
-			devpts_pty_kill(tty->link);
+			devpts_pty_kill(tty->link->driver_data);
 		        mutex_unlock(&devpts_mutex);
 		}
 #endif
@@ -651,7 +648,9 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 
 	tty_add_file(tty, filp);
 
-	slave_inode = devpts_pty_new(inode, tty->link);
+	slave_inode = devpts_pty_new(inode,
+			MKDEV(UNIX98_PTY_SLAVE_MAJOR, index), index,
+			tty->link);
 	if (IS_ERR(slave_inode)) {
 		retval = PTR_ERR(slave_inode);
 		goto err_release;
@@ -662,6 +661,7 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 		goto err_release;
 
 	tty_unlock(tty);
+	tty->link->driver_data = slave_inode;
 	return 0;
 err_release:
 	tty_unlock(tty);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index ec3bab716c05..7a20d673bb8a 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -545,12 +545,9 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 	mutex_unlock(&allocated_ptys_lock);
 }
 
-struct inode *devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
+struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+		void *priv)
 {
-	/* tty layer puts index from devpts_new_index() in here */
-	int number = tty->index;
-	struct tty_driver *driver = tty->driver;
-	dev_t device = MKDEV(driver->major, driver->minor_start+number);
 	struct dentry *dentry;
 	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
 	struct inode *inode;
@@ -559,23 +556,18 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	struct pts_mount_opts *opts = &fsi->mount_opts;
 	char s[12];
 
-	/* We're supposed to be given the slave end of a pty */
-	BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY);
-	BUG_ON(driver->subtype != PTY_TYPE_SLAVE);
-
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
-	inode->i_ino = number + 3;
+	inode->i_ino = index + 3;
 	inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
 	inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	init_special_inode(inode, S_IFCHR|opts->mode, device);
-	inode->i_private = tty;
-	tty->driver_data = inode;
+	inode->i_private = priv;
 
-	sprintf(s, "%d", number);
+	sprintf(s, "%d", index);
 
 	mutex_lock(&root->d_inode->i_mutex);
 
@@ -613,9 +605,8 @@ void *devpts_get_priv(struct inode *pts_inode)
 	return priv;
 }
 
-void devpts_pty_kill(struct tty_struct *tty)
+void devpts_pty_kill(struct inode *inode)
 {
-	struct inode *inode = tty->driver_data;
 	struct super_block *sb = pts_sb_from_inode(inode);
 	struct dentry *root = sb->s_root;
 	struct dentry *dentry;
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 4ca846f16fe5..251a2090a554 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -20,11 +20,12 @@
 int devpts_new_index(struct inode *ptmx_inode);
 void devpts_kill_index(struct inode *ptmx_inode, int idx);
 /* mknod in devpts */
-struct inode *devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty);
+struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+		void *priv);
 /* get private structure */
 void *devpts_get_priv(struct inode *pts_inode);
 /* unlink */
-void devpts_pty_kill(struct tty_struct *tty);
+void devpts_pty_kill(struct inode *inode);
 
 #else
 
@@ -32,7 +33,7 @@ void devpts_pty_kill(struct tty_struct *tty);
 static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
 static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
 static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
-		struct tty_struct *tty)
+		dev_t device, int index, void *priv)
 {
 	return ERR_PTR(-EINVAL);
 }
@@ -40,7 +41,7 @@ static inline void *devpts_get_priv(struct inode *pts_inode)
 {
 	return NULL;
 }
-static inline void devpts_pty_kill(struct tty_struct *tty) { }
+static inline void devpts_pty_kill(struct inode *inode) { }
 
 #endif
 
-- 
cgit v1.2.3-55-g7522


From 6c633f27ccf783e9a782b84e34aeaeb7949a3359 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:37 +0200
Subject: TTY: audit, stop accessing tty->icount

This is a private member of n_tty. Stop accessing it. Instead, take is
as an argument.

This is needed to allow clean switch of the private members to a
separate private structure of n_tty.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c     |  5 +++--
 drivers/tty/tty_audit.c | 15 ++++++++-------
 include/linux/tty.h     |  4 ++--
 3 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index ceae0744cbb8..3ebab0cfceb2 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -76,7 +76,7 @@
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
 			       unsigned char __user *ptr)
 {
-	tty_audit_add_data(tty, &x, 1);
+	tty_audit_add_data(tty, &x, 1, tty->icanon);
 	return put_user(x, ptr);
 }
 
@@ -1644,7 +1644,8 @@ static int copy_from_read_buf(struct tty_struct *tty,
 		n -= retval;
 		is_eof = n == 1 &&
 			tty->read_buf[tty->read_tail] == EOF_CHAR(tty);
-		tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n);
+		tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n,
+				tty->icanon);
 		spin_lock_irqsave(&tty->read_lock, flags);
 		tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1);
 		tty->read_cnt -= n;
diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index b0b39b823ccf..6953dc82850c 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -23,7 +23,7 @@ struct tty_audit_buf {
 };
 
 static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor,
-						 int icanon)
+						 unsigned icanon)
 {
 	struct tty_audit_buf *buf;
 
@@ -239,7 +239,8 @@ int tty_audit_push_task(struct task_struct *tsk, kuid_t loginuid, u32 sessionid)
  *	if TTY auditing is disabled or out of memory.  Otherwise, return a new
  *	reference to the buffer.
  */
-static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty)
+static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty,
+		unsigned icanon)
 {
 	struct tty_audit_buf *buf, *buf2;
 
@@ -257,7 +258,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty)
 
 	buf2 = tty_audit_buf_alloc(tty->driver->major,
 				   tty->driver->minor_start + tty->index,
-				   tty->icanon);
+				   icanon);
 	if (buf2 == NULL) {
 		audit_log_lost("out of memory in TTY auditing");
 		return NULL;
@@ -287,7 +288,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty)
  *	Audit @data of @size from @tty, if necessary.
  */
 void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
-			size_t size)
+			size_t size, unsigned icanon)
 {
 	struct tty_audit_buf *buf;
 	int major, minor;
@@ -299,7 +300,7 @@ void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
 	    && tty->driver->subtype == PTY_TYPE_MASTER)
 		return;
 
-	buf = tty_audit_buf_get(tty);
+	buf = tty_audit_buf_get(tty, icanon);
 	if (!buf)
 		return;
 
@@ -307,11 +308,11 @@ void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
 	major = tty->driver->major;
 	minor = tty->driver->minor_start + tty->index;
 	if (buf->major != major || buf->minor != minor
-	    || buf->icanon != tty->icanon) {
+	    || buf->icanon != icanon) {
 		tty_audit_buf_push_current(buf);
 		buf->major = major;
 		buf->minor = minor;
-		buf->icanon = tty->icanon;
+		buf->icanon = icanon;
 	}
 	do {
 		size_t run;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f0b4eb47297c..f02712da5d85 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -535,7 +535,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
 extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
-			       size_t size);
+			       size_t size, unsigned icanon);
 extern void tty_audit_exit(void);
 extern void tty_audit_fork(struct signal_struct *sig);
 extern void tty_audit_tiocsti(struct tty_struct *tty, char ch);
@@ -544,7 +544,7 @@ extern int tty_audit_push_task(struct task_struct *tsk,
 			       kuid_t loginuid, u32 sessionid);
 #else
 static inline void tty_audit_add_data(struct tty_struct *tty,
-				      unsigned char *data, size_t size)
+		unsigned char *data, size_t size, unsigned icanon)
 {
 }
 static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
-- 
cgit v1.2.3-55-g7522


From 53c5ee2cfb4dadc4f5c24fe671e2fbfc034c875e Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:39 +0200
Subject: TTY: move ldisc data from tty_struct: simple members

Here we start moving all the n_tty related bits from tty_struct to
the newly defined n_tty_data struct in n_tty proper.

In this patch primitive members and bits are moved. The rest will be
done per-partes in the next patches.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c  | 160 ++++++++++++++++++++++++++++++---------------------
 drivers/tty/tty_io.c |   1 -
 include/linux/tty.h  |   5 --
 3 files changed, 93 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 3d1594e10d00..bd775a7c0629 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -74,13 +74,20 @@
 #define ECHO_OP_ERASE_TAB 0x82
 
 struct n_tty_data {
-	char dummy;
+	unsigned int column;
+	unsigned long overrun_time;
+	int num_overrun;
+
+	unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1;
+	unsigned char echo_overrun:1;
 };
 
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
 			       unsigned char __user *ptr)
 {
-	tty_audit_add_data(tty, &x, 1, tty->icanon);
+	struct n_tty_data *ldata = tty->disc_data;
+
+	tty_audit_add_data(tty, &x, 1, ldata->icanon);
 	return put_user(x, ptr);
 }
 
@@ -96,6 +103,7 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
 
 static void n_tty_set_room(struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int left;
 	int old_left;
 
@@ -115,7 +123,7 @@ static void n_tty_set_room(struct tty_struct *tty)
 	 * characters will be beeped.
 	 */
 	if (left <= 0)
-		left = tty->icanon && !tty->canon_data;
+		left = ldata->icanon && !tty->canon_data;
 	old_left = tty->receive_room;
 	tty->receive_room = left;
 
@@ -183,6 +191,7 @@ static void check_unthrottle(struct tty_struct *tty)
 
 static void reset_buffer_flags(struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	unsigned long flags;
 
 	spin_lock_irqsave(&tty->read_lock, flags);
@@ -190,10 +199,10 @@ static void reset_buffer_flags(struct tty_struct *tty)
 	spin_unlock_irqrestore(&tty->read_lock, flags);
 
 	mutex_lock(&tty->echo_lock);
-	tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0;
+	tty->echo_pos = tty->echo_cnt = ldata->echo_overrun = 0;
 	mutex_unlock(&tty->echo_lock);
 
-	tty->canon_head = tty->canon_data = tty->erasing = 0;
+	tty->canon_head = tty->canon_data = ldata->erasing = 0;
 	memset(&tty->read_flags, 0, sizeof tty->read_flags);
 	n_tty_set_room(tty);
 }
@@ -239,11 +248,12 @@ static void n_tty_flush_buffer(struct tty_struct *tty)
 
 static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	unsigned long flags;
 	ssize_t n = 0;
 
 	spin_lock_irqsave(&tty->read_lock, flags);
-	if (!tty->icanon) {
+	if (!ldata->icanon) {
 		n = tty->read_cnt;
 	} else if (tty->canon_data) {
 		n = (tty->canon_head > tty->read_tail) ?
@@ -305,6 +315,7 @@ static inline int is_continuation(unsigned char c, struct tty_struct *tty)
 
 static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int	spaces;
 
 	if (!space)
@@ -313,48 +324,48 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 	switch (c) {
 	case '\n':
 		if (O_ONLRET(tty))
-			tty->column = 0;
+			ldata->column = 0;
 		if (O_ONLCR(tty)) {
 			if (space < 2)
 				return -1;
-			tty->canon_column = tty->column = 0;
+			tty->canon_column = ldata->column = 0;
 			tty->ops->write(tty, "\r\n", 2);
 			return 2;
 		}
-		tty->canon_column = tty->column;
+		tty->canon_column = ldata->column;
 		break;
 	case '\r':
-		if (O_ONOCR(tty) && tty->column == 0)
+		if (O_ONOCR(tty) && ldata->column == 0)
 			return 0;
 		if (O_OCRNL(tty)) {
 			c = '\n';
 			if (O_ONLRET(tty))
-				tty->canon_column = tty->column = 0;
+				tty->canon_column = ldata->column = 0;
 			break;
 		}
-		tty->canon_column = tty->column = 0;
+		tty->canon_column = ldata->column = 0;
 		break;
 	case '\t':
-		spaces = 8 - (tty->column & 7);
+		spaces = 8 - (ldata->column & 7);
 		if (O_TABDLY(tty) == XTABS) {
 			if (space < spaces)
 				return -1;
-			tty->column += spaces;
+			ldata->column += spaces;
 			tty->ops->write(tty, "        ", spaces);
 			return spaces;
 		}
-		tty->column += spaces;
+		ldata->column += spaces;
 		break;
 	case '\b':
-		if (tty->column > 0)
-			tty->column--;
+		if (ldata->column > 0)
+			ldata->column--;
 		break;
 	default:
 		if (!iscntrl(c)) {
 			if (O_OLCUC(tty))
 				c = toupper(c);
 			if (!is_continuation(c, tty))
-				tty->column++;
+				ldata->column++;
 		}
 		break;
 	}
@@ -415,6 +426,7 @@ static int process_output(unsigned char c, struct tty_struct *tty)
 static ssize_t process_output_block(struct tty_struct *tty,
 				    const unsigned char *buf, unsigned int nr)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int	space;
 	int	i;
 	const unsigned char *cp;
@@ -435,30 +447,30 @@ static ssize_t process_output_block(struct tty_struct *tty,
 		switch (c) {
 		case '\n':
 			if (O_ONLRET(tty))
-				tty->column = 0;
+				ldata->column = 0;
 			if (O_ONLCR(tty))
 				goto break_out;
-			tty->canon_column = tty->column;
+			tty->canon_column = ldata->column;
 			break;
 		case '\r':
-			if (O_ONOCR(tty) && tty->column == 0)
+			if (O_ONOCR(tty) && ldata->column == 0)
 				goto break_out;
 			if (O_OCRNL(tty))
 				goto break_out;
-			tty->canon_column = tty->column = 0;
+			tty->canon_column = ldata->column = 0;
 			break;
 		case '\t':
 			goto break_out;
 		case '\b':
-			if (tty->column > 0)
-				tty->column--;
+			if (ldata->column > 0)
+				ldata->column--;
 			break;
 		default:
 			if (!iscntrl(c)) {
 				if (O_OLCUC(tty))
 					goto break_out;
 				if (!is_continuation(c, tty))
-					tty->column++;
+					ldata->column++;
 			}
 			break;
 		}
@@ -498,6 +510,7 @@ break_out:
 
 static void process_echoes(struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int	space, nr;
 	unsigned char c;
 	unsigned char *cp, *buf_end;
@@ -559,22 +572,22 @@ static void process_echoes(struct tty_struct *tty)
 				space -= num_bs;
 				while (num_bs--) {
 					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					if (ldata->column > 0)
+						ldata->column--;
 				}
 				cp += 3;
 				nr -= 3;
 				break;
 
 			case ECHO_OP_SET_CANON_COL:
-				tty->canon_column = tty->column;
+				tty->canon_column = ldata->column;
 				cp += 2;
 				nr -= 2;
 				break;
 
 			case ECHO_OP_MOVE_BACK_COL:
-				if (tty->column > 0)
-					tty->column--;
+				if (ldata->column > 0)
+					ldata->column--;
 				cp += 2;
 				nr -= 2;
 				break;
@@ -586,7 +599,7 @@ static void process_echoes(struct tty_struct *tty)
 					break;
 				}
 				tty_put_char(tty, ECHO_OP_START);
-				tty->column++;
+				ldata->column++;
 				space--;
 				cp += 2;
 				nr -= 2;
@@ -608,7 +621,7 @@ static void process_echoes(struct tty_struct *tty)
 				}
 				tty_put_char(tty, '^');
 				tty_put_char(tty, op ^ 0100);
-				tty->column += 2;
+				ldata->column += 2;
 				space -= 2;
 				cp += 2;
 				nr -= 2;
@@ -641,14 +654,14 @@ static void process_echoes(struct tty_struct *tty)
 	if (nr == 0) {
 		tty->echo_pos = 0;
 		tty->echo_cnt = 0;
-		tty->echo_overrun = 0;
+		ldata->echo_overrun = 0;
 	} else {
 		int num_processed = tty->echo_cnt - nr;
 		tty->echo_pos += num_processed;
 		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
 		tty->echo_cnt = nr;
 		if (num_processed > 0)
-			tty->echo_overrun = 0;
+			ldata->echo_overrun = 0;
 	}
 
 	mutex_unlock(&tty->echo_lock);
@@ -670,6 +683,7 @@ static void process_echoes(struct tty_struct *tty)
 
 static void add_echo_byte(unsigned char c, struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int	new_byte_pos;
 
 	if (tty->echo_cnt == N_TTY_BUF_SIZE) {
@@ -695,7 +709,7 @@ static void add_echo_byte(unsigned char c, struct tty_struct *tty)
 		}
 		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
 
-		tty->echo_overrun = 1;
+		ldata->echo_overrun = 1;
 	} else {
 		new_byte_pos = tty->echo_pos + tty->echo_cnt;
 		new_byte_pos &= N_TTY_BUF_SIZE - 1;
@@ -845,9 +859,10 @@ static void echo_char(unsigned char c, struct tty_struct *tty)
 
 static inline void finish_erasing(struct tty_struct *tty)
 {
-	if (tty->erasing) {
+	struct n_tty_data *ldata = tty->disc_data;
+	if (ldata->erasing) {
 		echo_char_raw('/', tty);
-		tty->erasing = 0;
+		ldata->erasing = 0;
 	}
 }
 
@@ -865,6 +880,7 @@ static inline void finish_erasing(struct tty_struct *tty)
 
 static void eraser(unsigned char c, struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	enum { ERASE, WERASE, KILL } kill_type;
 	int head, seen_alnums, cnt;
 	unsigned long flags;
@@ -932,9 +948,9 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 		spin_unlock_irqrestore(&tty->read_lock, flags);
 		if (L_ECHO(tty)) {
 			if (L_ECHOPRT(tty)) {
-				if (!tty->erasing) {
+				if (!ldata->erasing) {
 					echo_char_raw('\\', tty);
-					tty->erasing = 1;
+					ldata->erasing = 1;
 				}
 				/* if cnt > 1, output a multi-byte character */
 				echo_char(c, tty);
@@ -1056,16 +1072,17 @@ static inline void n_tty_receive_break(struct tty_struct *tty)
 
 static inline void n_tty_receive_overrun(struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	char buf[64];
 
-	tty->num_overrun++;
-	if (time_before(tty->overrun_time, jiffies - HZ) ||
-			time_after(tty->overrun_time, jiffies)) {
+	ldata->num_overrun++;
+	if (time_after(jiffies, ldata->overrun_time + HZ) ||
+			time_after(ldata->overrun_time, jiffies)) {
 		printk(KERN_WARNING "%s: %d input overrun(s)\n",
 			tty_name(tty, buf),
-			tty->num_overrun);
-		tty->overrun_time = jiffies;
-		tty->num_overrun = 0;
+			ldata->num_overrun);
+		ldata->overrun_time = jiffies;
+		ldata->num_overrun = 0;
 	}
 }
 
@@ -1105,10 +1122,11 @@ static inline void n_tty_receive_parity_error(struct tty_struct *tty,
 
 static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	unsigned long flags;
 	int parmrk;
 
-	if (tty->raw) {
+	if (ldata->raw) {
 		put_tty_queue(c, tty);
 		return;
 	}
@@ -1147,8 +1165,8 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	 * handle specially, do shortcut processing to speed things
 	 * up.
 	 */
-	if (!test_bit(c, tty->process_char_map) || tty->lnext) {
-		tty->lnext = 0;
+	if (!test_bit(c, tty->process_char_map) || ldata->lnext) {
+		ldata->lnext = 0;
 		parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
 		if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
 			/* beep if no space */
@@ -1222,7 +1240,7 @@ send_signal:
 	} else if (c == '\n' && I_INLCR(tty))
 		c = '\r';
 
-	if (tty->icanon) {
+	if (ldata->icanon) {
 		if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) ||
 		    (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) {
 			eraser(c, tty);
@@ -1230,7 +1248,7 @@ send_signal:
 			return;
 		}
 		if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) {
-			tty->lnext = 1;
+			ldata->lnext = 1;
 			if (L_ECHO(tty)) {
 				finish_erasing(tty);
 				if (L_ECHOCTL(tty)) {
@@ -1373,13 +1391,14 @@ static void n_tty_write_wakeup(struct tty_struct *tty)
 static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 			      char *fp, int count)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	const unsigned char *p;
 	char *f, flags = TTY_NORMAL;
 	int	i;
 	char	buf[64];
 	unsigned long cpuflags;
 
-	if (tty->real_raw) {
+	if (ldata->real_raw) {
 		spin_lock_irqsave(&tty->read_lock, cpuflags);
 		i = min(N_TTY_BUF_SIZE - tty->read_cnt,
 			N_TTY_BUF_SIZE - tty->read_head);
@@ -1427,7 +1446,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 
 	n_tty_set_room(tty);
 
-	if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) ||
+	if ((!ldata->icanon && (tty->read_cnt >= tty->minimum_to_wake)) ||
 		L_EXTPROC(tty)) {
 		kill_fasync(&tty->fasync, SIGIO, POLL_IN);
 		if (waitqueue_active(&tty->read_wait))
@@ -1471,6 +1490,7 @@ int is_ignored(int sig)
 
 static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int canon_change = 1;
 
 	if (old)
@@ -1479,16 +1499,16 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 		memset(&tty->read_flags, 0, sizeof tty->read_flags);
 		tty->canon_head = tty->read_tail;
 		tty->canon_data = 0;
-		tty->erasing = 0;
+		ldata->erasing = 0;
 	}
 
 	if (canon_change && !L_ICANON(tty) && tty->read_cnt)
 		wake_up_interruptible(&tty->read_wait);
 
-	tty->icanon = (L_ICANON(tty) != 0);
+	ldata->icanon = (L_ICANON(tty) != 0);
 	if (test_bit(TTY_HW_COOK_IN, &tty->flags)) {
-		tty->raw = 1;
-		tty->real_raw = 1;
+		ldata->raw = 1;
+		ldata->real_raw = 1;
 		n_tty_set_room(tty);
 		return;
 	}
@@ -1531,16 +1551,16 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 			set_bit(SUSP_CHAR(tty), tty->process_char_map);
 		}
 		clear_bit(__DISABLED_CHAR, tty->process_char_map);
-		tty->raw = 0;
-		tty->real_raw = 0;
+		ldata->raw = 0;
+		ldata->real_raw = 0;
 	} else {
-		tty->raw = 1;
+		ldata->raw = 1;
 		if ((I_IGNBRK(tty) || (!I_BRKINT(tty) && !I_PARMRK(tty))) &&
 		    (I_IGNPAR(tty) || !I_INPCK(tty)) &&
 		    (tty->driver->flags & TTY_DRIVER_REAL_RAW))
-			tty->real_raw = 1;
+			ldata->real_raw = 1;
 		else
-			tty->real_raw = 0;
+			ldata->real_raw = 0;
 	}
 	n_tty_set_room(tty);
 	/* The termios change make the tty ready for I/O */
@@ -1589,6 +1609,8 @@ static int n_tty_open(struct tty_struct *tty)
 	if (!ldata)
 		goto err;
 
+	ldata->overrun_time = jiffies;
+
 	/* These are ugly. Currently a malloc failure here can panic */
 	tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 	tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
@@ -1598,7 +1620,7 @@ static int n_tty_open(struct tty_struct *tty)
 	tty->disc_data = ldata;
 	reset_buffer_flags(tty);
 	tty_unthrottle(tty);
-	tty->column = 0;
+	ldata->column = 0;
 	n_tty_set_termios(tty, NULL);
 	tty->minimum_to_wake = 1;
 	tty->closing = 0;
@@ -1614,8 +1636,10 @@ err:
 
 static inline int input_available_p(struct tty_struct *tty, int amt)
 {
+	struct n_tty_data *ldata = tty->disc_data;
+
 	tty_flush_to_ldisc(tty);
-	if (tty->icanon && !L_EXTPROC(tty)) {
+	if (ldata->icanon && !L_EXTPROC(tty)) {
 		if (tty->canon_data)
 			return 1;
 	} else if (tty->read_cnt >= (amt ? amt : 1))
@@ -1646,6 +1670,7 @@ static int copy_from_read_buf(struct tty_struct *tty,
 				      size_t *nr)
 
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int retval;
 	size_t n;
 	unsigned long flags;
@@ -1662,12 +1687,12 @@ static int copy_from_read_buf(struct tty_struct *tty,
 		is_eof = n == 1 &&
 			tty->read_buf[tty->read_tail] == EOF_CHAR(tty);
 		tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n,
-				tty->icanon);
+				ldata->icanon);
 		spin_lock_irqsave(&tty->read_lock, flags);
 		tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1);
 		tty->read_cnt -= n;
 		/* Turn single EOF into zero-length read */
-		if (L_EXTPROC(tty) && tty->icanon && is_eof && !tty->read_cnt)
+		if (L_EXTPROC(tty) && ldata->icanon && is_eof && !tty->read_cnt)
 			n = 0;
 		spin_unlock_irqrestore(&tty->read_lock, flags);
 		*b += n;
@@ -1736,6 +1761,7 @@ static int job_control(struct tty_struct *tty, struct file *file)
 static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
 			 unsigned char __user *buf, size_t nr)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	unsigned char __user *b = buf;
 	DECLARE_WAITQUEUE(wait, current);
 	int c;
@@ -1753,7 +1779,7 @@ do_it_again:
 
 	minimum = time = 0;
 	timeout = MAX_SCHEDULE_TIMEOUT;
-	if (!tty->icanon) {
+	if (!ldata->icanon) {
 		time = (HZ / 10) * TIME_CHAR(tty);
 		minimum = MIN_CHAR(tty);
 		if (minimum) {
@@ -1846,7 +1872,7 @@ do_it_again:
 			nr--;
 		}
 
-		if (tty->icanon && !L_EXTPROC(tty)) {
+		if (ldata->icanon && !L_EXTPROC(tty)) {
 			/* N.B. avoid overrun if nr == 0 */
 			spin_lock_irqsave(&tty->read_lock, flags);
 			while (nr && tty->read_cnt) {
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index e835a5b8d089..67b024ca16ec 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2932,7 +2932,6 @@ void initialize_tty_struct(struct tty_struct *tty,
 	tty_ldisc_init(tty);
 	tty->session = NULL;
 	tty->pgrp = NULL;
-	tty->overrun_time = jiffies;
 	tty_buffer_init(tty);
 	mutex_init(&tty->legacy_mutex);
 	mutex_init(&tty->termios_mutex);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f02712da5d85..de590cec973b 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -270,13 +270,8 @@ struct tty_struct {
 	 * historical reasons, this is included in the tty structure.
 	 * Mostly locked by the BKL.
 	 */
-	unsigned int column;
-	unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1;
 	unsigned char closing:1;
-	unsigned char echo_overrun:1;
 	unsigned short minimum_to_wake;
-	unsigned long overrun_time;
-	int num_overrun;
 	unsigned long process_char_map[256/(8*sizeof(unsigned long))];
 	char *read_buf;
 	int read_head;
-- 
cgit v1.2.3-55-g7522


From 3fe780b379fac2e1eeb5907ee7c864756ce7ec83 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:40 +0200
Subject: TTY: move ldisc data from tty_struct: bitmaps

Here we move bitmaps and use DECLARE_BITMAP to declare them in the new
structure. And instead of memset, we use bitmap_zero as it is more
appropriate.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 52 ++++++++++++++++++++++++++++------------------------
 include/linux/tty.h |  2 --
 2 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index bd775a7c0629..702dd4adbdc9 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -80,6 +80,9 @@ struct n_tty_data {
 
 	unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1;
 	unsigned char echo_overrun:1;
+
+	DECLARE_BITMAP(process_char_map, 256);
+	DECLARE_BITMAP(read_flags, N_TTY_BUF_SIZE);
 };
 
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
@@ -203,7 +206,7 @@ static void reset_buffer_flags(struct tty_struct *tty)
 	mutex_unlock(&tty->echo_lock);
 
 	tty->canon_head = tty->canon_data = ldata->erasing = 0;
-	memset(&tty->read_flags, 0, sizeof tty->read_flags);
+	bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
 	n_tty_set_room(tty);
 }
 
@@ -1165,7 +1168,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	 * handle specially, do shortcut processing to speed things
 	 * up.
 	 */
-	if (!test_bit(c, tty->process_char_map) || ldata->lnext) {
+	if (!test_bit(c, ldata->process_char_map) || ldata->lnext) {
 		ldata->lnext = 0;
 		parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
 		if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
@@ -1321,7 +1324,7 @@ send_signal:
 
 handle_newline:
 			spin_lock_irqsave(&tty->read_lock, flags);
-			set_bit(tty->read_head, tty->read_flags);
+			set_bit(tty->read_head, ldata->read_flags);
 			put_tty_queue_nolock(c, tty);
 			tty->canon_head = tty->read_head;
 			tty->canon_data++;
@@ -1496,7 +1499,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 	if (old)
 		canon_change = (old->c_lflag ^ tty->termios.c_lflag) & ICANON;
 	if (canon_change) {
-		memset(&tty->read_flags, 0, sizeof tty->read_flags);
+		bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
 		tty->canon_head = tty->read_tail;
 		tty->canon_data = 0;
 		ldata->erasing = 0;
@@ -1516,41 +1519,41 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 	    I_ICRNL(tty) || I_INLCR(tty) || L_ICANON(tty) ||
 	    I_IXON(tty) || L_ISIG(tty) || L_ECHO(tty) ||
 	    I_PARMRK(tty)) {
-		memset(tty->process_char_map, 0, 256/8);
+		bitmap_zero(ldata->process_char_map, 256);
 
 		if (I_IGNCR(tty) || I_ICRNL(tty))
-			set_bit('\r', tty->process_char_map);
+			set_bit('\r', ldata->process_char_map);
 		if (I_INLCR(tty))
-			set_bit('\n', tty->process_char_map);
+			set_bit('\n', ldata->process_char_map);
 
 		if (L_ICANON(tty)) {
-			set_bit(ERASE_CHAR(tty), tty->process_char_map);
-			set_bit(KILL_CHAR(tty), tty->process_char_map);
-			set_bit(EOF_CHAR(tty), tty->process_char_map);
-			set_bit('\n', tty->process_char_map);
-			set_bit(EOL_CHAR(tty), tty->process_char_map);
+			set_bit(ERASE_CHAR(tty), ldata->process_char_map);
+			set_bit(KILL_CHAR(tty), ldata->process_char_map);
+			set_bit(EOF_CHAR(tty), ldata->process_char_map);
+			set_bit('\n', ldata->process_char_map);
+			set_bit(EOL_CHAR(tty), ldata->process_char_map);
 			if (L_IEXTEN(tty)) {
 				set_bit(WERASE_CHAR(tty),
-					tty->process_char_map);
+					ldata->process_char_map);
 				set_bit(LNEXT_CHAR(tty),
-					tty->process_char_map);
+					ldata->process_char_map);
 				set_bit(EOL2_CHAR(tty),
-					tty->process_char_map);
+					ldata->process_char_map);
 				if (L_ECHO(tty))
 					set_bit(REPRINT_CHAR(tty),
-						tty->process_char_map);
+						ldata->process_char_map);
 			}
 		}
 		if (I_IXON(tty)) {
-			set_bit(START_CHAR(tty), tty->process_char_map);
-			set_bit(STOP_CHAR(tty), tty->process_char_map);
+			set_bit(START_CHAR(tty), ldata->process_char_map);
+			set_bit(STOP_CHAR(tty), ldata->process_char_map);
 		}
 		if (L_ISIG(tty)) {
-			set_bit(INTR_CHAR(tty), tty->process_char_map);
-			set_bit(QUIT_CHAR(tty), tty->process_char_map);
-			set_bit(SUSP_CHAR(tty), tty->process_char_map);
+			set_bit(INTR_CHAR(tty), ldata->process_char_map);
+			set_bit(QUIT_CHAR(tty), ldata->process_char_map);
+			set_bit(SUSP_CHAR(tty), ldata->process_char_map);
 		}
-		clear_bit(__DISABLED_CHAR, tty->process_char_map);
+		clear_bit(__DISABLED_CHAR, ldata->process_char_map);
 		ldata->raw = 0;
 		ldata->real_raw = 0;
 	} else {
@@ -1879,7 +1882,7 @@ do_it_again:
 				int eol;
 
 				eol = test_and_clear_bit(tty->read_tail,
-						tty->read_flags);
+						ldata->read_flags);
 				c = tty->read_buf[tty->read_tail];
 				tty->read_tail = ((tty->read_tail+1) &
 						  (N_TTY_BUF_SIZE-1));
@@ -2105,6 +2108,7 @@ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file,
 
 static unsigned long inq_canon(struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int nr, head, tail;
 
 	if (!tty->canon_data)
@@ -2114,7 +2118,7 @@ static unsigned long inq_canon(struct tty_struct *tty)
 	nr = (head - tail) & (N_TTY_BUF_SIZE-1);
 	/* Skip EOF-chars.. */
 	while (head != tail) {
-		if (test_bit(tail, tty->read_flags) &&
+		if (test_bit(tail, ldata->read_flags) &&
 		    tty->read_buf[tail] == __DISABLED_CHAR)
 			nr--;
 		tail = (tail+1) & (N_TTY_BUF_SIZE-1);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index de590cec973b..2161e6b5a94c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -272,12 +272,10 @@ struct tty_struct {
 	 */
 	unsigned char closing:1;
 	unsigned short minimum_to_wake;
-	unsigned long process_char_map[256/(8*sizeof(unsigned long))];
 	char *read_buf;
 	int read_head;
 	int read_tail;
 	int read_cnt;
-	unsigned long read_flags[N_TTY_BUF_SIZE/(8*sizeof(unsigned long))];
 	unsigned char *echo_buf;
 	unsigned int echo_pos;
 	unsigned int echo_cnt;
-- 
cgit v1.2.3-55-g7522


From ba2e68ac6157004ee4922fb39ebd9459bbae883e Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:41 +0200
Subject: TTY: move ldisc data from tty_struct: read_* and echo_* and canon_*
 stuff

All the ring-buffers...

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 260 +++++++++++++++++++++++++++-------------------------
 include/linux/tty.h |  10 --
 2 files changed, 137 insertions(+), 133 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 702dd4adbdc9..4794537a50ff 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -83,6 +83,19 @@ struct n_tty_data {
 
 	DECLARE_BITMAP(process_char_map, 256);
 	DECLARE_BITMAP(read_flags, N_TTY_BUF_SIZE);
+
+	char *read_buf;
+	int read_head;
+	int read_tail;
+	int read_cnt;
+
+	unsigned char *echo_buf;
+	unsigned int echo_pos;
+	unsigned int echo_cnt;
+
+	int canon_data;
+	unsigned long canon_head;
+	unsigned int canon_column;
 };
 
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
@@ -110,14 +123,14 @@ static void n_tty_set_room(struct tty_struct *tty)
 	int left;
 	int old_left;
 
-	/* tty->read_cnt is not read locked ? */
+	/* ldata->read_cnt is not read locked ? */
 	if (I_PARMRK(tty)) {
 		/* Multiply read_cnt by 3, since each byte might take up to
 		 * three times as many spaces when PARMRK is set (depending on
 		 * its flags, e.g. parity error). */
-		left = N_TTY_BUF_SIZE - tty->read_cnt * 3 - 1;
+		left = N_TTY_BUF_SIZE - ldata->read_cnt * 3 - 1;
 	} else
-		left = N_TTY_BUF_SIZE - tty->read_cnt - 1;
+		left = N_TTY_BUF_SIZE - ldata->read_cnt - 1;
 
 	/*
 	 * If we are doing input canonicalization, and there are no
@@ -126,7 +139,7 @@ static void n_tty_set_room(struct tty_struct *tty)
 	 * characters will be beeped.
 	 */
 	if (left <= 0)
-		left = ldata->icanon && !tty->canon_data;
+		left = ldata->icanon && !ldata->canon_data;
 	old_left = tty->receive_room;
 	tty->receive_room = left;
 
@@ -137,10 +150,12 @@ static void n_tty_set_room(struct tty_struct *tty)
 
 static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty)
 {
-	if (tty->read_cnt < N_TTY_BUF_SIZE) {
-		tty->read_buf[tty->read_head] = c;
-		tty->read_head = (tty->read_head + 1) & (N_TTY_BUF_SIZE-1);
-		tty->read_cnt++;
+	struct n_tty_data *ldata = tty->disc_data;
+
+	if (ldata->read_cnt < N_TTY_BUF_SIZE) {
+		ldata->read_buf[ldata->read_head] = c;
+		ldata->read_head = (ldata->read_head + 1) & (N_TTY_BUF_SIZE-1);
+		ldata->read_cnt++;
 	}
 }
 
@@ -198,14 +213,14 @@ static void reset_buffer_flags(struct tty_struct *tty)
 	unsigned long flags;
 
 	spin_lock_irqsave(&tty->read_lock, flags);
-	tty->read_head = tty->read_tail = tty->read_cnt = 0;
+	ldata->read_head = ldata->read_tail = ldata->read_cnt = 0;
 	spin_unlock_irqrestore(&tty->read_lock, flags);
 
 	mutex_lock(&tty->echo_lock);
-	tty->echo_pos = tty->echo_cnt = ldata->echo_overrun = 0;
+	ldata->echo_pos = ldata->echo_cnt = ldata->echo_overrun = 0;
 	mutex_unlock(&tty->echo_lock);
 
-	tty->canon_head = tty->canon_data = ldata->erasing = 0;
+	ldata->canon_head = ldata->canon_data = ldata->erasing = 0;
 	bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
 	n_tty_set_room(tty);
 }
@@ -257,11 +272,11 @@ static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty)
 
 	spin_lock_irqsave(&tty->read_lock, flags);
 	if (!ldata->icanon) {
-		n = tty->read_cnt;
-	} else if (tty->canon_data) {
-		n = (tty->canon_head > tty->read_tail) ?
-			tty->canon_head - tty->read_tail :
-			tty->canon_head + (N_TTY_BUF_SIZE - tty->read_tail);
+		n = ldata->read_cnt;
+	} else if (ldata->canon_data) {
+		n = (ldata->canon_head > ldata->read_tail) ?
+			ldata->canon_head - ldata->read_tail :
+			ldata->canon_head + (N_TTY_BUF_SIZE - ldata->read_tail);
 	}
 	spin_unlock_irqrestore(&tty->read_lock, flags);
 	return n;
@@ -331,11 +346,11 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 		if (O_ONLCR(tty)) {
 			if (space < 2)
 				return -1;
-			tty->canon_column = ldata->column = 0;
+			ldata->canon_column = ldata->column = 0;
 			tty->ops->write(tty, "\r\n", 2);
 			return 2;
 		}
-		tty->canon_column = ldata->column;
+		ldata->canon_column = ldata->column;
 		break;
 	case '\r':
 		if (O_ONOCR(tty) && ldata->column == 0)
@@ -343,10 +358,10 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 		if (O_OCRNL(tty)) {
 			c = '\n';
 			if (O_ONLRET(tty))
-				tty->canon_column = ldata->column = 0;
+				ldata->canon_column = ldata->column = 0;
 			break;
 		}
-		tty->canon_column = ldata->column = 0;
+		ldata->canon_column = ldata->column = 0;
 		break;
 	case '\t':
 		spaces = 8 - (ldata->column & 7);
@@ -453,14 +468,14 @@ static ssize_t process_output_block(struct tty_struct *tty,
 				ldata->column = 0;
 			if (O_ONLCR(tty))
 				goto break_out;
-			tty->canon_column = ldata->column;
+			ldata->canon_column = ldata->column;
 			break;
 		case '\r':
 			if (O_ONOCR(tty) && ldata->column == 0)
 				goto break_out;
 			if (O_OCRNL(tty))
 				goto break_out;
-			tty->canon_column = ldata->column = 0;
+			ldata->canon_column = ldata->column = 0;
 			break;
 		case '\t':
 			goto break_out;
@@ -518,7 +533,7 @@ static void process_echoes(struct tty_struct *tty)
 	unsigned char c;
 	unsigned char *cp, *buf_end;
 
-	if (!tty->echo_cnt)
+	if (!ldata->echo_cnt)
 		return;
 
 	mutex_lock(&tty->output_lock);
@@ -526,9 +541,9 @@ static void process_echoes(struct tty_struct *tty)
 
 	space = tty_write_room(tty);
 
-	buf_end = tty->echo_buf + N_TTY_BUF_SIZE;
-	cp = tty->echo_buf + tty->echo_pos;
-	nr = tty->echo_cnt;
+	buf_end = ldata->echo_buf + N_TTY_BUF_SIZE;
+	cp = ldata->echo_buf + ldata->echo_pos;
+	nr = ldata->echo_cnt;
 	while (nr > 0) {
 		c = *cp;
 		if (c == ECHO_OP_START) {
@@ -565,7 +580,7 @@ static void process_echoes(struct tty_struct *tty)
 				 * Otherwise, tab spacing is normal.
 				 */
 				if (!(num_chars & 0x80))
-					num_chars += tty->canon_column;
+					num_chars += ldata->canon_column;
 				num_bs = 8 - (num_chars & 7);
 
 				if (num_bs > space) {
@@ -583,7 +598,7 @@ static void process_echoes(struct tty_struct *tty)
 				break;
 
 			case ECHO_OP_SET_CANON_COL:
-				tty->canon_column = ldata->column;
+				ldata->canon_column = ldata->column;
 				cp += 2;
 				nr -= 2;
 				break;
@@ -655,14 +670,14 @@ static void process_echoes(struct tty_struct *tty)
 	}
 
 	if (nr == 0) {
-		tty->echo_pos = 0;
-		tty->echo_cnt = 0;
+		ldata->echo_pos = 0;
+		ldata->echo_cnt = 0;
 		ldata->echo_overrun = 0;
 	} else {
-		int num_processed = tty->echo_cnt - nr;
-		tty->echo_pos += num_processed;
-		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
-		tty->echo_cnt = nr;
+		int num_processed = ldata->echo_cnt - nr;
+		ldata->echo_pos += num_processed;
+		ldata->echo_pos &= N_TTY_BUF_SIZE - 1;
+		ldata->echo_cnt = nr;
 		if (num_processed > 0)
 			ldata->echo_overrun = 0;
 	}
@@ -689,37 +704,37 @@ static void add_echo_byte(unsigned char c, struct tty_struct *tty)
 	struct n_tty_data *ldata = tty->disc_data;
 	int	new_byte_pos;
 
-	if (tty->echo_cnt == N_TTY_BUF_SIZE) {
+	if (ldata->echo_cnt == N_TTY_BUF_SIZE) {
 		/* Circular buffer is already at capacity */
-		new_byte_pos = tty->echo_pos;
+		new_byte_pos = ldata->echo_pos;
 
 		/*
 		 * Since the buffer start position needs to be advanced,
 		 * be sure to step by a whole operation byte group.
 		 */
-		if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START) {
-			if (tty->echo_buf[(tty->echo_pos + 1) &
+		if (ldata->echo_buf[ldata->echo_pos] == ECHO_OP_START) {
+			if (ldata->echo_buf[(ldata->echo_pos + 1) &
 					  (N_TTY_BUF_SIZE - 1)] ==
 						ECHO_OP_ERASE_TAB) {
-				tty->echo_pos += 3;
-				tty->echo_cnt -= 2;
+				ldata->echo_pos += 3;
+				ldata->echo_cnt -= 2;
 			} else {
-				tty->echo_pos += 2;
-				tty->echo_cnt -= 1;
+				ldata->echo_pos += 2;
+				ldata->echo_cnt -= 1;
 			}
 		} else {
-			tty->echo_pos++;
+			ldata->echo_pos++;
 		}
-		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+		ldata->echo_pos &= N_TTY_BUF_SIZE - 1;
 
 		ldata->echo_overrun = 1;
 	} else {
-		new_byte_pos = tty->echo_pos + tty->echo_cnt;
+		new_byte_pos = ldata->echo_pos + ldata->echo_cnt;
 		new_byte_pos &= N_TTY_BUF_SIZE - 1;
-		tty->echo_cnt++;
+		ldata->echo_cnt++;
 	}
 
-	tty->echo_buf[new_byte_pos] = c;
+	ldata->echo_buf[new_byte_pos] = c;
 }
 
 /**
@@ -889,7 +904,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 	unsigned long flags;
 
 	/* FIXME: locking needed ? */
-	if (tty->read_head == tty->canon_head) {
+	if (ldata->read_head == ldata->canon_head) {
 		/* process_output('\a', tty); */ /* what do you think? */
 		return;
 	}
@@ -900,17 +915,17 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 	else {
 		if (!L_ECHO(tty)) {
 			spin_lock_irqsave(&tty->read_lock, flags);
-			tty->read_cnt -= ((tty->read_head - tty->canon_head) &
+			ldata->read_cnt -= ((ldata->read_head - ldata->canon_head) &
 					  (N_TTY_BUF_SIZE - 1));
-			tty->read_head = tty->canon_head;
+			ldata->read_head = ldata->canon_head;
 			spin_unlock_irqrestore(&tty->read_lock, flags);
 			return;
 		}
 		if (!L_ECHOK(tty) || !L_ECHOKE(tty) || !L_ECHOE(tty)) {
 			spin_lock_irqsave(&tty->read_lock, flags);
-			tty->read_cnt -= ((tty->read_head - tty->canon_head) &
+			ldata->read_cnt -= ((ldata->read_head - ldata->canon_head) &
 					  (N_TTY_BUF_SIZE - 1));
-			tty->read_head = tty->canon_head;
+			ldata->read_head = ldata->canon_head;
 			spin_unlock_irqrestore(&tty->read_lock, flags);
 			finish_erasing(tty);
 			echo_char(KILL_CHAR(tty), tty);
@@ -924,14 +939,14 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 
 	seen_alnums = 0;
 	/* FIXME: Locking ?? */
-	while (tty->read_head != tty->canon_head) {
-		head = tty->read_head;
+	while (ldata->read_head != ldata->canon_head) {
+		head = ldata->read_head;
 
 		/* erase a single possibly multibyte character */
 		do {
 			head = (head - 1) & (N_TTY_BUF_SIZE-1);
-			c = tty->read_buf[head];
-		} while (is_continuation(c, tty) && head != tty->canon_head);
+			c = ldata->read_buf[head];
+		} while (is_continuation(c, tty) && head != ldata->canon_head);
 
 		/* do not partially erase */
 		if (is_continuation(c, tty))
@@ -944,10 +959,10 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 			else if (seen_alnums)
 				break;
 		}
-		cnt = (tty->read_head - head) & (N_TTY_BUF_SIZE-1);
+		cnt = (ldata->read_head - head) & (N_TTY_BUF_SIZE-1);
 		spin_lock_irqsave(&tty->read_lock, flags);
-		tty->read_head = head;
-		tty->read_cnt -= cnt;
+		ldata->read_head = head;
+		ldata->read_cnt -= cnt;
 		spin_unlock_irqrestore(&tty->read_lock, flags);
 		if (L_ECHO(tty)) {
 			if (L_ECHOPRT(tty)) {
@@ -959,7 +974,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 				echo_char(c, tty);
 				while (--cnt > 0) {
 					head = (head+1) & (N_TTY_BUF_SIZE-1);
-					echo_char_raw(tty->read_buf[head], tty);
+					echo_char_raw(ldata->read_buf[head], tty);
 					echo_move_back_col(tty);
 				}
 			} else if (kill_type == ERASE && !L_ECHOE(tty)) {
@@ -967,7 +982,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 			} else if (c == '\t') {
 				unsigned int num_chars = 0;
 				int after_tab = 0;
-				unsigned long tail = tty->read_head;
+				unsigned long tail = ldata->read_head;
 
 				/*
 				 * Count the columns used for characters
@@ -976,9 +991,9 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 				 * This info is used to go back the correct
 				 * number of columns.
 				 */
-				while (tail != tty->canon_head) {
+				while (tail != ldata->canon_head) {
 					tail = (tail-1) & (N_TTY_BUF_SIZE-1);
-					c = tty->read_buf[tail];
+					c = ldata->read_buf[tail];
 					if (c == '\t') {
 						after_tab = 1;
 						break;
@@ -1006,7 +1021,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 		if (kill_type == ERASE)
 			break;
 	}
-	if (tty->read_head == tty->canon_head && L_ECHO(tty))
+	if (ldata->read_head == ldata->canon_head && L_ECHO(tty))
 		finish_erasing(tty);
 }
 
@@ -1171,7 +1186,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	if (!test_bit(c, ldata->process_char_map) || ldata->lnext) {
 		ldata->lnext = 0;
 		parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
-		if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+		if (ldata->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
 			/* beep if no space */
 			if (L_ECHO(tty))
 				process_output('\a', tty);
@@ -1180,7 +1195,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 		if (L_ECHO(tty)) {
 			finish_erasing(tty);
 			/* Record the column of first canon char. */
-			if (tty->canon_head == tty->read_head)
+			if (ldata->canon_head == ldata->read_head)
 				echo_set_canon_col(tty);
 			echo_char(c, tty);
 			process_echoes(tty);
@@ -1264,20 +1279,20 @@ send_signal:
 		}
 		if (c == REPRINT_CHAR(tty) && L_ECHO(tty) &&
 		    L_IEXTEN(tty)) {
-			unsigned long tail = tty->canon_head;
+			unsigned long tail = ldata->canon_head;
 
 			finish_erasing(tty);
 			echo_char(c, tty);
 			echo_char_raw('\n', tty);
-			while (tail != tty->read_head) {
-				echo_char(tty->read_buf[tail], tty);
+			while (tail != ldata->read_head) {
+				echo_char(ldata->read_buf[tail], tty);
 				tail = (tail+1) & (N_TTY_BUF_SIZE-1);
 			}
 			process_echoes(tty);
 			return;
 		}
 		if (c == '\n') {
-			if (tty->read_cnt >= N_TTY_BUF_SIZE) {
+			if (ldata->read_cnt >= N_TTY_BUF_SIZE) {
 				if (L_ECHO(tty))
 					process_output('\a', tty);
 				return;
@@ -1289,9 +1304,9 @@ send_signal:
 			goto handle_newline;
 		}
 		if (c == EOF_CHAR(tty)) {
-			if (tty->read_cnt >= N_TTY_BUF_SIZE)
+			if (ldata->read_cnt >= N_TTY_BUF_SIZE)
 				return;
-			if (tty->canon_head != tty->read_head)
+			if (ldata->canon_head != ldata->read_head)
 				set_bit(TTY_PUSH, &tty->flags);
 			c = __DISABLED_CHAR;
 			goto handle_newline;
@@ -1300,7 +1315,7 @@ send_signal:
 		    (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) {
 			parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty))
 				 ? 1 : 0;
-			if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
+			if (ldata->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
 				if (L_ECHO(tty))
 					process_output('\a', tty);
 				return;
@@ -1310,7 +1325,7 @@ send_signal:
 			 */
 			if (L_ECHO(tty)) {
 				/* Record the column of first canon char. */
-				if (tty->canon_head == tty->read_head)
+				if (ldata->canon_head == ldata->read_head)
 					echo_set_canon_col(tty);
 				echo_char(c, tty);
 				process_echoes(tty);
@@ -1324,10 +1339,10 @@ send_signal:
 
 handle_newline:
 			spin_lock_irqsave(&tty->read_lock, flags);
-			set_bit(tty->read_head, ldata->read_flags);
+			set_bit(ldata->read_head, ldata->read_flags);
 			put_tty_queue_nolock(c, tty);
-			tty->canon_head = tty->read_head;
-			tty->canon_data++;
+			ldata->canon_head = ldata->read_head;
+			ldata->canon_data++;
 			spin_unlock_irqrestore(&tty->read_lock, flags);
 			kill_fasync(&tty->fasync, SIGIO, POLL_IN);
 			if (waitqueue_active(&tty->read_wait))
@@ -1337,7 +1352,7 @@ handle_newline:
 	}
 
 	parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
-	if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+	if (ldata->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
 		/* beep if no space */
 		if (L_ECHO(tty))
 			process_output('\a', tty);
@@ -1349,7 +1364,7 @@ handle_newline:
 			echo_char_raw('\n', tty);
 		else {
 			/* Record the column of first canon char. */
-			if (tty->canon_head == tty->read_head)
+			if (ldata->canon_head == ldata->read_head)
 				echo_set_canon_col(tty);
 			echo_char(c, tty);
 		}
@@ -1403,21 +1418,21 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 
 	if (ldata->real_raw) {
 		spin_lock_irqsave(&tty->read_lock, cpuflags);
-		i = min(N_TTY_BUF_SIZE - tty->read_cnt,
-			N_TTY_BUF_SIZE - tty->read_head);
+		i = min(N_TTY_BUF_SIZE - ldata->read_cnt,
+			N_TTY_BUF_SIZE - ldata->read_head);
 		i = min(count, i);
-		memcpy(tty->read_buf + tty->read_head, cp, i);
-		tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
-		tty->read_cnt += i;
+		memcpy(ldata->read_buf + ldata->read_head, cp, i);
+		ldata->read_head = (ldata->read_head + i) & (N_TTY_BUF_SIZE-1);
+		ldata->read_cnt += i;
 		cp += i;
 		count -= i;
 
-		i = min(N_TTY_BUF_SIZE - tty->read_cnt,
-			N_TTY_BUF_SIZE - tty->read_head);
+		i = min(N_TTY_BUF_SIZE - ldata->read_cnt,
+			N_TTY_BUF_SIZE - ldata->read_head);
 		i = min(count, i);
-		memcpy(tty->read_buf + tty->read_head, cp, i);
-		tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
-		tty->read_cnt += i;
+		memcpy(ldata->read_buf + ldata->read_head, cp, i);
+		ldata->read_head = (ldata->read_head + i) & (N_TTY_BUF_SIZE-1);
+		ldata->read_cnt += i;
 		spin_unlock_irqrestore(&tty->read_lock, cpuflags);
 	} else {
 		for (i = count, p = cp, f = fp; i; i--, p++) {
@@ -1449,7 +1464,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 
 	n_tty_set_room(tty);
 
-	if ((!ldata->icanon && (tty->read_cnt >= tty->minimum_to_wake)) ||
+	if ((!ldata->icanon && (ldata->read_cnt >= tty->minimum_to_wake)) ||
 		L_EXTPROC(tty)) {
 		kill_fasync(&tty->fasync, SIGIO, POLL_IN);
 		if (waitqueue_active(&tty->read_wait))
@@ -1500,12 +1515,12 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 		canon_change = (old->c_lflag ^ tty->termios.c_lflag) & ICANON;
 	if (canon_change) {
 		bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
-		tty->canon_head = tty->read_tail;
-		tty->canon_data = 0;
+		ldata->canon_head = ldata->read_tail;
+		ldata->canon_data = 0;
 		ldata->erasing = 0;
 	}
 
-	if (canon_change && !L_ICANON(tty) && tty->read_cnt)
+	if (canon_change && !L_ICANON(tty) && ldata->read_cnt)
 		wake_up_interruptible(&tty->read_wait);
 
 	ldata->icanon = (L_ICANON(tty) != 0);
@@ -1586,11 +1601,9 @@ static void n_tty_close(struct tty_struct *tty)
 	struct n_tty_data *ldata = tty->disc_data;
 
 	n_tty_flush_buffer(tty);
-	kfree(tty->read_buf);
-	kfree(tty->echo_buf);
+	kfree(ldata->read_buf);
+	kfree(ldata->echo_buf);
 	kfree(ldata);
-	tty->read_buf = NULL;
-	tty->echo_buf = NULL;
 	tty->disc_data = NULL;
 }
 
@@ -1615,9 +1628,9 @@ static int n_tty_open(struct tty_struct *tty)
 	ldata->overrun_time = jiffies;
 
 	/* These are ugly. Currently a malloc failure here can panic */
-	tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
-	tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
-	if (!tty->read_buf || !tty->echo_buf)
+	ldata->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+	ldata->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+	if (!ldata->read_buf || !ldata->echo_buf)
 		goto err_free_bufs;
 
 	tty->disc_data = ldata;
@@ -1630,8 +1643,8 @@ static int n_tty_open(struct tty_struct *tty)
 
 	return 0;
 err_free_bufs:
-	kfree(tty->read_buf);
-	kfree(tty->echo_buf);
+	kfree(ldata->read_buf);
+	kfree(ldata->echo_buf);
 	kfree(ldata);
 err:
 	return -ENOMEM;
@@ -1643,9 +1656,9 @@ static inline int input_available_p(struct tty_struct *tty, int amt)
 
 	tty_flush_to_ldisc(tty);
 	if (ldata->icanon && !L_EXTPROC(tty)) {
-		if (tty->canon_data)
+		if (ldata->canon_data)
 			return 1;
-	} else if (tty->read_cnt >= (amt ? amt : 1))
+	} else if (ldata->read_cnt >= (amt ? amt : 1))
 		return 1;
 
 	return 0;
@@ -1681,21 +1694,21 @@ static int copy_from_read_buf(struct tty_struct *tty,
 
 	retval = 0;
 	spin_lock_irqsave(&tty->read_lock, flags);
-	n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+	n = min(ldata->read_cnt, N_TTY_BUF_SIZE - ldata->read_tail);
 	n = min(*nr, n);
 	spin_unlock_irqrestore(&tty->read_lock, flags);
 	if (n) {
-		retval = copy_to_user(*b, &tty->read_buf[tty->read_tail], n);
+		retval = copy_to_user(*b, &ldata->read_buf[ldata->read_tail], n);
 		n -= retval;
 		is_eof = n == 1 &&
-			tty->read_buf[tty->read_tail] == EOF_CHAR(tty);
-		tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n,
+			ldata->read_buf[ldata->read_tail] == EOF_CHAR(tty);
+		tty_audit_add_data(tty, &ldata->read_buf[ldata->read_tail], n,
 				ldata->icanon);
 		spin_lock_irqsave(&tty->read_lock, flags);
-		tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1);
-		tty->read_cnt -= n;
+		ldata->read_tail = (ldata->read_tail + n) & (N_TTY_BUF_SIZE-1);
+		ldata->read_cnt -= n;
 		/* Turn single EOF into zero-length read */
-		if (L_EXTPROC(tty) && ldata->icanon && is_eof && !tty->read_cnt)
+		if (L_EXTPROC(tty) && ldata->icanon && is_eof && !ldata->read_cnt)
 			n = 0;
 		spin_unlock_irqrestore(&tty->read_lock, flags);
 		*b += n;
@@ -1878,22 +1891,22 @@ do_it_again:
 		if (ldata->icanon && !L_EXTPROC(tty)) {
 			/* N.B. avoid overrun if nr == 0 */
 			spin_lock_irqsave(&tty->read_lock, flags);
-			while (nr && tty->read_cnt) {
+			while (nr && ldata->read_cnt) {
 				int eol;
 
-				eol = test_and_clear_bit(tty->read_tail,
+				eol = test_and_clear_bit(ldata->read_tail,
 						ldata->read_flags);
-				c = tty->read_buf[tty->read_tail];
-				tty->read_tail = ((tty->read_tail+1) &
+				c = ldata->read_buf[ldata->read_tail];
+				ldata->read_tail = ((ldata->read_tail+1) &
 						  (N_TTY_BUF_SIZE-1));
-				tty->read_cnt--;
+				ldata->read_cnt--;
 				if (eol) {
 					/* this test should be redundant:
 					 * we shouldn't be reading data if
 					 * canon_data is 0
 					 */
-					if (--tty->canon_data < 0)
-						tty->canon_data = 0;
+					if (--ldata->canon_data < 0)
+						ldata->canon_data = 0;
 				}
 				spin_unlock_irqrestore(&tty->read_lock, flags);
 
@@ -2111,15 +2124,15 @@ static unsigned long inq_canon(struct tty_struct *tty)
 	struct n_tty_data *ldata = tty->disc_data;
 	int nr, head, tail;
 
-	if (!tty->canon_data)
+	if (!ldata->canon_data)
 		return 0;
-	head = tty->canon_head;
-	tail = tty->read_tail;
+	head = ldata->canon_head;
+	tail = ldata->read_tail;
 	nr = (head - tail) & (N_TTY_BUF_SIZE-1);
 	/* Skip EOF-chars.. */
 	while (head != tail) {
 		if (test_bit(tail, ldata->read_flags) &&
-		    tty->read_buf[tail] == __DISABLED_CHAR)
+		    ldata->read_buf[tail] == __DISABLED_CHAR)
 			nr--;
 		tail = (tail+1) & (N_TTY_BUF_SIZE-1);
 	}
@@ -2129,6 +2142,7 @@ static unsigned long inq_canon(struct tty_struct *tty)
 static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int retval;
 
 	switch (cmd) {
@@ -2136,7 +2150,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
 	case TIOCINQ:
 		/* FIXME: Locking */
-		retval = tty->read_cnt;
+		retval = ldata->read_cnt;
 		if (L_ICANON(tty))
 			retval = inq_canon(tty);
 		return put_user(retval, (unsigned int __user *) arg);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 2161e6b5a94c..226cf20e0150 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -272,16 +272,6 @@ struct tty_struct {
 	 */
 	unsigned char closing:1;
 	unsigned short minimum_to_wake;
-	char *read_buf;
-	int read_head;
-	int read_tail;
-	int read_cnt;
-	unsigned char *echo_buf;
-	unsigned int echo_pos;
-	unsigned int echo_cnt;
-	int canon_data;
-	unsigned long canon_head;
-	unsigned int canon_column;
 	struct mutex atomic_read_lock;
 	struct mutex atomic_write_lock;
 	struct mutex output_lock;
-- 
cgit v1.2.3-55-g7522


From bddc7152f68bc1e0ee1f55a8055e33531f384101 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:42 +0200
Subject: TTY: move ldisc data from tty_struct: locks

atomic_write_lock is not n_tty specific, so move it up in the
tty_struct.

And since these are the last ones to move, remove also the comment
saying there are some ldisc' members. There are none now.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c  | 123 ++++++++++++++++++++++++++++++---------------------
 drivers/tty/tty_io.c |   4 --
 include/linux/tty.h  |  11 +----
 3 files changed, 73 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 4794537a50ff..0a6fcda96152 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -96,6 +96,11 @@ struct n_tty_data {
 	int canon_data;
 	unsigned long canon_head;
 	unsigned int canon_column;
+
+	struct mutex atomic_read_lock;
+	struct mutex output_lock;
+	struct mutex echo_lock;
+	spinlock_t read_lock;
 };
 
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
@@ -171,14 +176,15 @@ static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty)
 
 static void put_tty_queue(unsigned char c, struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	unsigned long flags;
 	/*
 	 *	The problem of stomping on the buffers ends here.
 	 *	Why didn't anyone see this one coming? --AJK
 	*/
-	spin_lock_irqsave(&tty->read_lock, flags);
+	spin_lock_irqsave(&ldata->read_lock, flags);
 	put_tty_queue_nolock(c, tty);
-	spin_unlock_irqrestore(&tty->read_lock, flags);
+	spin_unlock_irqrestore(&ldata->read_lock, flags);
 }
 
 /**
@@ -212,13 +218,13 @@ static void reset_buffer_flags(struct tty_struct *tty)
 	struct n_tty_data *ldata = tty->disc_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&tty->read_lock, flags);
+	spin_lock_irqsave(&ldata->read_lock, flags);
 	ldata->read_head = ldata->read_tail = ldata->read_cnt = 0;
-	spin_unlock_irqrestore(&tty->read_lock, flags);
+	spin_unlock_irqrestore(&ldata->read_lock, flags);
 
-	mutex_lock(&tty->echo_lock);
+	mutex_lock(&ldata->echo_lock);
 	ldata->echo_pos = ldata->echo_cnt = ldata->echo_overrun = 0;
-	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&ldata->echo_lock);
 
 	ldata->canon_head = ldata->canon_data = ldata->erasing = 0;
 	bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
@@ -270,7 +276,7 @@ static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty)
 	unsigned long flags;
 	ssize_t n = 0;
 
-	spin_lock_irqsave(&tty->read_lock, flags);
+	spin_lock_irqsave(&ldata->read_lock, flags);
 	if (!ldata->icanon) {
 		n = ldata->read_cnt;
 	} else if (ldata->canon_data) {
@@ -278,7 +284,7 @@ static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty)
 			ldata->canon_head - ldata->read_tail :
 			ldata->canon_head + (N_TTY_BUF_SIZE - ldata->read_tail);
 	}
-	spin_unlock_irqrestore(&tty->read_lock, flags);
+	spin_unlock_irqrestore(&ldata->read_lock, flags);
 	return n;
 }
 
@@ -408,14 +414,15 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 
 static int process_output(unsigned char c, struct tty_struct *tty)
 {
+	struct n_tty_data *ldata = tty->disc_data;
 	int	space, retval;
 
-	mutex_lock(&tty->output_lock);
+	mutex_lock(&ldata->output_lock);
 
 	space = tty_write_room(tty);
 	retval = do_output_char(c, tty, space);
 
-	mutex_unlock(&tty->output_lock);
+	mutex_unlock(&ldata->output_lock);
 	if (retval < 0)
 		return -1;
 	else
@@ -449,11 +456,11 @@ static ssize_t process_output_block(struct tty_struct *tty,
 	int	i;
 	const unsigned char *cp;
 
-	mutex_lock(&tty->output_lock);
+	mutex_lock(&ldata->output_lock);
 
 	space = tty_write_room(tty);
 	if (!space) {
-		mutex_unlock(&tty->output_lock);
+		mutex_unlock(&ldata->output_lock);
 		return 0;
 	}
 	if (nr > space)
@@ -496,7 +503,7 @@ static ssize_t process_output_block(struct tty_struct *tty,
 break_out:
 	i = tty->ops->write(tty, buf, i);
 
-	mutex_unlock(&tty->output_lock);
+	mutex_unlock(&ldata->output_lock);
 	return i;
 }
 
@@ -536,8 +543,8 @@ static void process_echoes(struct tty_struct *tty)
 	if (!ldata->echo_cnt)
 		return;
 
-	mutex_lock(&tty->output_lock);
-	mutex_lock(&tty->echo_lock);
+	mutex_lock(&ldata->output_lock);
+	mutex_lock(&ldata->echo_lock);
 
 	space = tty_write_room(tty);
 
@@ -682,8 +689,8 @@ static void process_echoes(struct tty_struct *tty)
 			ldata->echo_overrun = 0;
 	}
 
-	mutex_unlock(&tty->echo_lock);
-	mutex_unlock(&tty->output_lock);
+	mutex_unlock(&ldata->echo_lock);
+	mutex_unlock(&ldata->output_lock);
 
 	if (tty->ops->flush_chars)
 		tty->ops->flush_chars(tty);
@@ -748,12 +755,14 @@ static void add_echo_byte(unsigned char c, struct tty_struct *tty)
 
 static void echo_move_back_col(struct tty_struct *tty)
 {
-	mutex_lock(&tty->echo_lock);
+	struct n_tty_data *ldata = tty->disc_data;
+
+	mutex_lock(&ldata->echo_lock);
 
 	add_echo_byte(ECHO_OP_START, tty);
 	add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty);
 
-	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&ldata->echo_lock);
 }
 
 /**
@@ -768,12 +777,14 @@ static void echo_move_back_col(struct tty_struct *tty)
 
 static void echo_set_canon_col(struct tty_struct *tty)
 {
-	mutex_lock(&tty->echo_lock);
+	struct n_tty_data *ldata = tty->disc_data;
+
+	mutex_lock(&ldata->echo_lock);
 
 	add_echo_byte(ECHO_OP_START, tty);
 	add_echo_byte(ECHO_OP_SET_CANON_COL, tty);
 
-	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&ldata->echo_lock);
 }
 
 /**
@@ -796,7 +807,9 @@ static void echo_set_canon_col(struct tty_struct *tty)
 static void echo_erase_tab(unsigned int num_chars, int after_tab,
 			   struct tty_struct *tty)
 {
-	mutex_lock(&tty->echo_lock);
+	struct n_tty_data *ldata = tty->disc_data;
+
+	mutex_lock(&ldata->echo_lock);
 
 	add_echo_byte(ECHO_OP_START, tty);
 	add_echo_byte(ECHO_OP_ERASE_TAB, tty);
@@ -810,7 +823,7 @@ static void echo_erase_tab(unsigned int num_chars, int after_tab,
 
 	add_echo_byte(num_chars, tty);
 
-	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&ldata->echo_lock);
 }
 
 /**
@@ -828,7 +841,9 @@ static void echo_erase_tab(unsigned int num_chars, int after_tab,
 
 static void echo_char_raw(unsigned char c, struct tty_struct *tty)
 {
-	mutex_lock(&tty->echo_lock);
+	struct n_tty_data *ldata = tty->disc_data;
+
+	mutex_lock(&ldata->echo_lock);
 
 	if (c == ECHO_OP_START) {
 		add_echo_byte(ECHO_OP_START, tty);
@@ -837,7 +852,7 @@ static void echo_char_raw(unsigned char c, struct tty_struct *tty)
 		add_echo_byte(c, tty);
 	}
 
-	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&ldata->echo_lock);
 }
 
 /**
@@ -856,7 +871,9 @@ static void echo_char_raw(unsigned char c, struct tty_struct *tty)
 
 static void echo_char(unsigned char c, struct tty_struct *tty)
 {
-	mutex_lock(&tty->echo_lock);
+	struct n_tty_data *ldata = tty->disc_data;
+
+	mutex_lock(&ldata->echo_lock);
 
 	if (c == ECHO_OP_START) {
 		add_echo_byte(ECHO_OP_START, tty);
@@ -867,7 +884,7 @@ static void echo_char(unsigned char c, struct tty_struct *tty)
 		add_echo_byte(c, tty);
 	}
 
-	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&ldata->echo_lock);
 }
 
 /**
@@ -914,19 +931,19 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 		kill_type = WERASE;
 	else {
 		if (!L_ECHO(tty)) {
-			spin_lock_irqsave(&tty->read_lock, flags);
+			spin_lock_irqsave(&ldata->read_lock, flags);
 			ldata->read_cnt -= ((ldata->read_head - ldata->canon_head) &
 					  (N_TTY_BUF_SIZE - 1));
 			ldata->read_head = ldata->canon_head;
-			spin_unlock_irqrestore(&tty->read_lock, flags);
+			spin_unlock_irqrestore(&ldata->read_lock, flags);
 			return;
 		}
 		if (!L_ECHOK(tty) || !L_ECHOKE(tty) || !L_ECHOE(tty)) {
-			spin_lock_irqsave(&tty->read_lock, flags);
+			spin_lock_irqsave(&ldata->read_lock, flags);
 			ldata->read_cnt -= ((ldata->read_head - ldata->canon_head) &
 					  (N_TTY_BUF_SIZE - 1));
 			ldata->read_head = ldata->canon_head;
-			spin_unlock_irqrestore(&tty->read_lock, flags);
+			spin_unlock_irqrestore(&ldata->read_lock, flags);
 			finish_erasing(tty);
 			echo_char(KILL_CHAR(tty), tty);
 			/* Add a newline if ECHOK is on and ECHOKE is off. */
@@ -960,10 +977,10 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 				break;
 		}
 		cnt = (ldata->read_head - head) & (N_TTY_BUF_SIZE-1);
-		spin_lock_irqsave(&tty->read_lock, flags);
+		spin_lock_irqsave(&ldata->read_lock, flags);
 		ldata->read_head = head;
 		ldata->read_cnt -= cnt;
-		spin_unlock_irqrestore(&tty->read_lock, flags);
+		spin_unlock_irqrestore(&ldata->read_lock, flags);
 		if (L_ECHO(tty)) {
 			if (L_ECHOPRT(tty)) {
 				if (!ldata->erasing) {
@@ -1338,12 +1355,12 @@ send_signal:
 				put_tty_queue(c, tty);
 
 handle_newline:
-			spin_lock_irqsave(&tty->read_lock, flags);
+			spin_lock_irqsave(&ldata->read_lock, flags);
 			set_bit(ldata->read_head, ldata->read_flags);
 			put_tty_queue_nolock(c, tty);
 			ldata->canon_head = ldata->read_head;
 			ldata->canon_data++;
-			spin_unlock_irqrestore(&tty->read_lock, flags);
+			spin_unlock_irqrestore(&ldata->read_lock, flags);
 			kill_fasync(&tty->fasync, SIGIO, POLL_IN);
 			if (waitqueue_active(&tty->read_wait))
 				wake_up_interruptible(&tty->read_wait);
@@ -1417,7 +1434,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 	unsigned long cpuflags;
 
 	if (ldata->real_raw) {
-		spin_lock_irqsave(&tty->read_lock, cpuflags);
+		spin_lock_irqsave(&ldata->read_lock, cpuflags);
 		i = min(N_TTY_BUF_SIZE - ldata->read_cnt,
 			N_TTY_BUF_SIZE - ldata->read_head);
 		i = min(count, i);
@@ -1433,7 +1450,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 		memcpy(ldata->read_buf + ldata->read_head, cp, i);
 		ldata->read_head = (ldata->read_head + i) & (N_TTY_BUF_SIZE-1);
 		ldata->read_cnt += i;
-		spin_unlock_irqrestore(&tty->read_lock, cpuflags);
+		spin_unlock_irqrestore(&ldata->read_lock, cpuflags);
 	} else {
 		for (i = count, p = cp, f = fp; i; i--, p++) {
 			if (f)
@@ -1626,6 +1643,10 @@ static int n_tty_open(struct tty_struct *tty)
 		goto err;
 
 	ldata->overrun_time = jiffies;
+	mutex_init(&ldata->atomic_read_lock);
+	mutex_init(&ldata->output_lock);
+	mutex_init(&ldata->echo_lock);
+	spin_lock_init(&ldata->read_lock);
 
 	/* These are ugly. Currently a malloc failure here can panic */
 	ldata->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
@@ -1677,7 +1698,7 @@ static inline int input_available_p(struct tty_struct *tty, int amt)
  *	buffer, and once to drain the space from the (physical) beginning of
  *	the buffer to head pointer.
  *
- *	Called under the tty->atomic_read_lock sem
+ *	Called under the ldata->atomic_read_lock sem
  *
  */
 
@@ -1693,10 +1714,10 @@ static int copy_from_read_buf(struct tty_struct *tty,
 	bool is_eof;
 
 	retval = 0;
-	spin_lock_irqsave(&tty->read_lock, flags);
+	spin_lock_irqsave(&ldata->read_lock, flags);
 	n = min(ldata->read_cnt, N_TTY_BUF_SIZE - ldata->read_tail);
 	n = min(*nr, n);
-	spin_unlock_irqrestore(&tty->read_lock, flags);
+	spin_unlock_irqrestore(&ldata->read_lock, flags);
 	if (n) {
 		retval = copy_to_user(*b, &ldata->read_buf[ldata->read_tail], n);
 		n -= retval;
@@ -1704,13 +1725,13 @@ static int copy_from_read_buf(struct tty_struct *tty,
 			ldata->read_buf[ldata->read_tail] == EOF_CHAR(tty);
 		tty_audit_add_data(tty, &ldata->read_buf[ldata->read_tail], n,
 				ldata->icanon);
-		spin_lock_irqsave(&tty->read_lock, flags);
+		spin_lock_irqsave(&ldata->read_lock, flags);
 		ldata->read_tail = (ldata->read_tail + n) & (N_TTY_BUF_SIZE-1);
 		ldata->read_cnt -= n;
 		/* Turn single EOF into zero-length read */
 		if (L_EXTPROC(tty) && ldata->icanon && is_eof && !ldata->read_cnt)
 			n = 0;
-		spin_unlock_irqrestore(&tty->read_lock, flags);
+		spin_unlock_irqrestore(&ldata->read_lock, flags);
 		*b += n;
 		*nr -= n;
 	}
@@ -1818,10 +1839,10 @@ do_it_again:
 	 *	Internal serialization of reads.
 	 */
 	if (file->f_flags & O_NONBLOCK) {
-		if (!mutex_trylock(&tty->atomic_read_lock))
+		if (!mutex_trylock(&ldata->atomic_read_lock))
 			return -EAGAIN;
 	} else {
-		if (mutex_lock_interruptible(&tty->atomic_read_lock))
+		if (mutex_lock_interruptible(&ldata->atomic_read_lock))
 			return -ERESTARTSYS;
 	}
 	packet = tty->packet;
@@ -1890,7 +1911,7 @@ do_it_again:
 
 		if (ldata->icanon && !L_EXTPROC(tty)) {
 			/* N.B. avoid overrun if nr == 0 */
-			spin_lock_irqsave(&tty->read_lock, flags);
+			spin_lock_irqsave(&ldata->read_lock, flags);
 			while (nr && ldata->read_cnt) {
 				int eol;
 
@@ -1908,25 +1929,25 @@ do_it_again:
 					if (--ldata->canon_data < 0)
 						ldata->canon_data = 0;
 				}
-				spin_unlock_irqrestore(&tty->read_lock, flags);
+				spin_unlock_irqrestore(&ldata->read_lock, flags);
 
 				if (!eol || (c != __DISABLED_CHAR)) {
 					if (tty_put_user(tty, c, b++)) {
 						retval = -EFAULT;
 						b--;
-						spin_lock_irqsave(&tty->read_lock, flags);
+						spin_lock_irqsave(&ldata->read_lock, flags);
 						break;
 					}
 					nr--;
 				}
 				if (eol) {
 					tty_audit_push(tty);
-					spin_lock_irqsave(&tty->read_lock, flags);
+					spin_lock_irqsave(&ldata->read_lock, flags);
 					break;
 				}
-				spin_lock_irqsave(&tty->read_lock, flags);
+				spin_lock_irqsave(&ldata->read_lock, flags);
 			}
-			spin_unlock_irqrestore(&tty->read_lock, flags);
+			spin_unlock_irqrestore(&ldata->read_lock, flags);
 			if (retval)
 				break;
 		} else {
@@ -1958,7 +1979,7 @@ do_it_again:
 		if (time)
 			timeout = time;
 	}
-	mutex_unlock(&tty->atomic_read_lock);
+	mutex_unlock(&ldata->atomic_read_lock);
 	remove_wait_queue(&tty->read_wait, &wait);
 
 	if (!waitqueue_active(&tty->read_wait))
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 67b024ca16ec..f90b6217b3ba 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2939,11 +2939,7 @@ void initialize_tty_struct(struct tty_struct *tty,
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
-	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
-	mutex_init(&tty->output_lock);
-	mutex_init(&tty->echo_lock);
-	spin_lock_init(&tty->read_lock);
 	spin_lock_init(&tty->ctrl_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
 	INIT_WORK(&tty->SAK_work, do_SAK_work);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 226cf20e0150..08787ece3fdc 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -235,6 +235,7 @@ struct tty_struct {
 	struct mutex ldisc_mutex;
 	struct tty_ldisc *ldisc;
 
+	struct mutex atomic_write_lock;
 	struct mutex legacy_mutex;
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
@@ -265,20 +266,10 @@ struct tty_struct {
 
 #define N_TTY_BUF_SIZE 4096
 
-	/*
-	 * The following is data for the N_TTY line discipline.  For
-	 * historical reasons, this is included in the tty structure.
-	 * Mostly locked by the BKL.
-	 */
 	unsigned char closing:1;
 	unsigned short minimum_to_wake;
-	struct mutex atomic_read_lock;
-	struct mutex atomic_write_lock;
-	struct mutex output_lock;
-	struct mutex echo_lock;
 	unsigned char *write_buf;
 	int write_cnt;
-	spinlock_t read_lock;
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
 	struct tty_port *port;
-- 
cgit v1.2.3-55-g7522


From 2fc20661e3171d45e8e58a61eb5c6b7d8d614fde Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:44 +0200
Subject: TTY: move TTY_FLUSH* flags to tty_port

They are only TTY buffers specific. And the buffers will go to
tty_port in the next patches. So to remove the need to have both
tty_port and tty_struct at some places, let us move the flags to
tty_port.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_buffer.c | 18 ++++++++++--------
 include/linux/tty.h      |  5 +++--
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 8b00f6a34a7d..6f366f257fba 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -134,17 +134,18 @@ static void __tty_buffer_flush(struct tty_struct *tty)
 
 void tty_buffer_flush(struct tty_struct *tty)
 {
+	struct tty_port *port = tty->port;
 	unsigned long flags;
 	spin_lock_irqsave(&tty->buf.lock, flags);
 
 	/* If the data is being pushed to the tty layer then we can't
 	   process it here. Instead set a flag and the flush_to_ldisc
 	   path will process the flush request before it exits */
-	if (test_bit(TTY_FLUSHING, &tty->flags)) {
-		set_bit(TTY_FLUSHPENDING, &tty->flags);
+	if (test_bit(TTYP_FLUSHING, &port->iflags)) {
+		set_bit(TTYP_FLUSHPENDING, &port->iflags);
 		spin_unlock_irqrestore(&tty->buf.lock, flags);
 		wait_event(tty->read_wait,
-				test_bit(TTY_FLUSHPENDING, &tty->flags) == 0);
+				test_bit(TTYP_FLUSHPENDING, &port->iflags) == 0);
 		return;
 	} else
 		__tty_buffer_flush(tty);
@@ -450,6 +451,7 @@ static void flush_to_ldisc(struct work_struct *work)
 {
 	struct tty_struct *tty =
 		container_of(work, struct tty_struct, buf.work);
+	struct tty_port *port = tty->port;
 	unsigned long 	flags;
 	struct tty_ldisc *disc;
 
@@ -459,7 +461,7 @@ static void flush_to_ldisc(struct work_struct *work)
 
 	spin_lock_irqsave(&tty->buf.lock, flags);
 
-	if (!test_and_set_bit(TTY_FLUSHING, &tty->flags)) {
+	if (!test_and_set_bit(TTYP_FLUSHING, &port->iflags)) {
 		struct tty_buffer *head;
 		while ((head = tty->buf.head) != NULL) {
 			int count;
@@ -477,7 +479,7 @@ static void flush_to_ldisc(struct work_struct *work)
 			/* Ldisc or user is trying to flush the buffers
 			   we are feeding to the ldisc, stop feeding the
 			   line discipline as we want to empty the queue */
-			if (test_bit(TTY_FLUSHPENDING, &tty->flags))
+			if (test_bit(TTYP_FLUSHPENDING, &port->iflags))
 				break;
 			if (!tty->receive_room)
 				break;
@@ -491,14 +493,14 @@ static void flush_to_ldisc(struct work_struct *work)
 							flag_buf, count);
 			spin_lock_irqsave(&tty->buf.lock, flags);
 		}
-		clear_bit(TTY_FLUSHING, &tty->flags);
+		clear_bit(TTYP_FLUSHING, &port->iflags);
 	}
 
 	/* We may have a deferred request to flush the input buffer,
 	   if so pull the chain under the lock and empty the queue */
-	if (test_bit(TTY_FLUSHPENDING, &tty->flags)) {
+	if (test_bit(TTYP_FLUSHPENDING, &port->iflags)) {
 		__tty_buffer_flush(tty);
-		clear_bit(TTY_FLUSHPENDING, &tty->flags);
+		clear_bit(TTYP_FLUSHPENDING, &port->iflags);
 		wake_up(&tty->read_wait);
 	}
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 08787ece3fdc..b4b3c568d242 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -197,6 +197,9 @@ struct tty_port {
 	wait_queue_head_t	close_wait;	/* Close waiters */
 	wait_queue_head_t	delta_msr_wait;	/* Modem status change */
 	unsigned long		flags;		/* TTY flags ASY_*/
+	unsigned long		iflags;		/* TTYP_ internal flags */
+#define TTYP_FLUSHING			1  /* Flushing to ldisc in progress */
+#define TTYP_FLUSHPENDING		2  /* Queued buffer flush pending */
 	unsigned char		console:1;	/* port is a console */
 	struct mutex		mutex;		/* Locking */
 	struct mutex		buf_mutex;	/* Buffer alloc lock */
@@ -309,8 +312,6 @@ struct tty_file_private {
 #define TTY_PTY_LOCK 		16	/* pty private */
 #define TTY_NO_WRITE_SPLIT 	17	/* Preserve write boundaries to driver */
 #define TTY_HUPPED 		18	/* Post driver->hangup() */
-#define TTY_FLUSHING		19	/* Flushing to ldisc in progress */
-#define TTY_FLUSHPENDING	20	/* Queued buffer flush pending */
 #define TTY_HUPPING 		21	/* ->hangup() in progress */
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
-- 
cgit v1.2.3-55-g7522


From 967fab6916681e5ab131fdef1226327b02454f19 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:46 +0200
Subject: TTY: add port -> tty link

For that purpose we have to temporarily introduce a second tty back
pointer into tty_port. It is because serial layer, and maybe others,
still do not use tty_port_tty_set/get. So that we cannot set the
tty_port->tty to NULL at will now.

Yes, the fix would be to convert whole serial layer and all its users
to tty_port_tty_set/get. However we are in the process of removing the
need of tty in most of the call sites, so this would lead to a
duplicated work.

Instead we have now tty_port->itty (internal tty) which will be used
only in flush_to_ldisc. For that one it is ensured that itty is valid
wherever the work is run. IOW, the work is synchronously cancelled
before we set itty to NULL and also before hangup is processed.

After we need only tty_port and not tty_struct in most code, this
shall be changed to tty_port_tty_set/get and itty removed completely.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c    | 2 ++
 drivers/tty/tty_io.c | 3 +++
 include/linux/tty.h  | 1 +
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 2728afe52eea..c32690862671 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -345,6 +345,7 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty,
 	tty_port_init(ports[1]);
 	o_tty->port = ports[0];
 	tty->port = ports[1];
+	o_tty->port->itty = o_tty;
 
 	tty_driver_kref_get(driver);
 	tty->count++;
@@ -371,6 +372,7 @@ static void pty_unix98_shutdown(struct tty_struct *tty)
 
 static void pty_cleanup(struct tty_struct *tty)
 {
+	tty->port->itty = NULL;
 	kfree(tty->port);
 }
 
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index f90b6217b3ba..202008f38ca3 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1417,6 +1417,8 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
 			"%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n",
 			__func__, tty->driver->name);
 
+	tty->port->itty = tty;
+
 	/*
 	 * Structures all installed ... call the ldisc open routines.
 	 * If we fail here just call release_tty to clean up.  No need
@@ -1552,6 +1554,7 @@ static void release_tty(struct tty_struct *tty, int idx)
 		tty->ops->shutdown(tty);
 	tty_free_termios(tty);
 	tty_driver_remove_tty(tty->driver, tty);
+	tty->port->itty = NULL;
 
 	if (tty->link)
 		tty_kref_put(tty->link);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index b4b3c568d242..9be74d649a51 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -189,6 +189,7 @@ struct tty_port_operations {
 	
 struct tty_port {
 	struct tty_struct	*tty;		/* Back pointer */
+	struct tty_struct	*itty;		/* internal back ptr */
 	const struct tty_port_operations *ops;	/* Port operations */
 	spinlock_t		lock;		/* Lock protecting tty field */
 	int			blocked_open;	/* Waiting to open */
-- 
cgit v1.2.3-55-g7522


From ecbbfd44a08fa80e0d664814efd4c187721b85f6 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 18 Oct 2012 22:26:47 +0200
Subject: TTY: move tty buffers to tty_port

So this is it. The big step why we did all the work over the past
kernel releases. Now everything is prepared, so nothing protects us
from doing that big step.

           |  |            \  \ nnnn/^l      |  |
           |  |             \  /     /       |  |
           |  '-,.__   =>    \/   ,-`    =>  |  '-,.__
           | O __.´´)        (  .`           | O __.´´)
            ~~~   ~~          ``              ~~~   ~~
The buffers are now in the tty_port structure and we can start
teaching the buffer helpers (insert char/string, flip etc.) to use
tty_port instead of tty_struct all around.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c      |   7 +++-
 drivers/tty/pty.c        |   2 +-
 drivers/tty/tty_buffer.c | 102 ++++++++++++++++++++++++-----------------------
 drivers/tty/tty_io.c     |   2 -
 drivers/tty/tty_ldisc.c  |  10 ++---
 drivers/tty/tty_port.c   |   2 +
 include/linux/tty.h      |   6 +--
 include/linux/tty_flip.h |   2 +-
 8 files changed, 70 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 531e539dbfcf..60b076cc4e20 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -149,8 +149,11 @@ static void n_tty_set_room(struct tty_struct *tty)
 	tty->receive_room = left;
 
 	/* Did this open up the receive buffer? We may need to flip */
-	if (left && !old_left)
-		schedule_work(&tty->buf.work);
+	if (left && !old_left) {
+		WARN_RATELIMIT(tty->port->itty == NULL,
+				"scheduling with invalid itty");
+		schedule_work(&tty->port->buf.work);
+	}
 }
 
 static void put_tty_queue_nolock(unsigned char c, struct n_tty_data *ldata)
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index c32690862671..4219f040adb8 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -93,7 +93,7 @@ static void pty_unthrottle(struct tty_struct *tty)
 
 static int pty_space(struct tty_struct *to)
 {
-	int n = 8192 - to->buf.memory_used;
+	int n = 8192 - to->port->buf.memory_used;
 	if (n < 0)
 		return 0;
 	return n;
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index ddd74d41cbb2..06725f5cc819 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -27,9 +27,9 @@
  *	Locking: none
  */
 
-void tty_buffer_free_all(struct tty_struct *tty)
+void tty_buffer_free_all(struct tty_port *port)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &port->buf;
 	struct tty_buffer *thead;
 
 	while ((thead = buf->head) != NULL) {
@@ -56,11 +56,11 @@ void tty_buffer_free_all(struct tty_struct *tty)
  *	Locking: Caller must hold tty->buf.lock
  */
 
-static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
+static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
 {
 	struct tty_buffer *p;
 
-	if (tty->buf.memory_used + size > 65536)
+	if (port->buf.memory_used + size > 65536)
 		return NULL;
 	p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
 	if (p == NULL)
@@ -72,7 +72,7 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
 	p->read = 0;
 	p->char_buf_ptr = (char *)(p->data);
 	p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size;
-	tty->buf.memory_used += size;
+	port->buf.memory_used += size;
 	return p;
 }
 
@@ -87,9 +87,9 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
  *	Locking: Caller must hold tty->buf.lock
  */
 
-static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
+static void tty_buffer_free(struct tty_port *port, struct tty_buffer *b)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &port->buf;
 
 	/* Dumb strategy for now - should keep some stats */
 	buf->memory_used -= b->size;
@@ -114,14 +114,14 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
  *	Locking: Caller must hold tty->buf.lock
  */
 
-static void __tty_buffer_flush(struct tty_struct *tty)
+static void __tty_buffer_flush(struct tty_port *port)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &port->buf;
 	struct tty_buffer *thead;
 
 	while ((thead = buf->head) != NULL) {
 		buf->head = thead->next;
-		tty_buffer_free(tty, thead);
+		tty_buffer_free(port, thead);
 	}
 	buf->tail = NULL;
 }
@@ -140,7 +140,7 @@ static void __tty_buffer_flush(struct tty_struct *tty)
 void tty_buffer_flush(struct tty_struct *tty)
 {
 	struct tty_port *port = tty->port;
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &port->buf;
 	unsigned long flags;
 
 	spin_lock_irqsave(&buf->lock, flags);
@@ -155,7 +155,7 @@ void tty_buffer_flush(struct tty_struct *tty)
 				test_bit(TTYP_FLUSHPENDING, &port->iflags) == 0);
 		return;
 	} else
-		__tty_buffer_flush(tty);
+		__tty_buffer_flush(port);
 	spin_unlock_irqrestore(&buf->lock, flags);
 }
 
@@ -171,9 +171,9 @@ void tty_buffer_flush(struct tty_struct *tty)
  *	Locking: Caller must hold tty->buf.lock
  */
 
-static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
+static struct tty_buffer *tty_buffer_find(struct tty_port *port, size_t size)
 {
-	struct tty_buffer **tbh = &tty->buf.free;
+	struct tty_buffer **tbh = &port->buf.free;
 	while ((*tbh) != NULL) {
 		struct tty_buffer *t = *tbh;
 		if (t->size >= size) {
@@ -182,14 +182,14 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
 			t->used = 0;
 			t->commit = 0;
 			t->read = 0;
-			tty->buf.memory_used += t->size;
+			port->buf.memory_used += t->size;
 			return t;
 		}
 		tbh = &((*tbh)->next);
 	}
 	/* Round the buffer size out */
 	size = (size + 0xFF) & ~0xFF;
-	return tty_buffer_alloc(tty, size);
+	return tty_buffer_alloc(port, size);
 	/* Should possibly check if this fails for the largest buffer we
 	   have queued and recycle that ? */
 }
@@ -200,11 +200,11 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
  *
  *	Make at least size bytes of linear space available for the tty
  *	buffer. If we fail return the size we managed to find.
- *      Locking: Caller must hold tty->buf.lock
+ *      Locking: Caller must hold port->buf.lock
  */
-static int __tty_buffer_request_room(struct tty_struct *tty, size_t size)
+static int __tty_buffer_request_room(struct tty_port *port, size_t size)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &port->buf;
 	struct tty_buffer *b, *n;
 	int left;
 	/* OPTIMISATION: We could keep a per tty "zero" sized buffer to
@@ -218,7 +218,7 @@ static int __tty_buffer_request_room(struct tty_struct *tty, size_t size)
 
 	if (left < size) {
 		/* This is the slow path - looking for new buffers to use */
-		if ((n = tty_buffer_find(tty, size)) != NULL) {
+		if ((n = tty_buffer_find(port, size)) != NULL) {
 			if (b != NULL) {
 				b->next = n;
 				b->commit = b->used;
@@ -241,16 +241,17 @@ static int __tty_buffer_request_room(struct tty_struct *tty, size_t size)
  *	Make at least size bytes of linear space available for the tty
  *	buffer. If we fail return the size we managed to find.
  *
- *	Locking: Takes tty->buf.lock
+ *	Locking: Takes port->buf.lock
  */
 int tty_buffer_request_room(struct tty_struct *tty, size_t size)
 {
+	struct tty_port *port = tty->port;
 	unsigned long flags;
 	int length;
 
-	spin_lock_irqsave(&tty->buf.lock, flags);
-	length = __tty_buffer_request_room(tty, size);
-	spin_unlock_irqrestore(&tty->buf.lock, flags);
+	spin_lock_irqsave(&port->buf.lock, flags);
+	length = __tty_buffer_request_room(port, size);
+	spin_unlock_irqrestore(&port->buf.lock, flags);
 	return length;
 }
 EXPORT_SYMBOL_GPL(tty_buffer_request_room);
@@ -265,13 +266,13 @@ EXPORT_SYMBOL_GPL(tty_buffer_request_room);
  *	Queue a series of bytes to the tty buffering. All the characters
  *	passed are marked with the supplied flag. Returns the number added.
  *
- *	Locking: Called functions may take tty->buf.lock
+ *	Locking: Called functions may take port->buf.lock
  */
 
 int tty_insert_flip_string_fixed_flag(struct tty_struct *tty,
 		const unsigned char *chars, char flag, size_t size)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &tty->port->buf;
 	int copied = 0;
 	do {
 		int goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE);
@@ -280,7 +281,7 @@ int tty_insert_flip_string_fixed_flag(struct tty_struct *tty,
 		struct tty_buffer *tb;
 
 		spin_lock_irqsave(&buf->lock, flags);
-		space = __tty_buffer_request_room(tty, goal);
+		space = __tty_buffer_request_room(tty->port, goal);
 		tb = buf->tail;
 		/* If there is no space then tb may be NULL */
 		if (unlikely(space == 0)) {
@@ -311,13 +312,13 @@ EXPORT_SYMBOL(tty_insert_flip_string_fixed_flag);
  *	the flags array indicates the status of the character. Returns the
  *	number added.
  *
- *	Locking: Called functions may take tty->buf.lock
+ *	Locking: Called functions may take port->buf.lock
  */
 
 int tty_insert_flip_string_flags(struct tty_struct *tty,
 		const unsigned char *chars, const char *flags, size_t size)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &tty->port->buf;
 	int copied = 0;
 	do {
 		int goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE);
@@ -326,7 +327,7 @@ int tty_insert_flip_string_flags(struct tty_struct *tty,
 		struct tty_buffer *tb;
 
 		spin_lock_irqsave(&buf->lock, __flags);
-		space = __tty_buffer_request_room(tty, goal);
+		space = __tty_buffer_request_room(tty->port, goal);
 		tb = buf->tail;
 		/* If there is no space then tb may be NULL */
 		if (unlikely(space == 0)) {
@@ -357,12 +358,12 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags);
  *	Note that this function can only be used when the low_latency flag
  *	is unset. Otherwise the workqueue won't be flushed.
  *
- *	Locking: Takes tty->buf.lock
+ *	Locking: Takes port->buf.lock
  */
 
 void tty_schedule_flip(struct tty_struct *tty)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &tty->port->buf;
 	unsigned long flags;
 
 	spin_lock_irqsave(&buf->lock, flags);
@@ -385,19 +386,19 @@ EXPORT_SYMBOL(tty_schedule_flip);
  *	that need their own block copy routines into the buffer. There is no
  *	guarantee the buffer is a DMA target!
  *
- *	Locking: May call functions taking tty->buf.lock
+ *	Locking: May call functions taking port->buf.lock
  */
 
 int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars,
-								size_t size)
+		size_t size)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &tty->port->buf;
 	int space;
 	unsigned long flags;
 	struct tty_buffer *tb;
 
 	spin_lock_irqsave(&buf->lock, flags);
-	space = __tty_buffer_request_room(tty, size);
+	space = __tty_buffer_request_room(tty->port, size);
 
 	tb = buf->tail;
 	if (likely(space)) {
@@ -423,19 +424,19 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
  *	that need their own block copy routines into the buffer. There is no
  *	guarantee the buffer is a DMA target!
  *
- *	Locking: May call functions taking tty->buf.lock
+ *	Locking: May call functions taking port->buf.lock
  */
 
 int tty_prepare_flip_string_flags(struct tty_struct *tty,
 			unsigned char **chars, char **flags, size_t size)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &tty->port->buf;
 	int space;
 	unsigned long __flags;
 	struct tty_buffer *tb;
 
 	spin_lock_irqsave(&buf->lock, __flags);
-	space = __tty_buffer_request_room(tty, size);
+	space = __tty_buffer_request_room(tty->port, size);
 
 	tb = buf->tail;
 	if (likely(space)) {
@@ -464,13 +465,16 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags);
 
 static void flush_to_ldisc(struct work_struct *work)
 {
-	struct tty_struct *tty =
-		container_of(work, struct tty_struct, buf.work);
-	struct tty_port *port = tty->port;
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_port *port = container_of(work, struct tty_port, buf.work);
+	struct tty_bufhead *buf = &port->buf;
+	struct tty_struct *tty;
 	unsigned long 	flags;
 	struct tty_ldisc *disc;
 
+	tty = port->itty;
+	if (WARN_RATELIMIT(tty == NULL, "tty is NULL"))
+		return;
+
 	disc = tty_ldisc_ref(tty);
 	if (disc == NULL)	/*  !TTY_LDISC */
 		return;
@@ -489,7 +493,7 @@ static void flush_to_ldisc(struct work_struct *work)
 				if (head->next == NULL)
 					break;
 				buf->head = head->next;
-				tty_buffer_free(tty, head);
+				tty_buffer_free(port, head);
 				continue;
 			}
 			/* Ldisc or user is trying to flush the buffers
@@ -515,7 +519,7 @@ static void flush_to_ldisc(struct work_struct *work)
 	/* We may have a deferred request to flush the input buffer,
 	   if so pull the chain under the lock and empty the queue */
 	if (test_bit(TTYP_FLUSHPENDING, &port->iflags)) {
-		__tty_buffer_flush(tty);
+		__tty_buffer_flush(port);
 		clear_bit(TTYP_FLUSHPENDING, &port->iflags);
 		wake_up(&tty->read_wait);
 	}
@@ -535,7 +539,7 @@ static void flush_to_ldisc(struct work_struct *work)
 void tty_flush_to_ldisc(struct tty_struct *tty)
 {
 	if (!tty->low_latency)
-		flush_work(&tty->buf.work);
+		flush_work(&tty->port->buf.work);
 }
 
 /**
@@ -553,7 +557,7 @@ void tty_flush_to_ldisc(struct tty_struct *tty)
 
 void tty_flip_buffer_push(struct tty_struct *tty)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &tty->port->buf;
 	unsigned long flags;
 
 	spin_lock_irqsave(&buf->lock, flags);
@@ -578,9 +582,9 @@ EXPORT_SYMBOL(tty_flip_buffer_push);
  *	Locking: none
  */
 
-void tty_buffer_init(struct tty_struct *tty)
+void tty_buffer_init(struct tty_port *port)
 {
-	struct tty_bufhead *buf = &tty->buf;
+	struct tty_bufhead *buf = &port->buf;
 
 	spin_lock_init(&buf->lock);
 	buf->head = NULL;
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 202008f38ca3..a3eba7f359ed 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -186,7 +186,6 @@ void free_tty_struct(struct tty_struct *tty)
 	if (tty->dev)
 		put_device(tty->dev);
 	kfree(tty->write_buf);
-	tty_buffer_free_all(tty);
 	tty->magic = 0xDEADDEAD;
 	kfree(tty);
 }
@@ -2935,7 +2934,6 @@ void initialize_tty_struct(struct tty_struct *tty,
 	tty_ldisc_init(tty);
 	tty->session = NULL;
 	tty->pgrp = NULL;
-	tty_buffer_init(tty);
 	mutex_init(&tty->legacy_mutex);
 	mutex_init(&tty->termios_mutex);
 	mutex_init(&tty->ldisc_mutex);
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 47e3968df10b..f4e6754525dc 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -512,7 +512,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 static int tty_ldisc_halt(struct tty_struct *tty)
 {
 	clear_bit(TTY_LDISC, &tty->flags);
-	return cancel_work_sync(&tty->buf.work);
+	return cancel_work_sync(&tty->port->buf.work);
 }
 
 /**
@@ -525,7 +525,7 @@ static void tty_ldisc_flush_works(struct tty_struct *tty)
 {
 	flush_work(&tty->hangup_work);
 	flush_work(&tty->SAK_work);
-	flush_work(&tty->buf.work);
+	flush_work(&tty->port->buf.work);
 }
 
 /**
@@ -704,9 +704,9 @@ enable:
 	/* Restart the work queue in case no characters kick it off. Safe if
 	   already running */
 	if (work)
-		schedule_work(&tty->buf.work);
+		schedule_work(&tty->port->buf.work);
 	if (o_work)
-		schedule_work(&o_tty->buf.work);
+		schedule_work(&o_tty->port->buf.work);
 	mutex_unlock(&tty->ldisc_mutex);
 	tty_unlock(tty);
 	return retval;
@@ -817,7 +817,7 @@ void tty_ldisc_hangup(struct tty_struct *tty)
 	 */
 	clear_bit(TTY_LDISC, &tty->flags);
 	tty_unlock(tty);
-	cancel_work_sync(&tty->buf.work);
+	cancel_work_sync(&tty->port->buf.work);
 	mutex_unlock(&tty->ldisc_mutex);
 retry:
 	tty_lock(tty);
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index d7bdd8d0c23f..416b42f7c346 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -21,6 +21,7 @@
 void tty_port_init(struct tty_port *port)
 {
 	memset(port, 0, sizeof(*port));
+	tty_buffer_init(port);
 	init_waitqueue_head(&port->open_wait);
 	init_waitqueue_head(&port->close_wait);
 	init_waitqueue_head(&port->delta_msr_wait);
@@ -126,6 +127,7 @@ static void tty_port_destructor(struct kref *kref)
 	struct tty_port *port = container_of(kref, struct tty_port, kref);
 	if (port->xmit_buf)
 		free_page((unsigned long)port->xmit_buf);
+	tty_buffer_free_all(port);
 	if (port->ops->destruct)
 		port->ops->destruct(port);
 	else
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 9be74d649a51..d7ff88fb8967 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -188,6 +188,7 @@ struct tty_port_operations {
 };
 	
 struct tty_port {
+	struct tty_bufhead	buf;		/* Locked internally */
 	struct tty_struct	*tty;		/* Back pointer */
 	struct tty_struct	*itty;		/* internal back ptr */
 	const struct tty_port_operations *ops;	/* Port operations */
@@ -259,7 +260,6 @@ struct tty_struct {
 
 	struct tty_struct *link;
 	struct fasync_struct *fasync;
-	struct tty_bufhead buf;		/* Locked internally */
 	int alt_speed;		/* For magic substitution of 38400 bps */
 	wait_queue_head_t write_wait;
 	wait_queue_head_t read_wait;
@@ -388,9 +388,9 @@ extern void disassociate_ctty(int priv);
 extern void no_tty(void);
 extern void tty_flip_buffer_push(struct tty_struct *tty);
 extern void tty_flush_to_ldisc(struct tty_struct *tty);
-extern void tty_buffer_free_all(struct tty_struct *tty);
+extern void tty_buffer_free_all(struct tty_port *port);
 extern void tty_buffer_flush(struct tty_struct *tty);
-extern void tty_buffer_init(struct tty_struct *tty);
+extern void tty_buffer_init(struct tty_port *port);
 extern speed_t tty_get_baud_rate(struct tty_struct *tty);
 extern speed_t tty_termios_baud_rate(struct ktermios *termios);
 extern speed_t tty_termios_input_baud_rate(struct ktermios *termios);
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 9239d033a0a3..2002344ed36a 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -11,7 +11,7 @@ void tty_schedule_flip(struct tty_struct *tty);
 static inline int tty_insert_flip_char(struct tty_struct *tty,
 					unsigned char ch, char flag)
 {
-	struct tty_buffer *tb = tty->buf.tail;
+	struct tty_buffer *tb = tty->port->buf.tail;
 	if (tb && tb->used < tb->size) {
 		tb->flag_buf_ptr[tb->used] = flag;
 		tb->char_buf_ptr[tb->used++] = ch;
-- 
cgit v1.2.3-55-g7522


From 0668744f792a737872aa1904010e5fba5f95376b Mon Sep 17 00:00:00 2001
From: Joachim Eastwood
Date: Mon, 22 Oct 2012 08:45:34 +0000
Subject: net/at91_ether: add pdata flag for reverse Eth addr

This will allow us to remove the last mach include from at91_ether
and also make it easier to share address setup with macb.

Signed-off-by: Joachim Eastwood <manabian@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/mach-at91/board-csb337.c         | 2 ++
 drivers/net/ethernet/cadence/Kconfig      | 1 -
 drivers/net/ethernet/cadence/at91_ether.c | 5 ++---
 include/linux/platform_data/macb.h        | 1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c
index 3e37437a7a61..aa9b320bad59 100644
--- a/arch/arm/mach-at91/board-csb337.c
+++ b/arch/arm/mach-at91/board-csb337.c
@@ -53,6 +53,8 @@ static void __init csb337_init_early(void)
 static struct macb_platform_data __initdata csb337_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC2,
 	.is_rmii	= 0,
+	/* The CSB337 bootloader stores the MAC the wrong-way around */
+	.rev_eth_addr	= 1,
 };
 
 static struct at91_usbh_data __initdata csb337_usbh_data = {
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index f6d0956485f1..40172d1ca605 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -21,7 +21,6 @@ if NET_CADENCE
 
 config ARM_AT91_ETHER
 	tristate "AT91RM9200 Ethernet support"
-	depends on ARM && ARCH_AT91RM9200
 	select NET_CORE
 	select MACB
 	---help---
diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
index 375d272d1a5f..b92815aabc65 100644
--- a/drivers/net/ethernet/cadence/at91_ether.c
+++ b/drivers/net/ethernet/cadence/at91_ether.c
@@ -32,8 +32,6 @@
 #include <linux/phy.h>
 #include <linux/io.h>
 
-#include <asm/mach-types.h>
-
 #include "macb.h"
 
 #define DRV_NAME	"at91_ether"
@@ -61,9 +59,10 @@
 
 static short __init unpack_mac_address(struct net_device *dev, unsigned int hi, unsigned int lo)
 {
+	struct macb *lp = netdev_priv(dev);
 	char addr[6];
 
-	if (machine_is_csb337()) {
+	if (lp->board_data.rev_eth_addr) {
 		addr[5] = (lo & 0xff);			/* The CSB337 bootloader stores the MAC the wrong-way around */
 		addr[4] = (lo & 0xff00) >> 8;
 		addr[3] = (lo & 0xff0000) >> 16;
diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h
index b081c7245ec8..044a124bfbbc 100644
--- a/include/linux/platform_data/macb.h
+++ b/include/linux/platform_data/macb.h
@@ -12,6 +12,7 @@ struct macb_platform_data {
 	u32		phy_mask;
 	int		phy_irq_pin;	/* PHY IRQ */
 	u8		is_rmii;	/* using RMII interface? */
+	u8		rev_eth_addr;	/* reverse Ethernet address byte order */
 };
 
 #endif /* __MACB_PDATA_H__ */
-- 
cgit v1.2.3-55-g7522


From 51615edd0a41b36f69858b9be1fbbf6a3de84f82 Mon Sep 17 00:00:00 2001
From: Greg Suarez
Date: Mon, 22 Oct 2012 10:56:29 +0000
Subject: USB: cdc: add MBIM constants and structures

Based on revision 1.0 of "Universal Serial Bus Communications
Class Subclass Specification for Mobile Broadband Interface
Model" available from www.usb.org

Signed-off-by: Greg Suarez <gsuarez@smithmicro.com>
[bmork: added DSS defines]
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/cdc.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h
index 81a927930bfd..f35aa0a338c7 100644
--- a/include/linux/usb/cdc.h
+++ b/include/linux/usb/cdc.h
@@ -19,6 +19,7 @@
 #define USB_CDC_SUBCLASS_OBEX			0x0b
 #define USB_CDC_SUBCLASS_EEM			0x0c
 #define USB_CDC_SUBCLASS_NCM			0x0d
+#define USB_CDC_SUBCLASS_MBIM			0x0e
 
 #define USB_CDC_PROTO_NONE			0
 
@@ -33,6 +34,7 @@
 #define USB_CDC_PROTO_EEM			7
 
 #define USB_CDC_NCM_PROTO_NTB			1
+#define USB_CDC_MBIM_PROTO_NTB			2
 
 /*-------------------------------------------------------------------------*/
 
@@ -53,6 +55,7 @@
 #define USB_CDC_DMM_TYPE		0x14
 #define USB_CDC_OBEX_TYPE		0x15
 #define USB_CDC_NCM_TYPE		0x1a
+#define USB_CDC_MBIM_TYPE		0x1b
 
 /* "Header Functional Descriptor" from CDC spec  5.2.3.1 */
 struct usb_cdc_header_desc {
@@ -187,6 +190,21 @@ struct usb_cdc_ncm_desc {
 	__le16	bcdNcmVersion;
 	__u8	bmNetworkCapabilities;
 } __attribute__ ((packed));
+
+/* "MBIM Control Model Functional Descriptor" */
+struct usb_cdc_mbim_desc {
+	__u8	bLength;
+	__u8	bDescriptorType;
+	__u8	bDescriptorSubType;
+
+	__le16	bcdMBIMVersion;
+	__le16  wMaxControlMessage;
+	__u8    bNumberFilters;
+	__u8    bMaxFilterSize;
+	__le16  wMaxSegmentSize;
+	__u8    bmNetworkCapabilities;
+} __attribute__ ((packed));
+
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -332,6 +350,11 @@ struct usb_cdc_ncm_nth32 {
 #define USB_CDC_NCM_NDP32_CRC_SIGN	0x316D636E /* ncm1 */
 #define USB_CDC_NCM_NDP32_NOCRC_SIGN	0x306D636E /* ncm0 */
 
+#define USB_CDC_MBIM_NDP16_IPS_SIGN     0x00535049 /* IPS<sessionID> : IPS0 for now */
+#define USB_CDC_MBIM_NDP32_IPS_SIGN     0x00737069 /* ips<sessionID> : ips0 for now */
+#define USB_CDC_MBIM_NDP16_DSS_SIGN     0x00535344 /* DSS<sessionID> */
+#define USB_CDC_MBIM_NDP32_DSS_SIGN     0x00737364 /* dss<sessionID> */
+
 /* 16-bit NCM Datagram Pointer Entry */
 struct usb_cdc_ncm_dpe16 {
 	__le16	wDatagramIndex;
-- 
cgit v1.2.3-55-g7522


From c91ce3b6bff780453283a5882ecd54f267f4682f Mon Sep 17 00:00:00 2001
From: Bjørn Mork
Date: Mon, 22 Oct 2012 10:56:35 +0000
Subject: net: cdc_ncm: export shared symbols and definitions

Move symbols and definitons which can be shared with a
MBIM driver in a new header.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   |  97 ++++------------------------------
 include/linux/usb/cdc_ncm.h | 124 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 86 deletions(-)
 create mode 100644 include/linux/usb/cdc_ncm.h

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2dd27346cb39..17b4ba03cb55 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -51,90 +51,10 @@
 #include <linux/atomic.h>
 #include <linux/usb/usbnet.h>
 #include <linux/usb/cdc.h>
+#include <linux/usb/cdc_ncm.h>
 
 #define	DRIVER_VERSION				"14-Mar-2012"
 
-/* CDC NCM subclass 3.2.1 */
-#define USB_CDC_NCM_NDP16_LENGTH_MIN		0x10
-
-/* Maximum NTB length */
-#define	CDC_NCM_NTB_MAX_SIZE_TX			32768	/* bytes */
-#define	CDC_NCM_NTB_MAX_SIZE_RX			32768	/* bytes */
-
-/* Minimum value for MaxDatagramSize, ch. 6.2.9 */
-#define	CDC_NCM_MIN_DATAGRAM_SIZE		1514	/* bytes */
-
-/* Minimum value for MaxDatagramSize, ch. 8.1.3 */
-#define CDC_MBIM_MIN_DATAGRAM_SIZE              2048    /* bytes */
-
-#define	CDC_NCM_MIN_TX_PKT			512	/* bytes */
-
-/* Default value for MaxDatagramSize */
-#define	CDC_NCM_MAX_DATAGRAM_SIZE		8192	/* bytes */
-
-/*
- * Maximum amount of datagrams in NCM Datagram Pointer Table, not counting
- * the last NULL entry.
- */
-#define	CDC_NCM_DPT_DATAGRAMS_MAX		40
-
-/* Restart the timer, if amount of datagrams is less than given value */
-#define	CDC_NCM_RESTART_TIMER_DATAGRAM_CNT	3
-#define	CDC_NCM_TIMER_PENDING_CNT		2
-#define CDC_NCM_TIMER_INTERVAL			(400UL * NSEC_PER_USEC)
-
-/* The following macro defines the minimum header space */
-#define	CDC_NCM_MIN_HDR_SIZE \
-	(sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \
-	(CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
-#define CDC_NCM_NDP_SIZE \
-	(sizeof(struct usb_cdc_ncm_ndp16) +				\
-	      (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
-struct cdc_ncm_ctx {
-	struct usb_cdc_ncm_ntb_parameters ncm_parm;
-	struct hrtimer tx_timer;
-	struct tasklet_struct bh;
-
-	const struct usb_cdc_ncm_desc *func_desc;
-	const struct usb_cdc_mbim_desc *mbim_desc;
-	const struct usb_cdc_header_desc *header_desc;
-	const struct usb_cdc_union_desc *union_desc;
-	const struct usb_cdc_ether_desc *ether_desc;
-
-	struct net_device *netdev;
-	struct usb_device *udev;
-	struct usb_host_endpoint *in_ep;
-	struct usb_host_endpoint *out_ep;
-	struct usb_host_endpoint *status_ep;
-	struct usb_interface *intf;
-	struct usb_interface *control;
-	struct usb_interface *data;
-
-	struct sk_buff *tx_curr_skb;
-	struct sk_buff *tx_rem_skb;
-	__le32 tx_rem_sign;
-
-	spinlock_t mtx;
-	atomic_t stop;
-
-	u32 tx_timer_pending;
-	u32 tx_curr_frame_num;
-	u32 rx_speed;
-	u32 tx_speed;
-	u32 rx_max;
-	u32 tx_max;
-	u32 max_datagram_size;
-	u16 tx_max_datagrams;
-	u16 tx_remainder;
-	u16 tx_modulus;
-	u16 tx_ndp_modulus;
-	u16 tx_seq;
-	u16 rx_seq;
-	u16 connected;
-};
-
 static void cdc_ncm_txpath_bh(unsigned long param);
 static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx);
 static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer);
@@ -447,7 +367,7 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
 	.nway_reset = usbnet_nway_reset,
 };
 
-static int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting)
+int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting)
 {
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *driver;
@@ -605,8 +525,9 @@ error:
 	dev_info(&dev->udev->dev, "bind() failure\n");
 	return -ENODEV;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_bind_common);
 
-static void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
+void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	struct usb_driver *driver = driver_of(intf);
@@ -636,6 +557,7 @@ static void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 	usb_set_intfdata(ctx->intf, NULL);
 	cdc_ncm_free(ctx);
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_unbind);
 
 static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 {
@@ -701,7 +623,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 	return ndp16;
 }
 
-static struct sk_buff *
+struct sk_buff *
 cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 {
 	struct usb_cdc_ncm_nth16 *nth16;
@@ -858,6 +780,7 @@ exit_no_skb:
 		cdc_ncm_tx_timeout_start(ctx);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_fill_tx_frame);
 
 static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx)
 {
@@ -924,7 +847,7 @@ error:
 }
 
 /* verify NTB header and return offset of first NDP, or negative error */
-static int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in)
+int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in)
 {
 	struct usb_cdc_ncm_nth16 *nth16;
 	int len;
@@ -966,9 +889,10 @@ static int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_
 error:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_nth16);
 
 /* verify NDP header and return number of datagrams, or negative error */
-static int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset)
+int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset)
 {
 	struct usb_cdc_ncm_ndp16 *ndp16;
 	int ret = -EINVAL;
@@ -999,6 +923,7 @@ static int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset)
 error:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp16);
 
 static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 {
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
new file mode 100644
index 000000000000..d1719a72f7d2
--- /dev/null
+++ b/include/linux/usb/cdc_ncm.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) ST-Ericsson 2010-2012
+ * Contact: Alexey Orishko <alexey.orishko@stericsson.com>
+ * Original author: Hans Petter Selasky <hans.petter.selasky@stericsson.com>
+ *
+ * USB Host Driver for Network Control Model (NCM)
+ * http://www.usb.org/developers/devclass_docs/NCM10.zip
+ *
+ * The NCM encoding, decoding and initialization logic
+ * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose this file to be licensed under the terms
+ * of the GNU General Public License (GPL) Version 2 or the 2-clause
+ * BSD license listed below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* CDC NCM subclass 3.2.1 */
+#define USB_CDC_NCM_NDP16_LENGTH_MIN		0x10
+
+/* Maximum NTB length */
+#define	CDC_NCM_NTB_MAX_SIZE_TX			32768	/* bytes */
+#define	CDC_NCM_NTB_MAX_SIZE_RX			32768	/* bytes */
+
+/* Minimum value for MaxDatagramSize, ch. 6.2.9 */
+#define	CDC_NCM_MIN_DATAGRAM_SIZE		1514	/* bytes */
+
+/* Minimum value for MaxDatagramSize, ch. 8.1.3 */
+#define CDC_MBIM_MIN_DATAGRAM_SIZE		2048	/* bytes */
+
+#define	CDC_NCM_MIN_TX_PKT			512	/* bytes */
+
+/* Default value for MaxDatagramSize */
+#define	CDC_NCM_MAX_DATAGRAM_SIZE		8192	/* bytes */
+
+/*
+ * Maximum amount of datagrams in NCM Datagram Pointer Table, not counting
+ * the last NULL entry.
+ */
+#define	CDC_NCM_DPT_DATAGRAMS_MAX		40
+
+/* Restart the timer, if amount of datagrams is less than given value */
+#define	CDC_NCM_RESTART_TIMER_DATAGRAM_CNT	3
+#define	CDC_NCM_TIMER_PENDING_CNT		2
+#define CDC_NCM_TIMER_INTERVAL			(400UL * NSEC_PER_USEC)
+
+/* The following macro defines the minimum header space */
+#define	CDC_NCM_MIN_HDR_SIZE \
+	(sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \
+	(CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
+
+#define CDC_NCM_NDP_SIZE \
+	(sizeof(struct usb_cdc_ncm_ndp16) +				\
+	      (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
+
+struct cdc_ncm_ctx {
+	struct usb_cdc_ncm_ntb_parameters ncm_parm;
+	struct hrtimer tx_timer;
+	struct tasklet_struct bh;
+
+	const struct usb_cdc_ncm_desc *func_desc;
+	const struct usb_cdc_mbim_desc   *mbim_desc;
+	const struct usb_cdc_header_desc *header_desc;
+	const struct usb_cdc_union_desc *union_desc;
+	const struct usb_cdc_ether_desc *ether_desc;
+
+	struct net_device *netdev;
+	struct usb_device *udev;
+	struct usb_host_endpoint *in_ep;
+	struct usb_host_endpoint *out_ep;
+	struct usb_host_endpoint *status_ep;
+	struct usb_interface *intf;
+	struct usb_interface *control;
+	struct usb_interface *data;
+
+	struct sk_buff *tx_curr_skb;
+	struct sk_buff *tx_rem_skb;
+	__le32 tx_rem_sign;
+
+	spinlock_t mtx;
+	atomic_t stop;
+
+	u32 tx_timer_pending;
+	u32 tx_curr_frame_num;
+	u32 rx_speed;
+	u32 tx_speed;
+	u32 rx_max;
+	u32 tx_max;
+	u32 max_datagram_size;
+	u16 tx_max_datagrams;
+	u16 tx_remainder;
+	u16 tx_modulus;
+	u16 tx_ndp_modulus;
+	u16 tx_seq;
+	u16 rx_seq;
+	u16 connected;
+};
+
+extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
+extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
+extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign);
+extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
+extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
-- 
cgit v1.2.3-55-g7522


From 9bf211a30b3f158c39cf1c6adb3a9388d41740ce Mon Sep 17 00:00:00 2001
From: Greg Suarez
Date: Mon, 22 Oct 2012 10:56:36 +0000
Subject: net: cdc_mbim: adding MBIM driver

The CDC Mobile Broadband Interface Model (MBIM) specification
extends CDC NCM by
 - removing the redundant ethernet header from the point-to-point
   USB channel
 - adding support for multiple IP (v4 and/or v6) sessions multiplexed
   on the same USB channel
 - adding a MBIM control channel encapsulated in CDC
 - adding Device Service Streams (DSS), which are non IP generic data
   streams multiplexed on the same USB channel as the IP sessions

MBIM devices are managed using the dedicated control channel, and no
data will flow on the data channel until a control session has been
established.  This driver has no knowledge of MBIM control messages.
It just exports the control channel to a /dev/cdc-wdmX character
device for userspace management applications. Such an application is
therefore required to use this driver.

This patch implements basic MBIM support, reusing the NCM and WDM driver
APIs, currently limited to IP sessions with SessionID 0. DSS and
multiplexed IP sessions are not yet supported.

Signed-off-by: Greg Suarez <gsuarez@smithmicro.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  | 365 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/cdc_ncm.h |  10 ++
 2 files changed, 375 insertions(+)
 create mode 100644 drivers/net/usb/cdc_mbim.c

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
new file mode 100644
index 000000000000..2f21139466ec
--- /dev/null
+++ b/drivers/net/usb/cdc_mbim.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2012  Smith Micro Software, Inc.
+ * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
+ *
+ * This driver is based on and reuse most of cdc_ncm, which is
+ * Copyright (C) ST-Ericsson 2010-2012
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/mii.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc-wdm.h>
+#include <linux/usb/cdc_ncm.h>
+
+/* driver specific data - must match cdc_ncm usage */
+struct cdc_mbim_state {
+	struct cdc_ncm_ctx *ctx;
+	atomic_t pmcount;
+	struct usb_driver *subdriver;
+	struct usb_interface *control;
+	struct usb_interface *data;
+};
+
+/* using a counter to merge subdriver requests with our own into a combined state */
+static int cdc_mbim_manage_power(struct usbnet *dev, int on)
+{
+	struct cdc_mbim_state *info = (void *)&dev->data;
+	int rv = 0;
+
+	dev_dbg(&dev->intf->dev, "%s() pmcount=%d, on=%d\n", __func__, atomic_read(&info->pmcount), on);
+
+	if ((on && atomic_add_return(1, &info->pmcount) == 1) || (!on && atomic_dec_and_test(&info->pmcount))) {
+		/* need autopm_get/put here to ensure the usbcore sees the new value */
+		rv = usb_autopm_get_interface(dev->intf);
+		if (rv < 0)
+			goto err;
+		dev->intf->needs_remote_wakeup = on;
+		usb_autopm_put_interface(dev->intf);
+	}
+err:
+	return rv;
+}
+
+static int cdc_mbim_wdm_manage_power(struct usb_interface *intf, int status)
+{
+	struct usbnet *dev = usb_get_intfdata(intf);
+
+	/* can be called while disconnecting */
+	if (!dev)
+		return 0;
+
+	return cdc_mbim_manage_power(dev, status);
+}
+
+
+static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+	struct cdc_ncm_ctx *ctx;
+	struct usb_driver *subdriver = ERR_PTR(-ENODEV);
+	int ret = -ENODEV;
+	u8 data_altsetting = CDC_NCM_DATA_ALTSETTING_NCM;
+	struct cdc_mbim_state *info = (void *)&dev->data;
+
+	/* see if interface supports MBIM alternate setting */
+	if (intf->num_altsetting == 2) {
+		if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
+			usb_set_interface(dev->udev,
+					  intf->cur_altsetting->desc.bInterfaceNumber,
+					  CDC_NCM_COMM_ALTSETTING_MBIM);
+		data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM;
+	}
+
+	/* Probably NCM, defer for cdc_ncm_bind */
+	if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
+		goto err;
+
+	ret = cdc_ncm_bind_common(dev, intf, data_altsetting);
+	if (ret)
+		goto err;
+
+	ctx = info->ctx;
+
+	/* The MBIM descriptor and the status endpoint are required */
+	if (ctx->mbim_desc && dev->status)
+		subdriver = usb_cdc_wdm_register(ctx->control,
+						 &dev->status->desc,
+						 le16_to_cpu(ctx->mbim_desc->wMaxControlMessage),
+						 cdc_mbim_wdm_manage_power);
+	if (IS_ERR(subdriver)) {
+		ret = PTR_ERR(subdriver);
+		cdc_ncm_unbind(dev, intf);
+		goto err;
+	}
+
+	/* can't let usbnet use the interrupt endpoint */
+	dev->status = NULL;
+	info->subdriver = subdriver;
+
+	/* MBIM cannot do ARP */
+	dev->net->flags |= IFF_NOARP;
+err:
+	return ret;
+}
+
+static void cdc_mbim_unbind(struct usbnet *dev, struct usb_interface *intf)
+{
+	struct cdc_mbim_state *info = (void *)&dev->data;
+	struct cdc_ncm_ctx *ctx = info->ctx;
+
+	/* disconnect subdriver from control interface */
+	if (info->subdriver && info->subdriver->disconnect)
+		info->subdriver->disconnect(ctx->control);
+	info->subdriver = NULL;
+
+	/* let NCM unbind clean up both control and data interface */
+	cdc_ncm_unbind(dev, intf);
+}
+
+
+static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
+{
+	struct sk_buff *skb_out;
+	struct cdc_mbim_state *info = (void *)&dev->data;
+	struct cdc_ncm_ctx *ctx = info->ctx;
+
+	if (!ctx)
+		goto error;
+
+	if (skb) {
+		if (skb->len <= sizeof(ETH_HLEN))
+			goto error;
+
+		skb_reset_mac_header(skb);
+		switch (eth_hdr(skb)->h_proto) {
+		case htons(ETH_P_IP):
+		case htons(ETH_P_IPV6):
+			skb_pull(skb, ETH_HLEN);
+			break;
+		default:
+			goto error;
+		}
+	}
+
+	spin_lock_bh(&ctx->mtx);
+	skb_out = cdc_ncm_fill_tx_frame(ctx, skb, cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN));
+	spin_unlock_bh(&ctx->mtx);
+	return skb_out;
+
+error:
+	if (skb)
+		dev_kfree_skb_any(skb);
+
+	return NULL;
+}
+
+static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_t len)
+{
+	__be16 proto;
+	struct sk_buff *skb = NULL;
+
+	switch (*buf & 0xf0) {
+	case 0x40:
+		proto = htons(ETH_P_IP);
+		break;
+	case 0x60:
+		proto = htons(ETH_P_IPV6);
+		break;
+	default:
+		goto err;
+	}
+
+	skb = netdev_alloc_skb_ip_align(dev->net,  len + ETH_HLEN);
+	if (!skb)
+		goto err;
+
+	/* add an ethernet header */
+	skb_put(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	eth_hdr(skb)->h_proto = proto;
+	memset(eth_hdr(skb)->h_source, 0, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN);
+
+	/* add datagram */
+	memcpy(skb_put(skb, len), buf, len);
+err:
+	return skb;
+}
+
+static int cdc_mbim_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
+{
+	struct sk_buff *skb;
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	int len;
+	int nframes;
+	int x;
+	int offset;
+	struct usb_cdc_ncm_ndp16 *ndp16;
+	struct usb_cdc_ncm_dpe16 *dpe16;
+	int ndpoffset;
+	int loopcount = 50; /* arbitrary max preventing infinite loop */
+
+	ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in);
+	if (ndpoffset < 0)
+		goto error;
+
+next_ndp:
+	nframes = cdc_ncm_rx_verify_ndp16(skb_in, ndpoffset);
+	if (nframes < 0)
+		goto error;
+
+	ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset);
+
+	/* only supporting IPS Session #0 for now */
+	switch (ndp16->dwSignature) {
+	case cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN):
+		break;
+	default:
+		netif_dbg(dev, rx_err, dev->net,
+			  "unsupported NDP signature <0x%08x>\n",
+			  le32_to_cpu(ndp16->dwSignature));
+		goto err_ndp;
+
+	}
+
+	dpe16 = ndp16->dpe16;
+	for (x = 0; x < nframes; x++, dpe16++) {
+		offset = le16_to_cpu(dpe16->wDatagramIndex);
+		len = le16_to_cpu(dpe16->wDatagramLength);
+
+		/*
+		 * CDC NCM ch. 3.7
+		 * All entries after first NULL entry are to be ignored
+		 */
+		if ((offset == 0) || (len == 0)) {
+			if (!x)
+				goto err_ndp; /* empty NTB */
+			break;
+		}
+
+		/* sanity checking */
+		if (((offset + len) > skb_in->len) || (len > ctx->rx_max) ||
+		    (len < sizeof(struct iphdr))) {
+			netif_dbg(dev, rx_err, dev->net,
+				  "invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n",
+				  x, offset, len, skb_in);
+			if (!x)
+				goto err_ndp;
+			break;
+		} else {
+			skb = cdc_mbim_process_dgram(dev, skb_in->data + offset, len);
+			if (!skb)
+				goto error;
+			usbnet_skb_return(dev, skb);
+		}
+	}
+err_ndp:
+	/* are there more NDPs to process? */
+	ndpoffset = le16_to_cpu(ndp16->wNextNdpIndex);
+	if (ndpoffset && loopcount--)
+		goto next_ndp;
+
+	return 1;
+error:
+	return 0;
+}
+
+static int cdc_mbim_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	int ret = 0;
+	struct usbnet *dev = usb_get_intfdata(intf);
+	struct cdc_mbim_state *info = (void *)&dev->data;
+	struct cdc_ncm_ctx *ctx = info->ctx;
+
+	if (ctx == NULL) {
+		ret = -1;
+		goto error;
+	}
+
+	ret = usbnet_suspend(intf, message);
+	if (ret < 0)
+		goto error;
+
+	if (intf == ctx->control && info->subdriver && info->subdriver->suspend)
+		ret = info->subdriver->suspend(intf, message);
+	if (ret < 0)
+		usbnet_resume(intf);
+
+error:
+	return ret;
+}
+
+static int cdc_mbim_resume(struct usb_interface *intf)
+{
+	int  ret = 0;
+	struct usbnet *dev = usb_get_intfdata(intf);
+	struct cdc_mbim_state *info = (void *)&dev->data;
+	struct cdc_ncm_ctx *ctx = info->ctx;
+	bool callsub = (intf == ctx->control && info->subdriver && info->subdriver->resume);
+
+	if (callsub)
+		ret = info->subdriver->resume(intf);
+	if (ret < 0)
+		goto err;
+	ret = usbnet_resume(intf);
+	if (ret < 0 && callsub && info->subdriver->suspend)
+		info->subdriver->suspend(intf, PMSG_SUSPEND);
+err:
+	return ret;
+}
+
+static const struct driver_info cdc_mbim_info = {
+	.description = "CDC MBIM",
+	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
+	.bind = cdc_mbim_bind,
+	.unbind = cdc_mbim_unbind,
+	.manage_power = cdc_mbim_manage_power,
+	.rx_fixup = cdc_mbim_rx_fixup,
+	.tx_fixup = cdc_mbim_tx_fixup,
+};
+
+static const struct usb_device_id mbim_devs[] = {
+	/* This duplicate NCM entry is intentional. MBIM devices can
+	 * be disguised as NCM by default, and this is necessary to
+	 * allow us to bind the correct driver_info to such devices.
+	 *
+	 * bind() will sort out this for us, selecting the correct
+	 * entry and reject the other
+	 */
+	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&cdc_mbim_info,
+	},
+	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&cdc_mbim_info,
+	},
+	{
+	},
+};
+MODULE_DEVICE_TABLE(usb, mbim_devs);
+
+static struct usb_driver cdc_mbim_driver = {
+	.name = "cdc_mbim",
+	.id_table = mbim_devs,
+	.probe = usbnet_probe,
+	.disconnect = usbnet_disconnect,
+	.suspend = cdc_mbim_suspend,
+	.resume = cdc_mbim_resume,
+	.reset_resume =	cdc_mbim_resume,
+	.supports_autosuspend = 1,
+	.disable_hub_initiated_lpm = 1,
+};
+module_usb_driver(cdc_mbim_driver);
+
+MODULE_AUTHOR("Greg Suarez <gsuarez@smithmicro.com>");
+MODULE_AUTHOR("Bjørn Mork <bjorn@mork.no>");
+MODULE_DESCRIPTION("USB CDC MBIM host driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index d1719a72f7d2..3b8f9d4fc3fe 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -36,6 +36,12 @@
  * SUCH DAMAGE.
  */
 
+#define CDC_NCM_COMM_ALTSETTING_NCM		0
+#define CDC_NCM_COMM_ALTSETTING_MBIM		1
+
+#define CDC_NCM_DATA_ALTSETTING_NCM		1
+#define CDC_NCM_DATA_ALTSETTING_MBIM		2
+
 /* CDC NCM subclass 3.2.1 */
 #define USB_CDC_NCM_NDP16_LENGTH_MIN		0x10
 
@@ -74,6 +80,10 @@
 	(sizeof(struct usb_cdc_ncm_ndp16) +				\
 	      (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
 
+#define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
+				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
+#define cdc_ncm_data_intf_is_mbim(x)  ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB)
+
 struct cdc_ncm_ctx {
 	struct usb_cdc_ncm_ntb_parameters ncm_parm;
 	struct hrtimer tx_timer;
-- 
cgit v1.2.3-55-g7522


From 8ca40a70a70988c0bdea106c894843f763ca2989 Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Sun, 14 Oct 2012 23:10:18 -0400
Subject: KVM: Take kvm instead of vcpu to mmu_notifier_retry

The mmu_notifier_retry is not specific to any vcpu (and never will be)
so only take struct kvm as a parameter.

The motivation is the ARM mmu code that needs to call this from
somewhere where we long let go of the vcpu pointer.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +-
 arch/x86/kvm/mmu.c                  | 4 ++--
 arch/x86/kvm/paging_tmpl.h          | 2 +-
 include/linux/kvm_host.h            | 6 +++---
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7a4aae99ac5b..2a89a36e7263 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -710,7 +710,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* Check if we might have been invalidated; let the guest retry if so */
 	ret = RESUME_GUEST;
-	if (mmu_notifier_retry(vcpu, mmu_seq)) {
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
 		unlock_rmap(rmap);
 		goto out_unlock;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 9955216477a4..5e06e3153888 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
 		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(vcpu, mmu_seq)) {
+		    mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3d5ca7939380..6f78fa3a4706 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2886,7 +2886,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu, mmu_seq))
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	if (likely(!force_pt_level))
@@ -3355,7 +3355,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu, mmu_seq))
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	if (likely(!force_pt_level))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f887e4cfc1fe..d17decaf1db9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -565,7 +565,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu, mmu_seq))
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6afc5be2615e..82e2c783a21e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -841,9 +841,9 @@ extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
+static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 {
-	if (unlikely(vcpu->kvm->mmu_notifier_count))
+	if (unlikely(kvm->mmu_notifier_count))
 		return 1;
 	/*
 	 * Ensure the read of mmu_notifier_count happens before the read
@@ -856,7 +856,7 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 	 * can't rely on kvm->mmu_lock to keep things ordered.
 	 */
 	smp_rmb();
-	if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
+	if (kvm->mmu_notifier_seq != mmu_seq)
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3-55-g7522


From 4d9a5d4319e22670ec6d6227e12b54f361c46d0f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 11 Oct 2012 01:47:16 +0200
Subject: rcu: Remove rcu_switch()

It's only there to call rcu_user_hooks_switch(). Let's
just call rcu_user_hooks_switch() directly, we don't need this
function in the middle.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/um/drivers/mconsole_kern.c | 2 +-
 include/linux/rcupdate.h        | 2 ++
 include/linux/sched.h           | 8 --------
 kernel/sched/core.c             | 2 +-
 4 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 79ccfe6c7078..49e3b49e552f 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -648,7 +648,7 @@ static void stack_proc(void *arg)
 	struct task_struct *from = current, *to = arg;
 
 	to->thread.saved_task = from;
-	rcu_switch(from, to);
+	rcu_user_hooks_switch(from, to);
 	switch_to(from, to, from);
 }
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7c968e4f929e..5d009def7c0c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -204,6 +204,8 @@ static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
 static inline void rcu_user_enter_after_irq(void) { }
 static inline void rcu_user_exit_after_irq(void) { }
+static inline void rcu_user_hooks_switch(struct task_struct *prev,
+					 struct task_struct *next) { }
 #endif /* CONFIG_RCU_USER_QS */
 
 extern void exit_rcu(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..432cc5e1bbee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1844,14 +1844,6 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
-static inline void rcu_switch(struct task_struct *prev,
-			      struct task_struct *next)
-{
-#ifdef CONFIG_RCU_USER_QS
-	rcu_user_hooks_switch(prev, next);
-#endif
-}
-
 static inline void tsk_restore_flags(struct task_struct *task,
 				unsigned long orig_flags, unsigned long flags)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..68414fa4cd2a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1887,7 +1887,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
 	/* Here we just switch the register state and the stack. */
-	rcu_switch(prev, next);
+	rcu_user_hooks_switch(prev, next);
 	switch_to(prev, next, prev);
 
 	barrier();
-- 
cgit v1.2.3-55-g7522


From 4e87b2d7e887df3fe251dd7f702591a3acf369ca Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sat, 13 Oct 2012 01:14:14 +0800
Subject: srcu: Credit Lai Jiangshan with SRCU rewrite

Lai Jiangshan rewrote SRCU, so this commit ensures that he gets his
proper share of blame^Wcredit.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 2 ++
 kernel/srcu.c        | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 55a5c52cbb25..a55ddb19053c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -16,8 +16,10 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  * Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
  *
  * Author: Paul McKenney <paulmck@us.ibm.com>
+ *	   Lai Jiangshan <laijs@cn.fujitsu.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
  * 		Documentation/RCU/ *.txt
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 97c465ebd844..0b99f27fa2f7 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -16,8 +16,10 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  * Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
  *
  * Author: Paul McKenney <paulmck@us.ibm.com>
+ *	   Lai Jiangshan <laijs@cn.fujitsu.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
  * 		Documentation/RCU/ *.txt
-- 
cgit v1.2.3-55-g7522


From f2ebfbc991044fd5b89d4529741d7500feb37fbd Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sat, 13 Oct 2012 01:14:15 +0800
Subject: srcu: Export process_srcu()

Because process_srcu() will be used in DEFINE_SRCU(), which is a macro
that could be expanded pretty much anywhere, it can no longer be static.
Note that process_srcu() is still internal to srcu.h.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 2 ++
 kernel/srcu.c        | 6 ++----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a55ddb19053c..5cce128f196c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -78,6 +78,8 @@ int init_srcu_struct(struct srcu_struct *sp);
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+void process_srcu(struct work_struct *work);
+
 /**
  * call_srcu() - Queue a callback for invocation after an SRCU grace period
  * @sp: srcu_struct in queue the callback
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 0b99f27fa2f7..b363b0924561 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -94,9 +94,6 @@ static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
 	}
 }
 
-/* single-thread state-machine */
-static void process_srcu(struct work_struct *work);
-
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->completed = 0;
@@ -639,7 +636,7 @@ static void srcu_reschedule(struct srcu_struct *sp)
 /*
  * This is the work-queue function that handles SRCU grace periods.
  */
-static void process_srcu(struct work_struct *work)
+void process_srcu(struct work_struct *work)
 {
 	struct srcu_struct *sp;
 
@@ -650,3 +647,4 @@ static void process_srcu(struct work_struct *work)
 	srcu_invoke_callbacks(sp);
 	srcu_reschedule(sp);
 }
+EXPORT_SYMBOL_GPL(process_srcu);
-- 
cgit v1.2.3-55-g7522


From b637a328bd4f43a0e146d1eef0142b650ba0d644 Mon Sep 17 00:00:00 2001
From: Paul E. McKenney
Date: Wed, 19 Sep 2012 16:58:38 -0700
Subject: rcu: Print remote CPU's stacks in stall warnings

The RCU CPU stall warnings rely on trigger_all_cpu_backtrace() to
do NMI-based dump of the stack traces of all CPUs.  Unfortunately, a
number of architectures do not implement trigger_all_cpu_backtrace(), in
which case RCU falls back to just dumping the stack of the running CPU.
This is unhelpful in the case where the running CPU has detected that
some other CPU has stalled.

This commit therefore makes the running CPU dump the stacks of the
tasks running on the stalled CPUs.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h |  2 ++
 kernel/rcutree.c      | 25 ++++++++++++++++++++++++-
 kernel/sched/core.c   |  6 ++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..ba69b5adea30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -109,6 +109,8 @@ extern void update_cpu_load_nohz(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
+extern void dump_cpu_task(int cpu);
+
 struct seq_file;
 struct cfs_rq;
 struct task_group;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 74df86bd9204..e78538712df0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -873,6 +873,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
 	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
 }
 
+/*
+ * Dump stacks of all tasks running on stalled CPUs.  This is a fallback
+ * for architectures that do not implement trigger_all_cpu_backtrace().
+ * The NMI-triggered stack traces are more accurate because they are
+ * printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
+{
+	int cpu;
+	unsigned long flags;
+	struct rcu_node *rnp;
+
+	rcu_for_each_leaf_node(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		if (rnp->qsmask != 0) {
+			for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+				if (rnp->qsmask & (1UL << cpu))
+					dump_cpu_task(rnp->grplo + cpu);
+		}
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
 static void print_other_cpu_stall(struct rcu_state *rsp)
 {
 	int cpu;
@@ -929,7 +952,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	if (ndetected == 0)
 		printk(KERN_ERR "INFO: Stall ended before state dump start\n");
 	else if (!trigger_all_cpu_backtrace())
-		dump_stack();
+		rcu_dump_cpu_stacks(rsp);
 
 	/* Complain about tasks blocking the grace period. */
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..59d08fb1a9e3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8076,3 +8076,9 @@ struct cgroup_subsys cpuacct_subsys = {
 	.base_cftypes = files,
 };
 #endif	/* CONFIG_CGROUP_CPUACCT */
+
+void dump_cpu_task(int cpu)
+{
+	pr_info("Task dump for CPU %d:\n", cpu);
+	sched_show_task(cpu_curr(cpu));
+}
-- 
cgit v1.2.3-55-g7522


From e39473d0b9448e770f49b0b15e514be884264438 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Wed, 24 Oct 2012 02:08:18 +0200
Subject: PM / QoS: Make it possible to expose PM QoS device flags to user
 space

Define two device PM QoS flags, PM_QOS_FLAG_NO_POWER_OFF
and PM_QOS_FLAG_REMOTE_WAKEUP, and introduce routines
dev_pm_qos_expose_flags() and dev_pm_qos_hide_flags() allowing the
caller to expose those two flags to user space or to hide them
from it, respectively.

After the flags have been exposed, user space will see two
additional sysfs attributes, pm_qos_no_power_off and
pm_qos_remote_wakeup, under the device's /sys/devices/.../power/
directory.  Then, writing 1 to one of them will update the
PM QoS flags request owned by user space so that the corresponding
flag is requested to be set.  In turn, writing 0 to one of them
will cause the corresponding flag in the user space's request to
be cleared (however, the owners of the other PM QoS flags requests
for the same device may still request the flag to be set and it
may be effectively set even if user space doesn't request that).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Jean Pihet <j-pihet@ti.com>
Acked-by: mark gross <markgross@thegnar.org>
---
 Documentation/ABI/testing/sysfs-devices-power |  31 +++++
 drivers/base/power/power.h                    |   6 +-
 drivers/base/power/qos.c                      | 168 ++++++++++++++++++++------
 drivers/base/power/sysfs.c                    |  94 ++++++++++++--
 include/linux/pm.h                            |   1 -
 include/linux/pm_qos.h                        |  26 ++++
 6 files changed, 278 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 45000f0db4d4..7fc2997b23a6 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -204,3 +204,34 @@ Description:
 
 		This attribute has no effect on system-wide suspend/resume and
 		hibernation.
+
+What:		/sys/devices/.../power/pm_qos_no_power_off
+Date:		September 2012
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/devices/.../power/pm_qos_no_power_off attribute
+		is used for manipulating the PM QoS "no power off" flag.  If
+		set, this flag indicates to the kernel that power should not
+		be removed entirely from the device.
+
+		Not all drivers support this attribute.  If it isn't supported,
+		it is not present.
+
+		This attribute has no effect on system-wide suspend/resume and
+		hibernation.
+
+What:		/sys/devices/.../power/pm_qos_remote_wakeup
+Date:		September 2012
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/devices/.../power/pm_qos_remote_wakeup attribute
+		is used for manipulating the PM QoS "remote wakeup required"
+		flag.  If set, this flag indicates to the kernel that the
+		device is a source of user events that have to be signaled from
+		its low-power states.
+
+		Not all drivers support this attribute.  If it isn't supported,
+		it is not present.
+
+		This attribute has no effect on system-wide suspend/resume and
+		hibernation.
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 0dbfdf4419af..b16686a0a5a2 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -93,8 +93,10 @@ extern void dpm_sysfs_remove(struct device *dev);
 extern void rpm_sysfs_remove(struct device *dev);
 extern int wakeup_sysfs_add(struct device *dev);
 extern void wakeup_sysfs_remove(struct device *dev);
-extern int pm_qos_sysfs_add(struct device *dev);
-extern void pm_qos_sysfs_remove(struct device *dev);
+extern int pm_qos_sysfs_add_latency(struct device *dev);
+extern void pm_qos_sysfs_remove_latency(struct device *dev);
+extern int pm_qos_sysfs_add_flags(struct device *dev);
+extern void pm_qos_sysfs_remove_flags(struct device *dev);
 
 #else /* CONFIG_PM */
 
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 3c66f75d14b0..167834dcc82a 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -40,6 +40,7 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/export.h>
+#include <linux/pm_runtime.h>
 
 #include "power.h"
 
@@ -321,6 +322,37 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
 }
 EXPORT_SYMBOL_GPL(dev_pm_qos_add_request);
 
+/**
+ * __dev_pm_qos_update_request - Modify an existing device PM QoS request.
+ * @req : PM QoS request to modify.
+ * @new_value: New value to request.
+ */
+static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req,
+				       s32 new_value)
+{
+	s32 curr_value;
+	int ret = 0;
+
+	if (!req->dev->power.qos)
+		return -ENODEV;
+
+	switch(req->type) {
+	case DEV_PM_QOS_LATENCY:
+		curr_value = req->data.pnode.prio;
+		break;
+	case DEV_PM_QOS_FLAGS:
+		curr_value = req->data.flr.flags;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (curr_value != new_value)
+		ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value);
+
+	return ret;
+}
+
 /**
  * dev_pm_qos_update_request - modifies an existing qos request
  * @req : handle to list element holding a dev_pm_qos request to use
@@ -336,11 +368,9 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_request);
  * -EINVAL in case of wrong parameters, -ENODEV if the device has been
  * removed from the system
  */
-int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
-			      s32 new_value)
+int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value)
 {
-	s32 curr_value;
-	int ret = 0;
+	int ret;
 
 	if (!req) /*guard against callers passing in null */
 		return -EINVAL;
@@ -350,29 +380,9 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req,
 		return -EINVAL;
 
 	mutex_lock(&dev_pm_qos_mtx);
-
-	if (!req->dev->power.qos) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	switch(req->type) {
-	case DEV_PM_QOS_LATENCY:
-		curr_value = req->data.pnode.prio;
-		break;
-	case DEV_PM_QOS_FLAGS:
-		curr_value = req->data.flr.flags;
-		break;
-	default:
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (curr_value != new_value)
-		ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value);
-
- out:
+	__dev_pm_qos_update_request(req, new_value);
 	mutex_unlock(&dev_pm_qos_mtx);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_qos_update_request);
@@ -533,10 +543,19 @@ int dev_pm_qos_add_ancestor_request(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request);
 
 #ifdef CONFIG_PM_RUNTIME
-static void __dev_pm_qos_drop_user_request(struct device *dev)
+static void __dev_pm_qos_drop_user_request(struct device *dev,
+					   enum dev_pm_qos_req_type type)
 {
-	dev_pm_qos_remove_request(dev->power.pq_req);
-	dev->power.pq_req = NULL;
+	switch(type) {
+	case DEV_PM_QOS_LATENCY:
+		dev_pm_qos_remove_request(dev->power.qos->latency_req);
+		dev->power.qos->latency_req = NULL;
+		break;
+	case DEV_PM_QOS_FLAGS:
+		dev_pm_qos_remove_request(dev->power.qos->flags_req);
+		dev->power.qos->flags_req = NULL;
+		break;
+	}
 }
 
 /**
@@ -552,7 +571,7 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
 	if (!device_is_registered(dev) || value < 0)
 		return -EINVAL;
 
-	if (dev->power.pq_req)
+	if (dev->power.qos && dev->power.qos->latency_req)
 		return -EEXIST;
 
 	req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -563,10 +582,10 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
 	if (ret < 0)
 		return ret;
 
-	dev->power.pq_req = req;
-	ret = pm_qos_sysfs_add(dev);
+	dev->power.qos->latency_req = req;
+	ret = pm_qos_sysfs_add_latency(dev);
 	if (ret)
-		__dev_pm_qos_drop_user_request(dev);
+		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY);
 
 	return ret;
 }
@@ -578,10 +597,87 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit);
  */
 void dev_pm_qos_hide_latency_limit(struct device *dev)
 {
-	if (dev->power.pq_req) {
-		pm_qos_sysfs_remove(dev);
-		__dev_pm_qos_drop_user_request(dev);
+	if (dev->power.qos && dev->power.qos->latency_req) {
+		pm_qos_sysfs_remove_latency(dev);
+		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY);
 	}
 }
 EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit);
+
+/**
+ * dev_pm_qos_expose_flags - Expose PM QoS flags of a device to user space.
+ * @dev: Device whose PM QoS flags are to be exposed to user space.
+ * @val: Initial values of the flags.
+ */
+int dev_pm_qos_expose_flags(struct device *dev, s32 val)
+{
+	struct dev_pm_qos_request *req;
+	int ret;
+
+	if (!device_is_registered(dev))
+		return -EINVAL;
+
+	if (dev->power.qos && dev->power.qos->flags_req)
+		return -EEXIST;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val);
+	if (ret < 0)
+		return ret;
+
+	dev->power.qos->flags_req = req;
+	ret = pm_qos_sysfs_add_flags(dev);
+	if (ret)
+		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags);
+
+/**
+ * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space.
+ * @dev: Device whose PM QoS flags are to be hidden from user space.
+ */
+void dev_pm_qos_hide_flags(struct device *dev)
+{
+	if (dev->power.qos && dev->power.qos->flags_req) {
+		pm_qos_sysfs_remove_flags(dev);
+		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS);
+	}
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags);
+
+/**
+ * dev_pm_qos_update_flags - Update PM QoS flags request owned by user space.
+ * @dev: Device to update the PM QoS flags request for.
+ * @mask: Flags to set/clear.
+ * @set: Whether to set or clear the flags (true means set).
+ */
+int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set)
+{
+	s32 value;
+	int ret;
+
+	if (!dev->power.qos || !dev->power.qos->flags_req)
+		return -EINVAL;
+
+	pm_runtime_get_sync(dev);
+	mutex_lock(&dev_pm_qos_mtx);
+
+	value = dev_pm_qos_requested_flags(dev);
+	if (set)
+		value |= mask;
+	else
+		value &= ~mask;
+
+	ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value);
+
+	mutex_unlock(&dev_pm_qos_mtx);
+	pm_runtime_put(dev);
+
+	return ret;
+}
 #endif /* CONFIG_PM_RUNTIME */
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 54c61ffa2044..50d16e3cb0a9 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -221,7 +221,7 @@ static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
 static ssize_t pm_qos_latency_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", dev->power.pq_req->data.pnode.prio);
+	return sprintf(buf, "%d\n", dev_pm_qos_requested_latency(dev));
 }
 
 static ssize_t pm_qos_latency_store(struct device *dev,
@@ -237,12 +237,66 @@ static ssize_t pm_qos_latency_store(struct device *dev,
 	if (value < 0)
 		return -EINVAL;
 
-	ret = dev_pm_qos_update_request(dev->power.pq_req, value);
+	ret = dev_pm_qos_update_request(dev->power.qos->latency_req, value);
 	return ret < 0 ? ret : n;
 }
 
 static DEVICE_ATTR(pm_qos_resume_latency_us, 0644,
 		   pm_qos_latency_show, pm_qos_latency_store);
+
+static ssize_t pm_qos_no_power_off_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev)
+					& PM_QOS_FLAG_NO_POWER_OFF));
+}
+
+static ssize_t pm_qos_no_power_off_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t n)
+{
+	int ret;
+
+	if (kstrtoint(buf, 0, &ret))
+		return -EINVAL;
+
+	if (ret != 0 && ret != 1)
+		return -EINVAL;
+
+	ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_no_power_off, 0644,
+		   pm_qos_no_power_off_show, pm_qos_no_power_off_store);
+
+static ssize_t pm_qos_remote_wakeup_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev)
+					& PM_QOS_FLAG_REMOTE_WAKEUP));
+}
+
+static ssize_t pm_qos_remote_wakeup_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t n)
+{
+	int ret;
+
+	if (kstrtoint(buf, 0, &ret))
+		return -EINVAL;
+
+	if (ret != 0 && ret != 1)
+		return -EINVAL;
+
+	ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_REMOTE_WAKEUP, ret);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_remote_wakeup, 0644,
+		   pm_qos_remote_wakeup_show, pm_qos_remote_wakeup_store);
 #endif /* CONFIG_PM_RUNTIME */
 
 #ifdef CONFIG_PM_SLEEP
@@ -564,15 +618,27 @@ static struct attribute_group pm_runtime_attr_group = {
 	.attrs	= runtime_attrs,
 };
 
-static struct attribute *pm_qos_attrs[] = {
+static struct attribute *pm_qos_latency_attrs[] = {
 #ifdef CONFIG_PM_RUNTIME
 	&dev_attr_pm_qos_resume_latency_us.attr,
 #endif /* CONFIG_PM_RUNTIME */
 	NULL,
 };
-static struct attribute_group pm_qos_attr_group = {
+static struct attribute_group pm_qos_latency_attr_group = {
 	.name	= power_group_name,
-	.attrs	= pm_qos_attrs,
+	.attrs	= pm_qos_latency_attrs,
+};
+
+static struct attribute *pm_qos_flags_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
+	&dev_attr_pm_qos_no_power_off.attr,
+	&dev_attr_pm_qos_remote_wakeup.attr,
+#endif /* CONFIG_PM_RUNTIME */
+	NULL,
+};
+static struct attribute_group pm_qos_flags_attr_group = {
+	.name	= power_group_name,
+	.attrs	= pm_qos_flags_attrs,
 };
 
 int dpm_sysfs_add(struct device *dev)
@@ -615,14 +681,24 @@ void wakeup_sysfs_remove(struct device *dev)
 	sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
 }
 
-int pm_qos_sysfs_add(struct device *dev)
+int pm_qos_sysfs_add_latency(struct device *dev)
+{
+	return sysfs_merge_group(&dev->kobj, &pm_qos_latency_attr_group);
+}
+
+void pm_qos_sysfs_remove_latency(struct device *dev)
+{
+	sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_attr_group);
+}
+
+int pm_qos_sysfs_add_flags(struct device *dev)
 {
-	return sysfs_merge_group(&dev->kobj, &pm_qos_attr_group);
+	return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group);
 }
 
-void pm_qos_sysfs_remove(struct device *dev)
+void pm_qos_sysfs_remove_flags(struct device *dev)
 {
-	sysfs_unmerge_group(&dev->kobj, &pm_qos_attr_group);
+	sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group);
 }
 
 void rpm_sysfs_remove(struct device *dev)
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 0ce6df94221a..03d7bb145311 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -546,7 +546,6 @@ struct dev_pm_info {
 	unsigned long		active_jiffies;
 	unsigned long		suspended_jiffies;
 	unsigned long		accounting_timestamp;
-	struct dev_pm_qos_request *pq_req;
 #endif
 	struct pm_subsys_data	*subsys_data;  /* Owned by the subsystem. */
 	struct dev_pm_qos	*qos;
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 3af7d8573c23..5a95013905c8 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -34,6 +34,9 @@ enum pm_qos_flags_status {
 #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE	0
 #define PM_QOS_DEV_LAT_DEFAULT_VALUE		0
 
+#define PM_QOS_FLAG_NO_POWER_OFF	(1 << 0)
+#define PM_QOS_FLAG_REMOTE_WAKEUP	(1 << 1)
+
 struct pm_qos_request {
 	struct plist_node node;
 	int pm_qos_class;
@@ -86,6 +89,8 @@ struct pm_qos_flags {
 struct dev_pm_qos {
 	struct pm_qos_constraints latency;
 	struct pm_qos_flags flags;
+	struct dev_pm_qos_request *latency_req;
+	struct dev_pm_qos_request *flags_req;
 };
 
 /* Action requested to pm_qos_update_target */
@@ -187,10 +192,31 @@ static inline int dev_pm_qos_add_ancestor_request(struct device *dev,
 #ifdef CONFIG_PM_RUNTIME
 int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value);
 void dev_pm_qos_hide_latency_limit(struct device *dev);
+int dev_pm_qos_expose_flags(struct device *dev, s32 value);
+void dev_pm_qos_hide_flags(struct device *dev);
+int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set);
+
+static inline s32 dev_pm_qos_requested_latency(struct device *dev)
+{
+	return dev->power.qos->latency_req->data.pnode.prio;
+}
+
+static inline s32 dev_pm_qos_requested_flags(struct device *dev)
+{
+	return dev->power.qos->flags_req->data.flr.flags;
+}
 #else
 static inline int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
 			{ return 0; }
 static inline void dev_pm_qos_hide_latency_limit(struct device *dev) {}
+static inline int dev_pm_qos_expose_flags(struct device *dev, s32 value)
+			{ return 0; }
+static inline void dev_pm_qos_hide_flags(struct device *dev) {}
+static inline int dev_pm_qos_update_flags(struct device *dev, s32 m, bool set)
+			{ return 0; }
+
+static inline s32 dev_pm_qos_requested_latency(struct device *dev) { return 0; }
+static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; }
 #endif
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 9d85f21c94f7f7a84d0ba686c58aa6d9da58fdbb Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:29 +0200
Subject: sched: Track the runnable average on a per-task entity basis

Instead of tracking averaging the load parented by a cfs_rq, we can track
entity load directly. With the load for a given cfs_rq then being the sum
of its children.

To do this we represent the historical contribution to runnable average
within each trailing 1024us of execution as the coefficients of a
geometric series.

We can express this for a given task t as:

  runnable_sum(t) = \Sum u_i * y^i, runnable_avg_period(t) = \Sum 1024 * y^i
  load(t) = weight_t * runnable_sum(t) / runnable_avg_period(t)

Where: u_i is the usage in the last i`th 1024us period (approximately 1ms)
~ms and y is chosen such that y^k = 1/2.  We currently choose k to be 32 which
roughly translates to about a sched period.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.372695337@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  13 +++++
 kernel/sched/core.c   |   5 ++
 kernel/sched/debug.c  |   4 ++
 kernel/sched/fair.c   | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 151 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..418fc6d8a4da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1095,6 +1095,16 @@ struct load_weight {
 	unsigned long weight, inv_weight;
 };
 
+struct sched_avg {
+	/*
+	 * These sums represent an infinite geometric series and so are bound
+	 * above by 1024/(1-y).  Thus we only need a u32 to store them for for all
+	 * choices of y < 1-2^(-32)*1024.
+	 */
+	u32 runnable_avg_sum, runnable_avg_period;
+	u64 last_runnable_update;
+};
+
 #ifdef CONFIG_SCHEDSTATS
 struct sched_statistics {
 	u64			wait_start;
@@ -1155,6 +1165,9 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
+#ifdef CONFIG_SMP
+	struct sched_avg	avg;
+#endif
 };
 
 struct sched_rt_entity {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..fd9d0859350a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1524,6 +1524,11 @@ static void __sched_fork(struct task_struct *p)
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
+#ifdef CONFIG_SMP
+	p->se.avg.runnable_avg_period = 0;
+	p->se.avg.runnable_avg_sum = 0;
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 6f79596e0ea9..61f70979153a 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -85,6 +85,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	P(se->statistics.wait_count);
 #endif
 	P(se->load.weight);
+#ifdef CONFIG_SMP
+	P(se->avg.runnable_avg_sum);
+	P(se->avg.runnable_avg_period);
+#endif
 #undef PN
 #undef P
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a14b990..16d67f9b6955 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -971,6 +971,126 @@ static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
+#ifdef CONFIG_SMP
+/*
+ * Approximate:
+ *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
+ */
+static __always_inline u64 decay_load(u64 val, u64 n)
+{
+	for (; n && val; n--) {
+		val *= 4008;
+		val >>= 12;
+	}
+
+	return val;
+}
+
+/*
+ * We can represent the historical contribution to runnable average as the
+ * coefficients of a geometric series.  To do this we sub-divide our runnable
+ * history into segments of approximately 1ms (1024us); label the segment that
+ * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
+ *
+ * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
+ *      p0            p1           p2
+ *     (now)       (~1ms ago)  (~2ms ago)
+ *
+ * Let u_i denote the fraction of p_i that the entity was runnable.
+ *
+ * We then designate the fractions u_i as our co-efficients, yielding the
+ * following representation of historical load:
+ *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
+ *
+ * We choose y based on the with of a reasonably scheduling period, fixing:
+ *   y^32 = 0.5
+ *
+ * This means that the contribution to load ~32ms ago (u_32) will be weighted
+ * approximately half as much as the contribution to load within the last ms
+ * (u_0).
+ *
+ * When a period "rolls over" and we have new u_0`, multiplying the previous
+ * sum again by y is sufficient to update:
+ *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
+ *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
+ */
+static __always_inline int __update_entity_runnable_avg(u64 now,
+							struct sched_avg *sa,
+							int runnable)
+{
+	u64 delta;
+	int delta_w, decayed = 0;
+
+	delta = now - sa->last_runnable_update;
+	/*
+	 * This should only happen when time goes backwards, which it
+	 * unfortunately does during sched clock init when we swap over to TSC.
+	 */
+	if ((s64)delta < 0) {
+		sa->last_runnable_update = now;
+		return 0;
+	}
+
+	/*
+	 * Use 1024ns as the unit of measurement since it's a reasonable
+	 * approximation of 1us and fast to compute.
+	 */
+	delta >>= 10;
+	if (!delta)
+		return 0;
+	sa->last_runnable_update = now;
+
+	/* delta_w is the amount already accumulated against our next period */
+	delta_w = sa->runnable_avg_period % 1024;
+	if (delta + delta_w >= 1024) {
+		/* period roll-over */
+		decayed = 1;
+
+		/*
+		 * Now that we know we're crossing a period boundary, figure
+		 * out how much from delta we need to complete the current
+		 * period and accrue it.
+		 */
+		delta_w = 1024 - delta_w;
+		BUG_ON(delta_w > delta);
+		do {
+			if (runnable)
+				sa->runnable_avg_sum += delta_w;
+			sa->runnable_avg_period += delta_w;
+
+			/*
+			 * Remainder of delta initiates a new period, roll over
+			 * the previous.
+			 */
+			sa->runnable_avg_sum =
+				decay_load(sa->runnable_avg_sum, 1);
+			sa->runnable_avg_period =
+				decay_load(sa->runnable_avg_period, 1);
+
+			delta -= delta_w;
+			/* New period is empty */
+			delta_w = 1024;
+		} while (delta >= 1024);
+	}
+
+	/* Remainder of delta accrued against u_0` */
+	if (runnable)
+		sa->runnable_avg_sum += delta;
+	sa->runnable_avg_period += delta;
+
+	return decayed;
+}
+
+/* Update a sched_entity's runnable average */
+static inline void update_entity_load_avg(struct sched_entity *se)
+{
+	__update_entity_runnable_avg(rq_of(cfs_rq_of(se))->clock_task, &se->avg,
+				     se->on_rq);
+}
+#else
+static inline void update_entity_load_avg(struct sched_entity *se) {}
+#endif
+
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
@@ -1097,6 +1217,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 */
 	update_curr(cfs_rq);
 	update_cfs_load(cfs_rq, 0);
+	update_entity_load_avg(se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
@@ -1171,6 +1292,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+	update_entity_load_avg(se);
 
 	update_stats_dequeue(cfs_rq, se);
 	if (flags & DEQUEUE_SLEEP) {
@@ -1340,6 +1462,8 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
+		/* in !on_rq case, update occurred at dequeue */
+		update_entity_load_avg(prev);
 	}
 	cfs_rq->curr = NULL;
 }
@@ -1352,6 +1476,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 */
 	update_curr(cfs_rq);
 
+	/*
+	 * Ensure that runnable average is periodically updated.
+	 */
+	update_entity_load_avg(curr);
+
 	/*
 	 * Update share accounting for long-running entities.
 	 */
-- 
cgit v1.2.3-55-g7522


From 2dac754e10a5d41d94d2d2365c0345d4f215a266 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:30 +0200
Subject: sched: Aggregate load contributed by task entities on parenting
 cfs_rq

For a given task t, we can compute its contribution to load as:

  task_load(t) = runnable_avg(t) * weight(t)

On a parenting cfs_rq we can then aggregate:

  runnable_load(cfs_rq) = \Sum task_load(t), for all runnable children t

Maintain this bottom up, with task entities adding their contributed load to
the parenting cfs_rq sum.  When a task entity's load changes we add the same
delta to the maintained sum.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.514678907@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/debug.c  |  3 +++
 kernel/sched/fair.c   | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
 kernel/sched/sched.h  | 10 +++++++++-
 4 files changed, 60 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 418fc6d8a4da..81d8b1ba4100 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1103,6 +1103,7 @@ struct sched_avg {
 	 */
 	u32 runnable_avg_sum, runnable_avg_period;
 	u64 last_runnable_update;
+	unsigned long load_avg_contrib;
 };
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4240abce4116..c953a89f94aa 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 #ifdef CONFIG_SMP
 	P(se->avg.runnable_avg_sum);
 	P(se->avg.runnable_avg_period);
+	P(se->avg.load_avg_contrib);
 #endif
 #undef PN
 #undef P
@@ -224,6 +225,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			cfs_rq->load_contribution);
 	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
 			atomic_read(&cfs_rq->tg->load_weight));
+	SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
+			cfs_rq->runnable_load_avg);
 #endif
 
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8c5468fcf10d..77af759e5675 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1081,20 +1081,63 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 	return decayed;
 }
 
+/* Compute the current contribution to load_avg by se, return any delta */
+static long __update_entity_load_avg_contrib(struct sched_entity *se)
+{
+	long old_contrib = se->avg.load_avg_contrib;
+
+	if (!entity_is_task(se))
+		return 0;
+
+	se->avg.load_avg_contrib = div64_u64(se->avg.runnable_avg_sum *
+					     se->load.weight,
+					     se->avg.runnable_avg_period + 1);
+
+	return se->avg.load_avg_contrib - old_contrib;
+}
+
 /* Update a sched_entity's runnable average */
 static inline void update_entity_load_avg(struct sched_entity *se)
 {
-	__update_entity_runnable_avg(rq_of(cfs_rq_of(se))->clock_task, &se->avg,
-				     se->on_rq);
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	long contrib_delta;
+
+	if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg,
+					  se->on_rq))
+		return;
+
+	contrib_delta = __update_entity_load_avg_contrib(se);
+	if (se->on_rq)
+		cfs_rq->runnable_load_avg += contrib_delta;
 }
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
 	__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
 }
+
+/* Add the load generated by se into cfs_rq's child load-average */
+static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
+						  struct sched_entity *se)
+{
+	update_entity_load_avg(se);
+	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+}
+
+/* Remove se's load from this cfs_rq child load-average */
+static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
+						  struct sched_entity *se)
+{
+	update_entity_load_avg(se);
+	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+}
 #else
 static inline void update_entity_load_avg(struct sched_entity *se) {}
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
+static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
+						  struct sched_entity *se) {}
+static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
+						  struct sched_entity *se) {}
 #endif
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1223,7 +1266,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 */
 	update_curr(cfs_rq);
 	update_cfs_load(cfs_rq, 0);
-	update_entity_load_avg(se);
+	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
@@ -1298,7 +1341,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	update_entity_load_avg(se);
+	dequeue_entity_load_avg(cfs_rq, se);
 
 	update_stats_dequeue(cfs_rq, se);
 	if (flags & DEQUEUE_SLEEP) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14b571968713..e6539736af58 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -222,6 +222,15 @@ struct cfs_rq {
 	unsigned int nr_spread_over;
 #endif
 
+#ifdef CONFIG_SMP
+	/*
+	 * CFS Load tracking
+	 * Under CFS, load is tracked on a per-entity basis and aggregated up.
+	 * This allows for the description of both thread and group usage (in
+	 * the FAIR_GROUP_SCHED case).
+	 */
+	u64 runnable_load_avg;
+#endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
 
@@ -1214,4 +1223,3 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
-
-- 
cgit v1.2.3-55-g7522


From 9ee474f55664ff63111c843099d365e7ecffb56f Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:30 +0200
Subject: sched: Maintain the load contribution of blocked entities

We are currently maintaining:

  runnable_load(cfs_rq) = \Sum task_load(t)

For all running children t of cfs_rq.  While this can be naturally updated for
tasks in a runnable state (as they are scheduled); this does not account for
the load contributed by blocked task entities.

This can be solved by introducing a separate accounting for blocked load:

  blocked_load(cfs_rq) = \Sum runnable(b) * weight(b)

Obviously we do not want to iterate over all blocked entities to account for
their decay, we instead observe that:

  runnable_load(t) = \Sum p_i*y^i

and that to account for an additional idle period we only need to compute:

  y*runnable_load(t).

This means that we can compute all blocked entities at once by evaluating:

  blocked_load(cfs_rq)` = y * blocked_load(cfs_rq)

Finally we maintain a decay counter so that when a sleeping entity re-awakens
we can determine how much of its load should be removed from the blocked sum.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.585389902@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |   1 +
 kernel/sched/core.c   |   1 -
 kernel/sched/debug.c  |   3 ++
 kernel/sched/fair.c   | 128 +++++++++++++++++++++++++++++++++++++++++++++-----
 kernel/sched/sched.h  |   4 +-
 5 files changed, 122 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 81d8b1ba4100..b1831accfd89 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1103,6 +1103,7 @@ struct sched_avg {
 	 */
 	u32 runnable_avg_sum, runnable_avg_period;
 	u64 last_runnable_update;
+	s64 decay_count;
 	unsigned long load_avg_contrib;
 };
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd9d0859350a..00898f1fb69e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1528,7 +1528,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg.runnable_avg_period = 0;
 	p->se.avg.runnable_avg_sum = 0;
 #endif
-
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c953a89f94aa..2d2e2b3c1bef 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -95,6 +95,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	P(se->avg.runnable_avg_sum);
 	P(se->avg.runnable_avg_period);
 	P(se->avg.load_avg_contrib);
+	P(se->avg.decay_count);
 #endif
 #undef PN
 #undef P
@@ -227,6 +228,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			atomic_read(&cfs_rq->tg->load_weight));
 	SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
 			cfs_rq->runnable_load_avg);
+	SEQ_printf(m, "  .%-30s: %lld\n", "blocked_load_avg",
+			cfs_rq->blocked_load_avg);
 #endif
 
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 77af759e5675..83194175e841 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -259,6 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return grp->my_q;
 }
 
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq);
+
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	if (!cfs_rq->on_list) {
@@ -278,6 +280,8 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 		}
 
 		cfs_rq->on_list = 1;
+		/* We should have no load, but we need to update last_decay. */
+		update_cfs_rq_blocked_load(cfs_rq);
 	}
 }
 
@@ -1081,6 +1085,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 	return decayed;
 }
 
+/* Synchronize an entity's decay with its parenting cfs_rq.*/
+static inline void __synchronize_entity_decay(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 decays = atomic64_read(&cfs_rq->decay_counter);
+
+	decays -= se->avg.decay_count;
+	if (!decays)
+		return;
+
+	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+	se->avg.decay_count = 0;
+}
+
 /* Compute the current contribution to load_avg by se, return any delta */
 static long __update_entity_load_avg_contrib(struct sched_entity *se)
 {
@@ -1096,8 +1114,18 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 	return se->avg.load_avg_contrib - old_contrib;
 }
 
+static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
+						 long load_contrib)
+{
+	if (likely(load_contrib < cfs_rq->blocked_load_avg))
+		cfs_rq->blocked_load_avg -= load_contrib;
+	else
+		cfs_rq->blocked_load_avg = 0;
+}
+
 /* Update a sched_entity's runnable average */
-static inline void update_entity_load_avg(struct sched_entity *se)
+static inline void update_entity_load_avg(struct sched_entity *se,
+					  int update_cfs_rq)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	long contrib_delta;
@@ -1107,8 +1135,34 @@ static inline void update_entity_load_avg(struct sched_entity *se)
 		return;
 
 	contrib_delta = __update_entity_load_avg_contrib(se);
+
+	if (!update_cfs_rq)
+		return;
+
 	if (se->on_rq)
 		cfs_rq->runnable_load_avg += contrib_delta;
+	else
+		subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+}
+
+/*
+ * Decay the load contributed by all blocked children and account this so that
+ * their contribution may appropriately discounted when they wake up.
+ */
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq)
+{
+	u64 now = rq_of(cfs_rq)->clock_task >> 20;
+	u64 decays;
+
+	decays = now - cfs_rq->last_decay;
+	if (!decays)
+		return;
+
+	cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
+					      decays);
+	atomic64_add(decays, &cfs_rq->decay_counter);
+
+	cfs_rq->last_decay = now;
 }
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
@@ -1118,26 +1172,53 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 
 /* Add the load generated by se into cfs_rq's child load-average */
 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se)
+						  struct sched_entity *se,
+						  int wakeup)
 {
-	update_entity_load_avg(se);
+	/* we track migrations using entity decay_count == 0 */
+	if (unlikely(!se->avg.decay_count)) {
+		se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
+		wakeup = 0;
+	} else {
+		__synchronize_entity_decay(se);
+	}
+
+	if (wakeup)
+		subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
+
+	update_entity_load_avg(se, 0);
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+	update_cfs_rq_blocked_load(cfs_rq);
 }
 
-/* Remove se's load from this cfs_rq child load-average */
+/*
+ * Remove se's load from this cfs_rq child load-average, if the entity is
+ * transitioning to a blocked state we track its projected decay using
+ * blocked_load_avg.
+ */
 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se)
+						  struct sched_entity *se,
+						  int sleep)
 {
-	update_entity_load_avg(se);
+	update_entity_load_avg(se, 1);
+
 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+	if (sleep) {
+		cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
+		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
+	} /* migrations, e.g. sleep=0 leave decay_count == 0 */
 }
 #else
-static inline void update_entity_load_avg(struct sched_entity *se) {}
+static inline void update_entity_load_avg(struct sched_entity *se,
+					  int update_cfs_rq) {}
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se) {}
+					   struct sched_entity *se,
+					   int wakeup) {}
 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se) {}
+					   struct sched_entity *se,
+					   int sleep) {}
+static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {}
 #endif
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1266,7 +1347,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 */
 	update_curr(cfs_rq);
 	update_cfs_load(cfs_rq, 0);
-	enqueue_entity_load_avg(cfs_rq, se);
+	enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
@@ -1341,7 +1422,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	dequeue_entity_load_avg(cfs_rq, se);
+	dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP);
 
 	update_stats_dequeue(cfs_rq, se);
 	if (flags & DEQUEUE_SLEEP) {
@@ -1512,7 +1593,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
-		update_entity_load_avg(prev);
+		update_entity_load_avg(prev, 1);
 	}
 	cfs_rq->curr = NULL;
 }
@@ -1528,7 +1609,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	/*
 	 * Ensure that runnable average is periodically updated.
 	 */
-	update_entity_load_avg(curr);
+	update_entity_load_avg(curr, 1);
+	update_cfs_rq_blocked_load(cfs_rq);
 
 	/*
 	 * Update share accounting for long-running entities.
@@ -2387,6 +2469,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
+		update_entity_load_avg(se, 1);
 	}
 
 	if (!se) {
@@ -2448,6 +2531,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
+		update_entity_load_avg(se, 1);
 	}
 
 	if (!se) {
@@ -3498,6 +3582,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
 
 	update_rq_clock(rq);
 	update_cfs_load(cfs_rq, 1);
+	update_cfs_rq_blocked_load(cfs_rq);
 
 	/*
 	 * We need to update shares after updating tg->load_weight in
@@ -5232,6 +5317,20 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 		place_entity(cfs_rq, se, 0);
 		se->vruntime -= cfs_rq->min_vruntime;
 	}
+
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+	/*
+	* Remove our load from contribution when we leave sched_fair
+	* and ensure we don't carry in an old decay_count if we
+	* switch back.
+	*/
+	if (p->se.avg.decay_count) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
+		__synchronize_entity_decay(&p->se);
+		subtract_blocked_load_contrib(cfs_rq,
+				p->se.avg.load_avg_contrib);
+	}
+#endif
 }
 
 /*
@@ -5278,6 +5377,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 #ifndef CONFIG_64BIT
 	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
 #endif
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+	atomic64_set(&cfs_rq->decay_counter, 1);
+#endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e6539736af58..664ff39195f7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -229,7 +229,9 @@ struct cfs_rq {
 	 * This allows for the description of both thread and group usage (in
 	 * the FAIR_GROUP_SCHED case).
 	 */
-	u64 runnable_load_avg;
+	u64 runnable_load_avg, blocked_load_avg;
+	atomic64_t decay_counter;
+	u64 last_decay;
 #endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
-- 
cgit v1.2.3-55-g7522


From 0a74bef8bed18dc6889e9bc37ea1050a50c86c89 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:30 +0200
Subject: sched: Add an rq migration call-back to sched_class

Since we are now doing bottom up load accumulation we need explicit
notification when a task has been re-parented so that the old hierarchy can be
updated.

Adds: migrate_task_rq(struct task_struct *p, int next_cpu)

(The alternative is to do this out of __set_task_cpu, but it was suggested that
this would be a cleaner encapsulation.)

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.660023400@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   |  2 ++
 kernel/sched/fair.c   | 12 ++++++++++++
 3 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1831accfd89..e483ccb08ce6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1061,6 +1061,7 @@ struct sched_class {
 
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+	void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 00898f1fb69e..f26860074ef2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -952,6 +952,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
+		if (p->sched_class->migrate_task_rq)
+			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
 	}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 83194175e841..5e602e6ba0c3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3047,6 +3047,17 @@ unlock:
 
 	return new_cpu;
 }
+
+/*
+ * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
+ * cfs_rq_of(p) references at time of call are still valid and identify the
+ * previous cpu.  However, the caller only guarantees p->pi_lock is held; no
+ * other assumptions, including the state of rq->lock, should be made.
+ */
+static void
+migrate_task_rq_fair(struct task_struct *p, int next_cpu)
+{
+}
 #endif /* CONFIG_SMP */
 
 static unsigned long
@@ -5607,6 +5618,7 @@ const struct sched_class fair_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_fair,
+	.migrate_task_rq	= migrate_task_rq_fair,
 
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
-- 
cgit v1.2.3-55-g7522


From f4e26b120b9de84cb627bc7361ba43cfdc51341f Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:32 +0200
Subject: sched: Introduce temporary FAIR_GROUP_SCHED dependency for
 load-tracking

While per-entity load-tracking is generally useful, beyond computing shares
distribution, e.g. runnable based load-balance (in progress), governors,
power-management, etc.

These facilities are not yet consumers of this data.  This may be trivially
reverted when the information is required; but avoid paying the overhead for
calculations we will not use until then.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141507.422162369@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  8 +++++++-
 kernel/sched/core.c   |  7 ++++++-
 kernel/sched/fair.c   | 13 +++++++++++--
 kernel/sched/sched.h  |  9 ++++++++-
 4 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e483ccb08ce6..e1581a029e3d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1168,7 +1168,13 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
-#ifdef CONFIG_SMP
+/*
+ * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
+ * removed when useful for applications beyond shares distribution (e.g.
+ * load-balance).
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+	/* Per-entity load-tracking */
 	struct sched_avg	avg;
 #endif
 };
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f26860074ef2..5dae0d252ff7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1526,7 +1526,12 @@ static void __sched_fork(struct task_struct *p)
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
-#ifdef CONFIG_SMP
+/*
+ * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
+ * removed when useful for applications beyond shares distribution (e.g.
+ * load-balance).
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
 	p->se.avg.runnable_avg_period = 0;
 	p->se.avg.runnable_avg_sum = 0;
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6ecf455fd95b..3e6a3531fa90 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -882,7 +882,8 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-#ifdef CONFIG_SMP
+/* Only depends on SMP, FAIR_GROUP_SCHED may be removed when useful in lb */
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
 /*
  * We choose a half-life close to 1 scheduling period.
  * Note: The tables below are dependent on this value.
@@ -3173,6 +3174,12 @@ unlock:
 	return new_cpu;
 }
 
+/*
+ * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
+ * removed when useful for applications beyond shares distribution (e.g.
+ * load-balance).
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
  * cfs_rq_of(p) references at time of call are still valid and identify the
@@ -3196,6 +3203,7 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 		atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
 	}
 }
+#endif
 #endif /* CONFIG_SMP */
 
 static unsigned long
@@ -5773,8 +5781,9 @@ const struct sched_class fair_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_fair,
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	.migrate_task_rq	= migrate_task_rq_fair,
-
+#endif
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0a75a430ca77..5eca173b563f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -225,6 +225,12 @@ struct cfs_rq {
 #endif
 
 #ifdef CONFIG_SMP
+/*
+ * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
+ * removed when useful for applications beyond shares distribution (e.g.
+ * load-balance).
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
 	 * CFS Load tracking
 	 * Under CFS, load is tracked on a per-entity basis and aggregated up.
@@ -234,7 +240,8 @@ struct cfs_rq {
 	u64 runnable_load_avg, blocked_load_avg;
 	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
-
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+/* These always depend on CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	u32 tg_runnable_contrib;
 	u64 tg_load_contrib;
-- 
cgit v1.2.3-55-g7522


From 1b4f59e356cc94929305bd107b7f38eec62715ad Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Mon, 22 Oct 2012 18:05:36 +0400
Subject: slub: Commonize slab_cache field in struct page

Right now, slab and slub have fields in struct page to derive which
cache a page belongs to, but they do it slightly differently.

slab uses a field called slab_cache, that lives in the third double
word. slub, uses a field called "slab", living outside of the
doublewords area.

Ideally, we could use the same field for this. Since slub heavily makes
use of the doubleword region, there isn't really much room to move
slub's slab_cache field around. Since slab does not have such strict
placement restrictions, we can move it outside the doubleword area.

The naming used by slab, "slab_cache", is less confusing, and it is
preferred over slub's generic "slab".

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
CC: David Rientjes <rientjes@google.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/mm_types.h |  7 ++-----
 mm/slub.c                | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..2fef4e720e79 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -128,10 +128,7 @@ struct page {
 		};
 
 		struct list_head list;	/* slobs list of pages */
-		struct {		/* slab fields */
-			struct kmem_cache *slab_cache;
-			struct slab *slab_page;
-		};
+		struct slab *slab_page; /* slab fields */
 	};
 
 	/* Remainder is not double word aligned */
@@ -146,7 +143,7 @@ struct page {
 #if USE_SPLIT_PTLOCKS
 		spinlock_t ptl;
 #endif
-		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
 	};
 
diff --git a/mm/slub.c b/mm/slub.c
index 16274b273c61..35483e0ab6bc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1092,11 +1092,11 @@ static noinline struct kmem_cache_node *free_debug_processing(
 	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
 		goto out;
 
-	if (unlikely(s != page->slab)) {
+	if (unlikely(s != page->slab_cache)) {
 		if (!PageSlab(page)) {
 			slab_err(s, page, "Attempt to free object(0x%p) "
 				"outside of slab", object);
-		} else if (!page->slab) {
+		} else if (!page->slab_cache) {
 			printk(KERN_ERR
 				"SLUB <none>: no slab for object 0x%p.\n",
 						object);
@@ -1357,7 +1357,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		goto out;
 
 	inc_slabs_node(s, page_to_nid(page), page->objects);
-	page->slab = s;
+	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
 		SetPageSlabPfmemalloc(page);
@@ -1424,7 +1424,7 @@ static void rcu_free_slab(struct rcu_head *h)
 	else
 		page = container_of((struct list_head *)h, struct page, lru);
 
-	__free_slab(page->slab, page);
+	__free_slab(page->slab_cache, page);
 }
 
 static void free_slab(struct kmem_cache *s, struct page *page)
@@ -2617,9 +2617,9 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 
 	page = virt_to_head_page(x);
 
-	if (kmem_cache_debug(s) && page->slab != s) {
+	if (kmem_cache_debug(s) && page->slab_cache != s) {
 		pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-			" is from  %s\n", page->slab->name, s->name);
+			" is from  %s\n", page->slab_cache->name, s->name);
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -3418,7 +3418,7 @@ size_t ksize(const void *object)
 		return PAGE_SIZE << compound_order(page);
 	}
 
-	return slab_ksize(page->slab);
+	return slab_ksize(page->slab_cache);
 }
 EXPORT_SYMBOL(ksize);
 
@@ -3443,8 +3443,8 @@ bool verify_mem_not_deleted(const void *x)
 	}
 
 	slab_lock(page);
-	if (on_freelist(page->slab, page, object)) {
-		object_err(page->slab, page, object, "Object is on free-list");
+	if (on_freelist(page->slab_cache, page, object)) {
+		object_err(page->slab_cache, page, object, "Object is on free-list");
 		rv = false;
 	} else {
 		rv = true;
@@ -3475,7 +3475,7 @@ void kfree(const void *x)
 		__free_pages(page, compound_order(page));
 		return;
 	}
-	slab_free(page->slab, page, object, _RET_IP_);
+	slab_free(page->slab_cache, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3686,11 +3686,11 @@ static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
 
 		if (n) {
 			list_for_each_entry(p, &n->partial, lru)
-				p->slab = s;
+				p->slab_cache = s;
 
 #ifdef CONFIG_SLUB_DEBUG
 			list_for_each_entry(p, &n->full, lru)
-				p->slab = s;
+				p->slab_cache = s;
 #endif
 		}
 	}
-- 
cgit v1.2.3-55-g7522


From 6c9c6d6301287e369a754d628230fa6e50cdb74b Mon Sep 17 00:00:00 2001
From: Shuah Khan
Date: Mon, 8 Oct 2012 11:08:06 -0600
Subject: dma-debug: New interfaces to debug dma mapping errors

Add dma-debug interface debug_dma_mapping_error() to debug
drivers that fail to check dma mapping errors on addresses
returned by dma_map_single() and dma_map_page() interfaces.
This interface clears a flag set by debug_dma_map_page() to
indicate that dma_mapping_error() has been called by the
driver. When driver does unmap, debug_dma_unmap() checks the
flag and if this flag is still set, prints warning message
that includes call trace that leads up to the unmap. This
interface can be called from dma_mapping_error() routines to
enable dma mapping error check debugging.

Tested: Intel iommu and swiotlb (iommu=soft) on x86-64 with
        CONFIG_DMA_API_DEBUG enabled and disabled.

Signed-off-by: Shuah Khan <shuah.khan@hp.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/DMA-API.txt          | 12 +++++++
 arch/x86/include/asm/dma-mapping.h |  1 +
 include/linux/dma-debug.h          |  7 ++++
 lib/dma-debug.c                    | 71 +++++++++++++++++++++++++++++++++++---
 4 files changed, 87 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 66bd97a95f10..78a6c569d204 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -678,3 +678,15 @@ out of dma_debug_entries. These entries are preallocated at boot. The number
 of preallocated entries is defined per architecture. If it is too low for you
 boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
 architectural default.
+
+void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
+dma-debug interface debug_dma_mapping_error() to debug drivers that fail
+to check dma mapping errors on addresses returned by dma_map_single() and
+dma_map_page() interfaces. This interface clears a flag set by
+debug_dma_map_page() to indicate that dma_mapping_error() has been called by
+the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
+this flag is still set, prints warning message that includes call trace that
+leads up to the unmap. This interface can be called from dma_mapping_error()
+routines to enable dma mapping error check debugging.
+
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f7b4c7903e7e..808dae63eeea 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -47,6 +47,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
+	debug_dma_mapping_error(dev, dma_addr);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 171ad8aedc83..fc0e34ce038f 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -39,6 +39,8 @@ extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       int direction, dma_addr_t dma_addr,
 			       bool map_single);
 
+extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
 extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 				 size_t size, int direction, bool map_single);
 
@@ -105,6 +107,11 @@ static inline void debug_dma_map_page(struct device *dev, struct page *page,
 {
 }
 
+static inline void debug_dma_mapping_error(struct device *dev,
+					  dma_addr_t dma_addr)
+{
+}
+
 static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 					size_t size, int direction,
 					bool map_single)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d84beb994f36..59f4a1a8187d 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -45,6 +45,12 @@ enum {
 	dma_debug_coherent,
 };
 
+enum map_err_types {
+	MAP_ERR_CHECK_NOT_APPLICABLE,
+	MAP_ERR_NOT_CHECKED,
+	MAP_ERR_CHECKED,
+};
+
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
 struct dma_debug_entry {
@@ -57,6 +63,7 @@ struct dma_debug_entry {
 	int              direction;
 	int		 sg_call_ents;
 	int		 sg_mapped_ents;
+	enum map_err_types  map_err_type;
 #ifdef CONFIG_STACKTRACE
 	struct		 stack_trace stacktrace;
 	unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
@@ -114,6 +121,12 @@ static struct device_driver *current_driver                    __read_mostly;
 
 static DEFINE_RWLOCK(driver_name_lock);
 
+static const char *const maperr2str[] = {
+	[MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
+	[MAP_ERR_NOT_CHECKED] = "dma map error not checked",
+	[MAP_ERR_CHECKED] = "dma map error checked",
+};
+
 static const char *type2name[4] = { "single", "page",
 				    "scather-gather", "coherent" };
 
@@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
 					 (unsigned long long)entry->paddr,
 					 entry->dev_addr, entry->size,
-					 dir2name[entry->direction]);
+					 dir2name[entry->direction],
+					 maperr2str[entry->map_err_type]);
 			}
 		}
 
@@ -838,13 +852,28 @@ static __init int dma_debug_entries_cmdline(char *str)
 __setup("dma_debug=", dma_debug_cmdline);
 __setup("dma_debug_entries=", dma_debug_entries_cmdline);
 
+/* Calling dma_mapping_error() from dma-debug api will result in calling
+   debug_dma_mapping_error() - need internal mapping error routine to
+   avoid debug checks */
+#ifndef DMA_ERROR_CODE
+#define DMA_ERROR_CODE 0
+#endif
+static inline int has_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	if (ops->mapping_error)
+		return ops->mapping_error(dev, dma_addr);
+
+	return (dma_addr == DMA_ERROR_CODE);
+}
+
 static void check_unmap(struct dma_debug_entry *ref)
 {
 	struct dma_debug_entry *entry;
 	struct hash_bucket *bucket;
 	unsigned long flags;
 
-	if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+	if (unlikely(has_mapping_error(ref->dev, ref->dev_addr))) {
 		err_printk(ref->dev, NULL, "DMA-API: device driver tries "
 			   "to free an invalid DMA memory address\n");
 		return;
@@ -910,6 +939,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   dir2name[ref->direction]);
 	}
 
+	if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+		err_printk(ref->dev, entry,
+			   "DMA-API: device driver failed to check map error"
+			   "[device address=0x%016llx] [size=%llu bytes] "
+			   "[mapped as %s]",
+			   ref->dev_addr, ref->size,
+			   type2name[entry->type]);
+	}
+
 	hash_bucket_del(entry);
 	dma_entry_free(entry);
 
@@ -1017,7 +1055,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 	if (unlikely(global_disable))
 		return;
 
-	if (unlikely(dma_mapping_error(dev, dma_addr)))
+	if (unlikely(has_mapping_error(dev, dma_addr)))
 		return;
 
 	entry = dma_entry_alloc();
@@ -1030,6 +1068,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
+	entry->map_err_type = MAP_ERR_NOT_CHECKED;
 
 	if (map_single)
 		entry->type = dma_debug_single;
@@ -1045,6 +1084,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 }
 EXPORT_SYMBOL(debug_dma_map_page);
 
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_debug_entry ref;
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+
+	if (unlikely(global_disable))
+		return;
+
+	ref.dev = dev;
+	ref.dev_addr = dma_addr;
+	bucket = get_hash_bucket(&ref, &flags);
+	entry = bucket_find_exact(bucket, &ref);
+
+	if (!entry)
+		goto out;
+
+	entry->map_err_type = MAP_ERR_CHECKED;
+out:
+	put_hash_bucket(bucket, &flags);
+}
+EXPORT_SYMBOL(debug_dma_mapping_error);
+
 void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 			  size_t size, int direction, bool map_single)
 {
-- 
cgit v1.2.3-55-g7522


From c30186e51e537d3ae8b1134983c1a5b4db3a8840 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Wed, 24 Oct 2012 14:19:16 -0700
Subject: USB: ezusb: unexport some functions that aren't being used

When the ezusb code was split out, support was added for the fx2 chip
type, but no one is using these functions, so comment it out.  If
someone needs it, it can be added back in the future.

Also properly include <linux/usb/ezusb.h> into the ezusb.c file, to
ensure we catch any function prototype mis-matches

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: René Bürgel <rene.buergel@sohard.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/ezusb.c  | 39 ++++++++++++++++++++++-----------------
 include/linux/usb/ezusb.h |  8 --------
 2 files changed, 22 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/misc/ezusb.c b/drivers/usb/misc/ezusb.c
index 4223d761223d..0a48de91df3a 100644
--- a/drivers/usb/misc/ezusb.c
+++ b/drivers/usb/misc/ezusb.c
@@ -15,6 +15,7 @@
 #include <linux/usb.h>
 #include <linux/firmware.h>
 #include <linux/ihex.h>
+#include <linux/usb/ezusb.h>
 
 struct ezusb_fx_type {
 	/* EZ-USB Control and Status Register.  Bit 0 controls 8051 reset */
@@ -22,21 +23,16 @@ struct ezusb_fx_type {
 	unsigned short max_internal_adress;
 };
 
-struct ezusb_fx_type ezusb_fx1 = {
+static struct ezusb_fx_type ezusb_fx1 = {
 	.cpucs_reg = 0x7F92,
 	.max_internal_adress = 0x1B3F,
 };
 
-struct ezusb_fx_type ezusb_fx2 = {
-	.cpucs_reg = 0xE600,
-	.max_internal_adress = 0x3FFF,
-};
-
 /* Commands for writing to memory */
 #define WRITE_INT_RAM 0xA0
 #define WRITE_EXT_RAM 0xA3
 
-int ezusb_writememory(struct usb_device *dev, int address,
+static int ezusb_writememory(struct usb_device *dev, int address,
 				unsigned char *data, int length, __u8 request)
 {
 	int result;
@@ -58,10 +54,9 @@ int ezusb_writememory(struct usb_device *dev, int address,
 	kfree(transfer_buffer);
 	return result;
 }
-EXPORT_SYMBOL_GPL(ezusb_writememory);
 
-int ezusb_set_reset(struct usb_device *dev, unsigned short cpucs_reg,
-			 unsigned char reset_bit)
+static int ezusb_set_reset(struct usb_device *dev, unsigned short cpucs_reg,
+			   unsigned char reset_bit)
 {
 	int response = ezusb_writememory(dev, cpucs_reg, &reset_bit, 1, WRITE_INT_RAM);
 	if (response < 0)
@@ -76,12 +71,6 @@ int ezusb_fx1_set_reset(struct usb_device *dev, unsigned char reset_bit)
 }
 EXPORT_SYMBOL_GPL(ezusb_fx1_set_reset);
 
-int ezusb_fx2_set_reset(struct usb_device *dev, unsigned char reset_bit)
-{
-	return ezusb_set_reset(dev, ezusb_fx2.cpucs_reg, reset_bit);
-}
-EXPORT_SYMBOL_GPL(ezusb_fx2_set_reset);
-
 static int ezusb_ihex_firmware_download(struct usb_device *dev,
 					struct ezusb_fx_type fx,
 					const char *firmware_path)
@@ -151,10 +140,26 @@ int ezusb_fx1_ihex_firmware_download(struct usb_device *dev,
 }
 EXPORT_SYMBOL_GPL(ezusb_fx1_ihex_firmware_download);
 
+#if 0
+/*
+ * Once someone one needs these fx2 functions, uncomment them
+ * and add them to ezusb.h and all should be good.
+ */
+static struct ezusb_fx_type ezusb_fx2 = {
+	.cpucs_reg = 0xE600,
+	.max_internal_adress = 0x3FFF,
+};
+
+int ezusb_fx2_set_reset(struct usb_device *dev, unsigned char reset_bit)
+{
+	return ezusb_set_reset(dev, ezusb_fx2.cpucs_reg, reset_bit);
+}
+EXPORT_SYMBOL_GPL(ezusb_fx2_set_reset);
+
 int ezusb_fx2_ihex_firmware_download(struct usb_device *dev,
 				     const char *firmware_path)
 {
 	return ezusb_ihex_firmware_download(dev, ezusb_fx2, firmware_path);
 }
 EXPORT_SYMBOL_GPL(ezusb_fx2_ihex_firmware_download);
-
+#endif
diff --git a/include/linux/usb/ezusb.h b/include/linux/usb/ezusb.h
index fc618d8d1e92..639ee45779fb 100644
--- a/include/linux/usb/ezusb.h
+++ b/include/linux/usb/ezusb.h
@@ -1,16 +1,8 @@
 #ifndef __EZUSB_H
 #define __EZUSB_H
 
-
-extern int ezusb_writememory(struct usb_device *dev, int address,
-			     unsigned char *data, int length, __u8 bRequest);
-
 extern int ezusb_fx1_set_reset(struct usb_device *dev, unsigned char reset_bit);
-extern int ezusb_fx2_set_reset(struct usb_device *dev, unsigned char reset_bit);
-
 extern int ezusb_fx1_ihex_firmware_download(struct usb_device *dev,
 					    const char *firmware_path);
-extern int ezusb_fx2_ihex_firmware_download(struct usb_device *dev,
-					    const char *firmware_path);
 
 #endif /* __EZUSB_H */
-- 
cgit v1.2.3-55-g7522


From e8c4a7acc9ec0ee82feedcdc3c6d0ee44d67918a Mon Sep 17 00:00:00 2001
From: Felipe Balbi
Date: Wed, 24 Oct 2012 14:26:19 -0700
Subject: ARM: OMAP: move OMAP USB platform data to
 <linux/platform_data/omap-usb.h>

In order to make single zImage work for ARM architecture,
we need to make sure we don't depend on private headers.

Move USB platform_data to <linux/platform_data/omap-usb.h>
and add a minimal drivers/mfd/usb-omap.h.

Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Partha Basak <parthab@india.ti.com>
Cc: Keshava Munegowda <keshava_mgowda@ti.com>
Cc: linux-usb@vger.kernel.org
Signed-off-by: Felipe Balbi <balbi@ti.com>
[tony@atomide.com: updated for local mfd/usb-omap.h]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-4430sdp.c    |  1 +
 arch/arm/mach-omap2/board-am3517evm.c  |  1 +
 arch/arm/mach-omap2/board-omap3evm.c   |  1 +
 arch/arm/mach-omap2/board-omap4panda.c |  1 +
 arch/arm/mach-omap2/board-rx51.c       |  1 +
 arch/arm/mach-omap2/board-ti8168evm.c  |  2 +
 arch/arm/mach-omap2/usb-tusb6010.c     |  1 +
 arch/arm/mach-omap2/usb.h              |  2 +-
 arch/arm/plat-omap/include/plat/usb.h  | 73 -------------------------------
 drivers/mfd/omap-usb-host.c            |  5 ++-
 drivers/mfd/omap-usb-tll.c             |  2 +-
 drivers/mfd/omap-usb.h                 |  2 +
 drivers/usb/host/ehci-omap.c           |  3 +-
 drivers/usb/host/ohci-omap3.c          |  1 -
 drivers/usb/musb/am35x.c               |  3 +-
 drivers/usb/musb/musb_dsps.c           |  3 +-
 drivers/usb/musb/omap2430.h            |  2 +-
 include/linux/platform_data/usb-omap.h | 80 ++++++++++++++++++++++++++++++++++
 18 files changed, 101 insertions(+), 83 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/usb.h
 create mode 100644 drivers/mfd/omap-usb.h
 create mode 100644 include/linux/platform_data/usb-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 5700fc5b77a2..52534ba33984 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -27,6 +27,7 @@
 #include <linux/leds.h>
 #include <linux/leds_pwm.h>
 #include <linux/platform_data/omap4-keypad.h>
+#include <linux/usb/musb.h>
 
 #include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c
index 0dbeb77aad68..96d6c5ab5d4c 100644
--- a/arch/arm/mach-omap2/board-am3517evm.c
+++ b/arch/arm/mach-omap2/board-am3517evm.c
@@ -25,6 +25,7 @@
 #include <linux/can/platform/ti_hecc.h>
 #include <linux/davinci_emac.h>
 #include <linux/mmc/host.h>
+#include <linux/usb/musb.h>
 #include <linux/platform_data/gpio-omap.h>
 
 #include "am35xx.h"
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 6e7e69bf5c7e..a22a5b61db49 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -32,6 +32,7 @@
 #include <linux/spi/ads7846.h>
 #include <linux/i2c/twl.h>
 #include <linux/usb/otg.h>
+#include <linux/usb/musb.h>
 #include <linux/usb/nop-usb-xceiv.h>
 #include <linux/smsc911x.h>
 
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 74e39bbb3ca3..3f0b92afd957 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -29,6 +29,7 @@
 #include <linux/regulator/machine.h>
 #include <linux/regulator/fixed.h>
 #include <linux/ti_wilink_st.h>
+#include <linux/usb/musb.h>
 #include <linux/wl12xx.h>
 #include <linux/platform_data/omap-abe-twl6040.h>
 
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index 8c4d27ef160b..c2f8f6c16b8f 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/gpio.h>
 #include <linux/leds.h>
+#include <linux/usb/musb.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-omap2/board-ti8168evm.c b/arch/arm/mach-omap2/board-ti8168evm.c
index 4977c77ce223..5e672c2b6a43 100644
--- a/arch/arm/mach-omap2/board-ti8168evm.c
+++ b/arch/arm/mach-omap2/board-ti8168evm.c
@@ -14,6 +14,8 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/usb/musb.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-omap2/usb-tusb6010.c b/arch/arm/mach-omap2/usb-tusb6010.c
index 805bea6edf17..5e2428989c13 100644
--- a/arch/arm/mach-omap2/usb-tusb6010.c
+++ b/arch/arm/mach-omap2/usb-tusb6010.c
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/export.h>
+#include <linux/platform_data/usb-omap.h>
 
 #include <linux/usb/musb.h>
 
diff --git a/arch/arm/mach-omap2/usb.h b/arch/arm/mach-omap2/usb.h
index b2c4ff15557e..9b986ead7c45 100644
--- a/arch/arm/mach-omap2/usb.h
+++ b/arch/arm/mach-omap2/usb.h
@@ -1,4 +1,4 @@
-#include <plat/usb.h>
+#include <linux/platform_data/usb-omap.h>
 
 /* AM35x */
 /* USB 2.0 PHY Control */
diff --git a/arch/arm/plat-omap/include/plat/usb.h b/arch/arm/plat-omap/include/plat/usb.h
deleted file mode 100644
index 1dd6522a0edd..000000000000
--- a/arch/arm/plat-omap/include/plat/usb.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// include/asm-arm/mach-omap/usb.h
-
-#ifndef	__ASM_ARCH_OMAP_USB_H
-#define	__ASM_ARCH_OMAP_USB_H
-
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/usb/musb.h>
-
-#define OMAP3_HS_USB_PORTS	3
-
-enum usbhs_omap_port_mode {
-	OMAP_USBHS_PORT_MODE_UNUSED,
-	OMAP_EHCI_PORT_MODE_PHY,
-	OMAP_EHCI_PORT_MODE_TLL,
-	OMAP_EHCI_PORT_MODE_HSIC,
-	OMAP_OHCI_PORT_MODE_PHY_6PIN_DATSE0,
-	OMAP_OHCI_PORT_MODE_PHY_6PIN_DPDM,
-	OMAP_OHCI_PORT_MODE_PHY_3PIN_DATSE0,
-	OMAP_OHCI_PORT_MODE_PHY_4PIN_DPDM,
-	OMAP_OHCI_PORT_MODE_TLL_6PIN_DATSE0,
-	OMAP_OHCI_PORT_MODE_TLL_6PIN_DPDM,
-	OMAP_OHCI_PORT_MODE_TLL_3PIN_DATSE0,
-	OMAP_OHCI_PORT_MODE_TLL_4PIN_DPDM,
-	OMAP_OHCI_PORT_MODE_TLL_2PIN_DATSE0,
-	OMAP_OHCI_PORT_MODE_TLL_2PIN_DPDM
-};
-
-#ifdef CONFIG_ARCH_OMAP2PLUS
-
-struct ehci_hcd_omap_platform_data {
-	enum usbhs_omap_port_mode	port_mode[OMAP3_HS_USB_PORTS];
-	int				reset_gpio_port[OMAP3_HS_USB_PORTS];
-	struct regulator		*regulator[OMAP3_HS_USB_PORTS];
-	unsigned			phy_reset:1;
-};
-
-struct ohci_hcd_omap_platform_data {
-	enum usbhs_omap_port_mode	port_mode[OMAP3_HS_USB_PORTS];
-	unsigned			es2_compatibility:1;
-};
-
-struct usbhs_omap_platform_data {
-	enum usbhs_omap_port_mode		port_mode[OMAP3_HS_USB_PORTS];
-
-	struct ehci_hcd_omap_platform_data	*ehci_data;
-	struct ohci_hcd_omap_platform_data	*ohci_data;
-};
-
-struct usbtll_omap_platform_data {
-	enum usbhs_omap_port_mode		port_mode[OMAP3_HS_USB_PORTS];
-};
-/*-------------------------------------------------------------------------*/
-
-struct omap_musb_board_data {
-	u8	interface_type;
-	u8	mode;
-	u16	power;
-	unsigned extvbus:1;
-	void	(*set_phy_power)(u8 on);
-	void	(*clear_irq)(void);
-	void	(*set_mode)(u8 mode);
-	void	(*reset)(void);
-};
-
-enum musb_interface    {MUSB_INTERFACE_ULPI, MUSB_INTERFACE_UTMI};
-
-extern int omap_tll_enable(void);
-extern int omap_tll_disable(void);
-
-#endif
-
-#endif	/* __ASM_ARCH_OMAP_USB_H */
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 23cec57c02ba..cebfe0a68aa7 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -26,9 +26,12 @@
 #include <linux/spinlock.h>
 #include <linux/gpio.h>
 #include <plat/cpu.h>
-#include <plat/usb.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/usb-omap.h>
 #include <linux/pm_runtime.h>
 
+#include "omap-usb.h"
+
 #define USBHS_DRIVER_NAME	"usbhs_omap"
 #define OMAP_EHCI_DEVICE	"ehci-omap"
 #define OMAP_OHCI_DEVICE	"ohci-omap3"
diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c
index 4b7757b84301..0db0dfa3d08c 100644
--- a/drivers/mfd/omap-usb-tll.c
+++ b/drivers/mfd/omap-usb-tll.c
@@ -25,8 +25,8 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/err.h>
-#include <plat/usb.h>
 #include <linux/pm_runtime.h>
+#include <linux/platform_data/usb-omap.h>
 
 #define USBTLL_DRIVER_NAME	"usbhs_tll"
 
diff --git a/drivers/mfd/omap-usb.h b/drivers/mfd/omap-usb.h
new file mode 100644
index 000000000000..972aa961b064
--- /dev/null
+++ b/drivers/mfd/omap-usb.h
@@ -0,0 +1,2 @@
+extern int omap_tll_enable(void);
+extern int omap_tll_disable(void);
diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c
index d7fe287d0678..0d5ac36fdf47 100644
--- a/drivers/usb/host/ehci-omap.c
+++ b/drivers/usb/host/ehci-omap.c
@@ -39,12 +39,13 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/usb/ulpi.h>
-#include <plat/usb.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/gpio.h>
 #include <linux/clk.h>
 
+#include <linux/platform_data/usb-omap.h>
+
 /* EHCI Register Set */
 #define EHCI_INSNREG04					(0xA0)
 #define EHCI_INSNREG04_DISABLE_UNSUSPEND		(1 << 5)
diff --git a/drivers/usb/host/ohci-omap3.c b/drivers/usb/host/ohci-omap3.c
index 1b8133b6e451..bd7803dce9be 100644
--- a/drivers/usb/host/ohci-omap3.c
+++ b/drivers/usb/host/ohci-omap3.c
@@ -30,7 +30,6 @@
  */
 
 #include <linux/platform_device.h>
-#include <plat/usb.h>
 #include <linux/pm_runtime.h>
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index c964d6af178b..a87cdd2387cf 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -34,8 +34,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/nop-usb-xceiv.h>
-
-#include <plat/usb.h>
+#include <linux/platform_data/usb-omap.h>
 
 #include "musb_core.h"
 
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 444346e1e10d..a67af21c1460 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -38,13 +38,12 @@
 #include <linux/pm_runtime.h>
 #include <linux/module.h>
 #include <linux/usb/nop-usb-xceiv.h>
+#include <linux/platform_data/usb-omap.h>
 
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 
-#include <plat/usb.h>
-
 #include "musb_core.h"
 
 #ifdef CONFIG_OF
diff --git a/drivers/usb/musb/omap2430.h b/drivers/usb/musb/omap2430.h
index b85f3973e78c..8ef656659fcb 100644
--- a/drivers/usb/musb/omap2430.h
+++ b/drivers/usb/musb/omap2430.h
@@ -10,7 +10,7 @@
 #ifndef __MUSB_OMAP243X_H__
 #define __MUSB_OMAP243X_H__
 
-#include <plat/usb.h>
+#include <linux/platform_data/usb-omap.h>
 
 /*
  * OMAP2430-specific definitions
diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
new file mode 100644
index 000000000000..8570bcfe6311
--- /dev/null
+++ b/include/linux/platform_data/usb-omap.h
@@ -0,0 +1,80 @@
+/*
+ * usb-omap.h - Platform data for the various OMAP USB IPs
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define OMAP3_HS_USB_PORTS	3
+
+enum usbhs_omap_port_mode {
+	OMAP_USBHS_PORT_MODE_UNUSED,
+	OMAP_EHCI_PORT_MODE_PHY,
+	OMAP_EHCI_PORT_MODE_TLL,
+	OMAP_EHCI_PORT_MODE_HSIC,
+	OMAP_OHCI_PORT_MODE_PHY_6PIN_DATSE0,
+	OMAP_OHCI_PORT_MODE_PHY_6PIN_DPDM,
+	OMAP_OHCI_PORT_MODE_PHY_3PIN_DATSE0,
+	OMAP_OHCI_PORT_MODE_PHY_4PIN_DPDM,
+	OMAP_OHCI_PORT_MODE_TLL_6PIN_DATSE0,
+	OMAP_OHCI_PORT_MODE_TLL_6PIN_DPDM,
+	OMAP_OHCI_PORT_MODE_TLL_3PIN_DATSE0,
+	OMAP_OHCI_PORT_MODE_TLL_4PIN_DPDM,
+	OMAP_OHCI_PORT_MODE_TLL_2PIN_DATSE0,
+	OMAP_OHCI_PORT_MODE_TLL_2PIN_DPDM
+};
+
+struct usbtll_omap_platform_data {
+	enum usbhs_omap_port_mode		port_mode[OMAP3_HS_USB_PORTS];
+};
+
+struct ehci_hcd_omap_platform_data {
+	enum usbhs_omap_port_mode	port_mode[OMAP3_HS_USB_PORTS];
+	int				reset_gpio_port[OMAP3_HS_USB_PORTS];
+	struct regulator		*regulator[OMAP3_HS_USB_PORTS];
+	unsigned			phy_reset:1;
+};
+
+struct ohci_hcd_omap_platform_data {
+	enum usbhs_omap_port_mode	port_mode[OMAP3_HS_USB_PORTS];
+	unsigned			es2_compatibility:1;
+};
+
+struct usbhs_omap_platform_data {
+	enum usbhs_omap_port_mode		port_mode[OMAP3_HS_USB_PORTS];
+
+	struct ehci_hcd_omap_platform_data	*ehci_data;
+	struct ohci_hcd_omap_platform_data	*ohci_data;
+};
+
+/*-------------------------------------------------------------------------*/
+
+struct omap_musb_board_data {
+	u8	interface_type;
+	u8	mode;
+	u16	power;
+	unsigned extvbus:1;
+	void	(*set_phy_power)(u8 on);
+	void	(*clear_irq)(void);
+	void	(*set_mode)(u8 mode);
+	void	(*reset)(void);
+};
+
+enum musb_interface {
+	MUSB_INTERFACE_ULPI,
+	MUSB_INTERFACE_UTMI
+};
-- 
cgit v1.2.3-55-g7522


From 969ddcfc95c9a1849114fb72466d2fdea70f1d48 Mon Sep 17 00:00:00 2001
From: Alan Stern
Date: Fri, 19 Oct 2012 11:03:02 -0400
Subject: USB: hub_for_each_child should skip unconnected ports

This patch (as1619) improves the interface to the "hub_for_each_child"
macro.  The name clearly suggests that the macro iterates over child
devices; it does not suggest that the loop will also iterate over
unnconnected ports.

The patch changes the macro so that it will skip over unconnected
ports and iterate only the actual child devices.  The two existing
call sites are updated to avoid testing for a NULL child pointer,
which is now unnecessary.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/devices.c      | 18 ++++++++----------
 drivers/usb/host/r8a66597-hcd.c |  6 ++----
 include/linux/usb.h             |  5 +++--
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index f460de31acee..cbacea933b18 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -591,16 +591,14 @@ static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes,
 
 	/* Now look at all of this device's children. */
 	usb_hub_for_each_child(usbdev, chix, childdev) {
-		if (childdev) {
-			usb_lock_device(childdev);
-			ret = usb_device_dump(buffer, nbytes, skip_bytes,
-					      file_offset, childdev, bus,
-					      level + 1, chix - 1, ++cnt);
-			usb_unlock_device(childdev);
-			if (ret == -EFAULT)
-				return total_written;
-			total_written += ret;
-		}
+		usb_lock_device(childdev);
+		ret = usb_device_dump(buffer, nbytes, skip_bytes,
+				      file_offset, childdev, bus,
+				      level + 1, chix - 1, ++cnt);
+		usb_unlock_device(childdev);
+		if (ret == -EFAULT)
+			return total_written;
+		total_written += ret;
 	}
 	return total_written;
 }
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index fcc09e5ec0ad..b3eea0ba97a9 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -2036,10 +2036,8 @@ static void collect_usb_address_map(struct usb_device *udev, unsigned long *map)
 	    udev->parent->descriptor.bDeviceClass == USB_CLASS_HUB)
 		map[udev->devnum/32] |= (1 << (udev->devnum % 32));
 
-	usb_hub_for_each_child(udev, chix, childdev) {
-		if (childdev)
-			collect_usb_address_map(childdev, map);
-	}
+	usb_hub_for_each_child(udev, chix, childdev)
+		collect_usb_address_map(childdev, map);
 }
 
 /* this function must be called with interrupt disabled */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index f92cdf0c1457..5df7c87b277f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -588,8 +588,9 @@ extern struct usb_device *usb_hub_find_child(struct usb_device *hdev,
  */
 #define usb_hub_for_each_child(hdev, port1, child) \
 	for (port1 = 1,	child =	usb_hub_find_child(hdev, port1); \
-		port1 <= hdev->maxchild; \
-		child = usb_hub_find_child(hdev, ++port1))
+			port1 <= hdev->maxchild; \
+			child = usb_hub_find_child(hdev, ++port1)) \
+		if (!child) continue; else
 
 /* USB device locking */
 #define usb_lock_device(udev)		device_lock(&(udev)->dev)
-- 
cgit v1.2.3-55-g7522


From bfd1e910139be73fb0783a0b3171fc79e6afa031 Mon Sep 17 00:00:00 2001
From: Alan Stern
Date: Fri, 19 Oct 2012 11:03:39 -0400
Subject: USB: speed up usb_bus_resume()

This patch (as1620) speeds up USB root-hub resumes in the common case
where every enabled port has its suspend feature set (which currently
will be true for every runtime resume of the root hub).  If all the
enabled ports are suspended then resuming the root hub won't resume
any of the downstream devices.  In this case there's no need for a
Resume Recovery delay, because that delay is meant to give devices a
chance to get ready for active use.

To keep track of the port suspend features, the patch adds a
"port_is_suspended" flag to struct usb_device.  This has to be tracked
separately from the device's state; it's entirely possible for a USB-2
device to be suspended while the suspend feature on its parent port is
clear.  The reason is that devices will go into suspend whenever their
parent hub does.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Peter Chen <peter.chen@freescale.com>
Tested-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 19 +++++++++++++++++--
 drivers/usb/core/hub.c |  2 ++
 include/linux/usb.h    |  2 ++
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 1e741bca0265..eaa14514e173 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2039,8 +2039,9 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
 	status = hcd->driver->bus_resume(hcd);
 	clear_bit(HCD_FLAG_WAKEUP_PENDING, &hcd->flags);
 	if (status == 0) {
-		/* TRSMRCY = 10 msec */
-		msleep(10);
+		struct usb_device *udev;
+		int port1;
+
 		spin_lock_irq(&hcd_root_hub_lock);
 		if (!HCD_DEAD(hcd)) {
 			usb_set_device_state(rhdev, rhdev->actconfig
@@ -2050,6 +2051,20 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
 			hcd->state = HC_STATE_RUNNING;
 		}
 		spin_unlock_irq(&hcd_root_hub_lock);
+
+		/*
+		 * Check whether any of the enabled ports on the root hub are
+		 * unsuspended.  If they are then a TRSMRCY delay is needed
+		 * (this is what the USB-2 spec calls a "global resume").
+		 * Otherwise we can skip the delay.
+		 */
+		usb_hub_for_each_child(rhdev, port1, udev) {
+			if (udev->state != USB_STATE_NOTATTACHED &&
+					!udev->port_is_suspended) {
+				usleep_range(10000, 11000);	/* TRSMRCY */
+				break;
+			}
+		}
 	} else {
 		hcd->state = old_state;
 		dev_dbg(&rhdev->dev, "bus %s fail, err %d\n",
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 64854d76f529..e729e94cb751 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2876,6 +2876,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 				(PMSG_IS_AUTO(msg) ? "auto-" : ""),
 				udev->do_remote_wakeup);
 		usb_set_device_state(udev, USB_STATE_SUSPENDED);
+		udev->port_is_suspended = 1;
 		msleep(10);
 	}
 	usb_mark_last_busy(hub->hdev);
@@ -3040,6 +3041,7 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 
  SuspendCleared:
 	if (status == 0) {
+		udev->port_is_suspended = 0;
 		if (hub_is_superspeed(hub->hdev)) {
 			if (portchange & USB_PORT_STAT_C_LINK_STATE)
 				clear_port_feature(hub->hdev, port1,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 5df7c87b277f..f51f9981de1e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -482,6 +482,7 @@ struct usb3_lpm_parameters {
  * @connect_time: time device was first connected
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
+ * @port_is_suspended: the upstream port is suspended (L2 or U3)
  * @wusb_dev: if this is a Wireless USB device, link to the WUSB
  *	specific data for the device.
  * @slot_id: Slot ID assigned by xHCI
@@ -560,6 +561,7 @@ struct usb_device {
 
 	unsigned do_remote_wakeup:1;
 	unsigned reset_resume:1;
+	unsigned port_is_suspended:1;
 #endif
 	struct wusb_dev *wusb_dev;
 	int slot_id;
-- 
cgit v1.2.3-55-g7522


From 0a0c3b5a24bd802b1ebbf99e0b01296647b8199b Mon Sep 17 00:00:00 2001
From: Damian Hobson-Garcia
Date: Tue, 25 Sep 2012 15:09:11 +0900
Subject: Add new uio device for dynamic memory allocation

This device extends the uio_pdrv_genirq driver to provide limited
dynamic memory allocation for UIO devices.  This allows UIO devices
to use CMA and IOMMU allocated memory regions. This driver is based
on the uio_pdrv_genirq driver and provides the same generic interrupt
handling capabilities.  Like uio_prdv_genirq,
a fixed number of memory regions, defined in the platform device's
.resources field are exported to userpace. This driver adds the ability
to export additional regions whose number and size are known at boot time,
but whose memory is not allocated until the uio device file is opened for
the first time.  When the device file is closed, the allocated memory block
is freed.  Physical (DMA) addresses for the dynamic regions are provided to
the userspace via /sys/class/uio/uioX/maps/mapY/addr in the same way as
static addresses are when the uio device file is open, when no processes
are holding the device file open, the address returned to userspace is
DMA_ERROR_CODE.

Signed-off-by: Damian Hobson-Garcia <dhobsong@igel.co.jp>
Signed-off-by: "Hans J. Koch" <hjk@hansjkoch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/Kconfig                           |  16 ++
 drivers/uio/Makefile                          |   1 +
 drivers/uio/uio_dmem_genirq.c                 | 354 ++++++++++++++++++++++++++
 include/linux/platform_data/uio_dmem_genirq.h |  26 ++
 4 files changed, 397 insertions(+)
 create mode 100644 drivers/uio/uio_dmem_genirq.c
 create mode 100644 include/linux/platform_data/uio_dmem_genirq.h

(limited to 'include/linux')

diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
index 6f3ea9bbc818..82e2b89d4480 100644
--- a/drivers/uio/Kconfig
+++ b/drivers/uio/Kconfig
@@ -44,6 +44,22 @@ config UIO_PDRV_GENIRQ
 
 	  If you don't know what to do here, say N.
 
+config UIO_DMEM_GENIRQ
+	tristate "Userspace platform driver with generic irq and dynamic memory"
+	help
+	  Platform driver for Userspace I/O devices, including generic
+	  interrupt handling code. Shared interrupts are not supported.
+
+	  Memory regions can be specified with the same platform device
+	  resources as the UIO_PDRV drivers, but dynamic regions can also
+	  be specified.
+	  The number and size of these regions is static,
+	  but the memory allocation is not performed until
+	  the associated device file is opened. The
+	  memory is freed once the uio device is closed.
+
+	  If you don't know what to do here, say N.
+
 config UIO_AEC
 	tristate "AEC video timestamp device"
 	depends on PCI
diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile
index d4dd9a5552f8..b354c539507a 100644
--- a/drivers/uio/Makefile
+++ b/drivers/uio/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_UIO)	+= uio.o
 obj-$(CONFIG_UIO_CIF)	+= uio_cif.o
 obj-$(CONFIG_UIO_PDRV)	+= uio_pdrv.o
 obj-$(CONFIG_UIO_PDRV_GENIRQ)	+= uio_pdrv_genirq.o
+obj-$(CONFIG_UIO_DMEM_GENIRQ)	+= uio_dmem_genirq.o
 obj-$(CONFIG_UIO_AEC)	+= uio_aec.o
 obj-$(CONFIG_UIO_SERCOS3)	+= uio_sercos3.o
 obj-$(CONFIG_UIO_PCI_GENERIC)	+= uio_pci_generic.o
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
new file mode 100644
index 000000000000..4d4dd008c8b9
--- /dev/null
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -0,0 +1,354 @@
+/*
+ * drivers/uio/uio_dmem_genirq.c
+ *
+ * Userspace I/O platform driver with generic IRQ handling code.
+ *
+ * Copyright (C) 2012 Damian Hobson-Garcia
+ *
+ * Based on uio_pdrv_genirq.c by Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/uio_driver.h>
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_data/uio_dmem_genirq.h>
+#include <linux/stringify.h>
+#include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+
+#define DRIVER_NAME "uio_dmem_genirq"
+
+struct uio_dmem_genirq_platdata {
+	struct uio_info *uioinfo;
+	spinlock_t lock;
+	unsigned long flags;
+	struct platform_device *pdev;
+	unsigned int dmem_region_start;
+	unsigned int num_dmem_regions;
+	struct mutex alloc_lock;
+	unsigned int refcnt;
+};
+
+static int uio_dmem_genirq_open(struct uio_info *info, struct inode *inode)
+{
+	struct uio_dmem_genirq_platdata *priv = info->priv;
+	struct uio_mem *uiomem;
+	int ret = 0;
+
+	uiomem = &priv->uioinfo->mem[priv->dmem_region_start];
+
+	mutex_lock(&priv->alloc_lock);
+	while (!priv->refcnt && uiomem < &priv->uioinfo->mem[MAX_UIO_MAPS]) {
+		void *addr;
+		if (!uiomem->size)
+			break;
+
+		addr = dma_alloc_coherent(&priv->pdev->dev, uiomem->size,
+				(dma_addr_t *)&uiomem->addr, GFP_KERNEL);
+		if (!addr) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		uiomem->internal_addr = addr;
+		++uiomem;
+	}
+	priv->refcnt++;
+
+	mutex_unlock(&priv->alloc_lock);
+	/* Wait until the Runtime PM code has woken up the device */
+	pm_runtime_get_sync(&priv->pdev->dev);
+	return ret;
+}
+
+static int uio_dmem_genirq_release(struct uio_info *info, struct inode *inode)
+{
+	struct uio_dmem_genirq_platdata *priv = info->priv;
+	struct uio_mem *uiomem;
+
+	/* Tell the Runtime PM code that the device has become idle */
+	pm_runtime_put_sync(&priv->pdev->dev);
+
+	uiomem = &priv->uioinfo->mem[priv->dmem_region_start];
+
+	mutex_lock(&priv->alloc_lock);
+
+	priv->refcnt--;
+	while (!priv->refcnt && uiomem < &priv->uioinfo->mem[MAX_UIO_MAPS]) {
+		if (!uiomem->size)
+			break;
+
+		dma_free_coherent(&priv->pdev->dev, uiomem->size,
+				uiomem->internal_addr, uiomem->addr);
+		uiomem->addr = DMA_ERROR_CODE;
+		++uiomem;
+	}
+
+	mutex_unlock(&priv->alloc_lock);
+	return 0;
+}
+
+static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info)
+{
+	struct uio_dmem_genirq_platdata *priv = dev_info->priv;
+
+	/* Just disable the interrupt in the interrupt controller, and
+	 * remember the state so we can allow user space to enable it later.
+	 */
+
+	if (!test_and_set_bit(0, &priv->flags))
+		disable_irq_nosync(irq);
+
+	return IRQ_HANDLED;
+}
+
+static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
+{
+	struct uio_dmem_genirq_platdata *priv = dev_info->priv;
+	unsigned long flags;
+
+	/* Allow user space to enable and disable the interrupt
+	 * in the interrupt controller, but keep track of the
+	 * state to prevent per-irq depth damage.
+	 *
+	 * Serialize this operation to support multiple tasks.
+	 */
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (irq_on) {
+		if (test_and_clear_bit(0, &priv->flags))
+			enable_irq(dev_info->irq);
+	} else {
+		if (!test_and_set_bit(0, &priv->flags))
+			disable_irq(dev_info->irq);
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int uio_dmem_genirq_probe(struct platform_device *pdev)
+{
+	struct uio_dmem_genirq_pdata *pdata = pdev->dev.platform_data;
+	struct uio_info *uioinfo = &pdata->uioinfo;
+	struct uio_dmem_genirq_platdata *priv;
+	struct uio_mem *uiomem;
+	int ret = -EINVAL;
+	int i;
+
+	if (!uioinfo) {
+		int irq;
+
+		/* alloc uioinfo for one device */
+		uioinfo = kzalloc(sizeof(*uioinfo), GFP_KERNEL);
+		if (!uioinfo) {
+			ret = -ENOMEM;
+			dev_err(&pdev->dev, "unable to kmalloc\n");
+			goto bad2;
+		}
+		uioinfo->name = pdev->dev.of_node->name;
+		uioinfo->version = "devicetree";
+
+		/* Multiple IRQs are not supported */
+		irq = platform_get_irq(pdev, 0);
+		if (irq == -ENXIO)
+			uioinfo->irq = UIO_IRQ_NONE;
+		else
+			uioinfo->irq = irq;
+	}
+
+	if (!uioinfo || !uioinfo->name || !uioinfo->version) {
+		dev_err(&pdev->dev, "missing platform_data\n");
+		goto bad0;
+	}
+
+	if (uioinfo->handler || uioinfo->irqcontrol ||
+	    uioinfo->irq_flags & IRQF_SHARED) {
+		dev_err(&pdev->dev, "interrupt configuration error\n");
+		goto bad0;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		dev_err(&pdev->dev, "unable to kmalloc\n");
+		goto bad0;
+	}
+
+	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+
+	priv->uioinfo = uioinfo;
+	spin_lock_init(&priv->lock);
+	priv->flags = 0; /* interrupt is enabled to begin with */
+	priv->pdev = pdev;
+	mutex_init(&priv->alloc_lock);
+
+	if (!uioinfo->irq) {
+		ret = platform_get_irq(pdev, 0);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "failed to get IRQ\n");
+			goto bad0;
+		}
+		uioinfo->irq = ret;
+	}
+	uiomem = &uioinfo->mem[0];
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct resource *r = &pdev->resource[i];
+
+		if (r->flags != IORESOURCE_MEM)
+			continue;
+
+		if (uiomem >= &uioinfo->mem[MAX_UIO_MAPS]) {
+			dev_warn(&pdev->dev, "device has more than "
+					__stringify(MAX_UIO_MAPS)
+					" I/O memory resources.\n");
+			break;
+		}
+
+		uiomem->memtype = UIO_MEM_PHYS;
+		uiomem->addr = r->start;
+		uiomem->size = resource_size(r);
+		++uiomem;
+	}
+
+	priv->dmem_region_start = i;
+	priv->num_dmem_regions = pdata->num_dynamic_regions;
+
+	for (i = 0; i < pdata->num_dynamic_regions; ++i) {
+		if (uiomem >= &uioinfo->mem[MAX_UIO_MAPS]) {
+			dev_warn(&pdev->dev, "device has more than "
+					__stringify(MAX_UIO_MAPS)
+					" dynamic and fixed memory regions.\n");
+			break;
+		}
+		uiomem->memtype = UIO_MEM_PHYS;
+		uiomem->addr = DMA_ERROR_CODE;
+		uiomem->size = pdata->dynamic_region_sizes[i];
+		++uiomem;
+	}
+
+	while (uiomem < &uioinfo->mem[MAX_UIO_MAPS]) {
+		uiomem->size = 0;
+		++uiomem;
+	}
+
+	/* This driver requires no hardware specific kernel code to handle
+	 * interrupts. Instead, the interrupt handler simply disables the
+	 * interrupt in the interrupt controller. User space is responsible
+	 * for performing hardware specific acknowledge and re-enabling of
+	 * the interrupt in the interrupt controller.
+	 *
+	 * Interrupt sharing is not supported.
+	 */
+
+	uioinfo->handler = uio_dmem_genirq_handler;
+	uioinfo->irqcontrol = uio_dmem_genirq_irqcontrol;
+	uioinfo->open = uio_dmem_genirq_open;
+	uioinfo->release = uio_dmem_genirq_release;
+	uioinfo->priv = priv;
+
+	/* Enable Runtime PM for this device:
+	 * The device starts in suspended state to allow the hardware to be
+	 * turned off by default. The Runtime PM bus code should power on the
+	 * hardware and enable clocks at open().
+	 */
+	pm_runtime_enable(&pdev->dev);
+
+	ret = uio_register_device(&pdev->dev, priv->uioinfo);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register uio device\n");
+		goto bad1;
+	}
+
+	platform_set_drvdata(pdev, priv);
+	return 0;
+ bad1:
+	kfree(priv);
+	pm_runtime_disable(&pdev->dev);
+ bad0:
+	/* kfree uioinfo for OF */
+	if (pdev->dev.of_node)
+		kfree(uioinfo);
+ bad2:
+	return ret;
+}
+
+static int uio_dmem_genirq_remove(struct platform_device *pdev)
+{
+	struct uio_dmem_genirq_platdata *priv = platform_get_drvdata(pdev);
+
+	uio_unregister_device(priv->uioinfo);
+	pm_runtime_disable(&pdev->dev);
+
+	priv->uioinfo->handler = NULL;
+	priv->uioinfo->irqcontrol = NULL;
+
+	/* kfree uioinfo for OF */
+	if (pdev->dev.of_node)
+		kfree(priv->uioinfo);
+
+	kfree(priv);
+	return 0;
+}
+
+static int uio_dmem_genirq_runtime_nop(struct device *dev)
+{
+	/* Runtime PM callback shared between ->runtime_suspend()
+	 * and ->runtime_resume(). Simply returns success.
+	 *
+	 * In this driver pm_runtime_get_sync() and pm_runtime_put_sync()
+	 * are used at open() and release() time. This allows the
+	 * Runtime PM code to turn off power to the device while the
+	 * device is unused, ie before open() and after release().
+	 *
+	 * This Runtime PM callback does not need to save or restore
+	 * any registers since user space is responsbile for hardware
+	 * register reinitialization after open().
+	 */
+	return 0;
+}
+
+static const struct dev_pm_ops uio_dmem_genirq_dev_pm_ops = {
+	.runtime_suspend = uio_dmem_genirq_runtime_nop,
+	.runtime_resume = uio_dmem_genirq_runtime_nop,
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id uio_of_genirq_match[] = {
+	{ /* empty for now */ },
+};
+MODULE_DEVICE_TABLE(of, uio_of_genirq_match);
+#else
+# define uio_of_genirq_match NULL
+#endif
+
+static struct platform_driver uio_dmem_genirq = {
+	.probe = uio_dmem_genirq_probe,
+	.remove = uio_dmem_genirq_remove,
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+		.pm = &uio_dmem_genirq_dev_pm_ops,
+		.of_match_table = uio_of_genirq_match,
+	},
+};
+
+module_platform_driver(uio_dmem_genirq);
+
+MODULE_AUTHOR("Damian Hobson-Garcia");
+MODULE_DESCRIPTION("Userspace I/O platform driver with dynamic memory.");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/include/linux/platform_data/uio_dmem_genirq.h b/include/linux/platform_data/uio_dmem_genirq.h
new file mode 100644
index 000000000000..973c1bb32168
--- /dev/null
+++ b/include/linux/platform_data/uio_dmem_genirq.h
@@ -0,0 +1,26 @@
+/*
+ * include/linux/platform_data/uio_dmem_genirq.h
+ *
+ * Copyright (C) 2012 Damian Hobson-Garcia
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UIO_DMEM_GENIRQ_H
+#define _UIO_DMEM_GENIRQ_H
+
+#include <linux/uio_driver.h>
+
+struct uio_dmem_genirq_pdata {
+	struct uio_info	uioinfo;
+	unsigned int *dynamic_region_sizes;
+	unsigned int num_dynamic_regions;
+};
+#endif /* _UIO_DMEM_GENIRQ_H */
-- 
cgit v1.2.3-55-g7522


From 877bd862f32b815d54ab5fc10a4fd903d7bf3012 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Wed, 24 Oct 2012 19:46:54 +0000
Subject: usbnet: introduce usbnet 3 command helpers

This patch introduces the below 3 usb command helpers:

	usbnet_read_cmd / usbnet_write_cmd / usbnet_write_cmd_async

so that each low level driver doesn't need to implement them
by itself, and the dma buffer allocation for usb transfer and
runtime PM things can be handled just in one place.

Acked-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 132 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/usbnet.h |   6 +++
 2 files changed, 138 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index f9819d10b1f9..447d20d22b7c 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1610,6 +1610,138 @@ void usbnet_device_suggests_idle(struct usbnet *dev)
 }
 EXPORT_SYMBOL(usbnet_device_suggests_idle);
 
+/*-------------------------------------------------------------------------*/
+int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, void *data, u16 size)
+{
+	void *buf = NULL;
+	int err = -ENOMEM;
+
+	netdev_dbg(dev->net, "usbnet_read_cmd cmd=0x%02x reqtype=%02x"
+		   " value=0x%04x index=0x%04x size=%d\n",
+		   cmd, reqtype, value, index, size);
+
+	if (data) {
+		buf = kmalloc(size, GFP_KERNEL);
+		if (!buf)
+			goto out;
+	}
+
+	err = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+			      cmd, reqtype, value, index, buf, size,
+			      USB_CTRL_GET_TIMEOUT);
+	if (err > 0 && err <= size)
+		memcpy(data, buf, err);
+	kfree(buf);
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(usbnet_read_cmd);
+
+int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+		     u16 value, u16 index, const void *data, u16 size)
+{
+	void *buf = NULL;
+	int err = -ENOMEM;
+
+	netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x"
+		   " value=0x%04x index=0x%04x size=%d\n",
+		   cmd, reqtype, value, index, size);
+
+	if (data) {
+		buf = kmemdup(data, size, GFP_KERNEL);
+		if (!buf)
+			goto out;
+	}
+
+	err = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+			      cmd, reqtype, value, index, buf, size,
+			      USB_CTRL_SET_TIMEOUT);
+	kfree(buf);
+
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(usbnet_write_cmd);
+
+static void usbnet_async_cmd_cb(struct urb *urb)
+{
+	struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context;
+	int status = urb->status;
+
+	if (status < 0)
+		dev_dbg(&urb->dev->dev, "%s failed with %d",
+			__func__, status);
+
+	kfree(req);
+	usb_free_urb(urb);
+}
+
+int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
+			   u16 value, u16 index, const void *data, u16 size)
+{
+	struct usb_ctrlrequest *req = NULL;
+	struct urb *urb;
+	int err = -ENOMEM;
+	void *buf = NULL;
+
+	netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x"
+		   " value=0x%04x index=0x%04x size=%d\n",
+		   cmd, reqtype, value, index, size);
+
+	urb = usb_alloc_urb(0, GFP_ATOMIC);
+	if (!urb) {
+		netdev_err(dev->net, "Error allocating URB in"
+			   " %s!\n", __func__);
+		goto fail;
+	}
+
+	if (data) {
+		buf = kmemdup(data, size, GFP_ATOMIC);
+		if (!buf) {
+			netdev_err(dev->net, "Error allocating buffer"
+				   " in %s!\n", __func__);
+			goto fail_free;
+		}
+	}
+
+	req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC);
+	if (!req) {
+		netdev_err(dev->net, "Failed to allocate memory for %s\n",
+			   __func__);
+		goto fail_free_buf;
+	}
+
+	req->bRequestType = reqtype;
+	req->bRequest = cmd;
+	req->wValue = cpu_to_le16(value);
+	req->wIndex = cpu_to_le16(index);
+	req->wLength = cpu_to_le16(size);
+
+	usb_fill_control_urb(urb, dev->udev,
+			     usb_sndctrlpipe(dev->udev, 0),
+			     (void *)req, buf, size,
+			     usbnet_async_cmd_cb, req);
+	urb->transfer_flags |= URB_FREE_BUFFER;
+
+	err = usb_submit_urb(urb, GFP_ATOMIC);
+	if (err < 0) {
+		netdev_err(dev->net, "Error submitting the control"
+			   " message: status=%d\n", err);
+		goto fail_free;
+	}
+	return 0;
+
+fail_free_buf:
+	kfree(buf);
+fail_free:
+	kfree(req);
+	usb_free_urb(urb);
+fail:
+	return err;
+
+}
+EXPORT_SYMBOL_GPL(usbnet_write_cmd_async);
 /*-------------------------------------------------------------------------*/
 
 static int __init usbnet_init(void)
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index ddbbb7de894b..4410e4166c6c 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -163,6 +163,12 @@ extern int usbnet_resume(struct usb_interface *);
 extern void usbnet_disconnect(struct usb_interface *);
 extern void usbnet_device_suggests_idle(struct usbnet *dev);
 
+extern int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, void *data, u16 size);
+extern int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, const void *data, u16 size);
+extern int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, const void *data, u16 size);
 
 /* Drivers that reuse some of the standard USB CDC infrastructure
  * (notably, using multiple interfaces according to the CDC
-- 
cgit v1.2.3-55-g7522


From 91872392f08486f692887d2f06a333f512648f22 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 9 Oct 2012 19:52:05 +0200
Subject: drivers/base: Add a DEVICE_BOOL_ATTR macro

... which, analogous to DEVICE_INT_ATTR provides functionality to
set/clear bools. Its purpose is to be used where values need to be used
as booleans in configuration context.

Next patch uses this.

Signed-off-by: Borislav Petkov <bp@alien8.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Tony Luck <tony.luck@intel.com>
---
 drivers/base/core.c    | 21 +++++++++++++++++++++
 include/linux/device.h |  7 +++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index abea76c36a4b..c8ae1f6b01b9 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -171,6 +171,27 @@ ssize_t device_show_int(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(device_show_int);
 
+ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	if (strtobool(buf, ea->var) < 0)
+		return -EINVAL;
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_bool);
+
+ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_bool);
+
 /**
  *	device_release - free device structure.
  *	@kobj:	device's kobject.
diff --git a/include/linux/device.h b/include/linux/device.h
index 86ef6ab553b1..50c85a26b003 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -496,6 +496,10 @@ ssize_t device_show_int(struct device *dev, struct device_attribute *attr,
 			char *buf);
 ssize_t device_store_int(struct device *dev, struct device_attribute *attr,
 			 const char *buf, size_t count);
+ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
+			char *buf);
+ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
 
 #define DEVICE_ATTR(_name, _mode, _show, _store) \
 	struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
@@ -505,6 +509,9 @@ ssize_t device_store_int(struct device *dev, struct device_attribute *attr,
 #define DEVICE_INT_ATTR(_name, _mode, _var) \
 	struct dev_ext_attribute dev_attr_##_name = \
 		{ __ATTR(_name, _mode, device_show_int, device_store_int), &(_var) }
+#define DEVICE_BOOL_ATTR(_name, _mode, _var) \
+	struct dev_ext_attribute dev_attr_##_name = \
+		{ __ATTR(_name, _mode, device_show_bool, device_store_bool), &(_var) }
 #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \
 	struct device_attribute dev_attr_##_name =		\
 		__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
-- 
cgit v1.2.3-55-g7522


From 5d8f72b55c275677865de670fa147ed318191d81 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 26 Oct 2012 19:46:06 +0200
Subject: freezer: change ptrace_stop/do_signal_stop to use
 freezable_schedule()

try_to_freeze_tasks() and cgroup_freezer rely on scheduler locks
to ensure that a task doing STOPPED/TRACED -> RUNNING transition
can't escape freezing. This mostly works, but ptrace_stop() does
not necessarily call schedule(), it can change task->state back to
RUNNING and check freezing() without any lock/barrier in between.

We could add the necessary barrier, but this patch changes
ptrace_stop() and do_signal_stop() to use freezable_schedule().
This fixes the race, freezer_count() and freezer_should_skip()
carefully avoid the race.

And this simplifies the code, try_to_freeze_tasks/update_if_frozen
no longer need to use task_is_stopped_or_traced() checks with the
non trivial assumptions. We can rely on the mechanism which was
specially designed to mark the sleeping task as "frozen enough".

v2: As Tejun pointed out, we can also change get_signal_to_deliver()
and move try_to_freeze() up before 'relock' label.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/freezer.h |  7 +++----
 kernel/cgroup_freezer.c |  3 +--
 kernel/freezer.c        | 11 ++---------
 kernel/power/process.c  | 13 +------------
 kernel/signal.c         | 20 ++++++--------------
 5 files changed, 13 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index ee899329e65a..8039893bc3ec 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -134,10 +134,9 @@ static inline bool freezer_should_skip(struct task_struct *p)
 }
 
 /*
- * These macros are intended to be used whenever you want allow a task that's
- * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note
- * that neither return any clear indication of whether a freeze event happened
- * while in this function.
+ * These macros are intended to be used whenever you want allow a sleeping
+ * task to be frozen. Note that neither return any clear indication of
+ * whether a freeze event happened while in this function.
  */
 
 /* Like schedule(), but should not block the freezer. */
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 8a92b0e52099..bedefd9a22df 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -198,8 +198,7 @@ static void update_if_frozen(struct cgroup *cgroup, struct freezer *freezer)
 			 * completion.  Consider it frozen in addition to
 			 * the usual frozen condition.
 			 */
-			if (!frozen(task) && !task_is_stopped_or_traced(task) &&
-			    !freezer_should_skip(task))
+			if (!frozen(task) && !freezer_should_skip(task))
 				goto notyet;
 		}
 	}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 11f82a4d4eae..c38893b0efba 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -116,17 +116,10 @@ bool freeze_task(struct task_struct *p)
 		return false;
 	}
 
-	if (!(p->flags & PF_KTHREAD)) {
+	if (!(p->flags & PF_KTHREAD))
 		fake_signal_wake_up(p);
-		/*
-		 * fake_signal_wake_up() goes through p's scheduler
-		 * lock and guarantees that TASK_STOPPED/TRACED ->
-		 * TASK_RUNNING transition can't race with task state
-		 * testing in try_to_freeze_tasks().
-		 */
-	} else {
+	else
 		wake_up_state(p, TASK_INTERRUPTIBLE);
-	}
 
 	spin_unlock_irqrestore(&freezer_lock, flags);
 	return true;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 87da817f9e13..d5a258b60c6f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -48,18 +48,7 @@ static int try_to_freeze_tasks(bool user_only)
 			if (p == current || !freeze_task(p))
 				continue;
 
-			/*
-			 * Now that we've done set_freeze_flag, don't
-			 * perturb a task in TASK_STOPPED or TASK_TRACED.
-			 * It is "frozen enough".  If the task does wake
-			 * up, it will immediately call try_to_freeze.
-			 *
-			 * Because freeze_task() goes through p's scheduler lock, it's
-			 * guaranteed that TASK_STOPPED/TRACED -> TASK_RUNNING
-			 * transition can't race with task state testing here.
-			 */
-			if (!task_is_stopped_or_traced(p) &&
-			    !freezer_should_skip(p))
+			if (!freezer_should_skip(p))
 				todo++;
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 0af8868525d6..5ffb5626e072 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1908,7 +1908,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		preempt_disable();
 		read_unlock(&tasklist_lock);
 		preempt_enable_no_resched();
-		schedule();
+		freezable_schedule();
 	} else {
 		/*
 		 * By the time we got the lock, our tracer went away.
@@ -1929,13 +1929,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		read_unlock(&tasklist_lock);
 	}
 
-	/*
-	 * While in TASK_TRACED, we were considered "frozen enough".
-	 * Now that we woke up, it's crucial if we're supposed to be
-	 * frozen that we freeze now before running anything substantial.
-	 */
-	try_to_freeze();
-
 	/*
 	 * We are back.  Now reacquire the siglock before touching
 	 * last_siginfo, so that we are sure to have synchronized with
@@ -2092,7 +2085,7 @@ static bool do_signal_stop(int signr)
 		}
 
 		/* Now we don't run again until woken by SIGCONT or SIGKILL */
-		schedule();
+		freezable_schedule();
 		return true;
 	} else {
 		/*
@@ -2200,15 +2193,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 	if (unlikely(uprobe_deny_signal()))
 		return 0;
 
-relock:
 	/*
-	 * We'll jump back here after any time we were stopped in TASK_STOPPED.
-	 * While in TASK_STOPPED, we were considered "frozen enough".
-	 * Now that we woke up, it's crucial if we're supposed to be
-	 * frozen that we freeze now before running anything substantial.
+	 * Do this once, we can't return to user-mode if freezing() == T.
+	 * do_signal_stop() and ptrace_stop() do freezable_schedule() and
+	 * thus do not need another check after return.
 	 */
 	try_to_freeze();
 
+relock:
 	spin_lock_irq(&sighand->siglock);
 	/*
 	 * Every stopped thread goes here after wakeup. Check to see if
-- 
cgit v1.2.3-55-g7522


From 2eb2478d471e45e1d0c8bb3defbf82bf7204e13d Mon Sep 17 00:00:00 2001
From: Matt Porter
Date: Fri, 5 Oct 2012 13:04:40 -0400
Subject: uio: uio_pruss: replace private SRAM API with genalloc

Remove the use of the private DaVinci SRAM API in favor
of genalloc. The pool to be used is provided by platform
data.

Signed-off-by: Matt Porter <mporter@ti.com>
Signed-off-by: "Hans J. Koch" <hjk@hansjkoch.de>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 drivers/uio/Kconfig                     |  1 +
 drivers/uio/uio_pruss.c                 | 24 +++++++++++++++++-------
 include/linux/platform_data/uio_pruss.h |  3 ++-
 3 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
index 6f3ea9bbc818..c48b93813fc1 100644
--- a/drivers/uio/Kconfig
+++ b/drivers/uio/Kconfig
@@ -97,6 +97,7 @@ config UIO_NETX
 config UIO_PRUSS
 	tristate "Texas Instruments PRUSS driver"
 	depends on ARCH_DAVINCI_DA850
+	select GENERIC_ALLOCATOR
 	help
 	  PRUSS driver for OMAPL138/DA850/AM18XX devices
 	  PRUSS driver requires user space components, examples and user space
diff --git a/drivers/uio/uio_pruss.c b/drivers/uio/uio_pruss.c
index 33a7a273b453..f8738de342be 100644
--- a/drivers/uio/uio_pruss.c
+++ b/drivers/uio/uio_pruss.c
@@ -25,7 +25,7 @@
 #include <linux/clk.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
-#include <mach/sram.h>
+#include <linux/genalloc.h>
 
 #define DRV_NAME "pruss_uio"
 #define DRV_VERSION "1.0"
@@ -65,10 +65,11 @@ struct uio_pruss_dev {
 	dma_addr_t sram_paddr;
 	dma_addr_t ddr_paddr;
 	void __iomem *prussio_vaddr;
-	void *sram_vaddr;
+	unsigned long sram_vaddr;
 	void *ddr_vaddr;
 	unsigned int hostirq_start;
 	unsigned int pintc_base;
+	struct gen_pool *sram_pool;
 };
 
 static irqreturn_t pruss_handler(int irq, struct uio_info *info)
@@ -106,7 +107,9 @@ static void pruss_cleanup(struct platform_device *dev,
 			gdev->ddr_paddr);
 	}
 	if (gdev->sram_vaddr)
-		sram_free(gdev->sram_vaddr, sram_pool_sz);
+		gen_pool_free(gdev->sram_pool,
+			      gdev->sram_vaddr,
+			      sram_pool_sz);
 	kfree(gdev->info);
 	clk_put(gdev->pruss_clk);
 	kfree(gdev);
@@ -152,10 +155,17 @@ static int __devinit pruss_probe(struct platform_device *dev)
 		goto out_free;
 	}
 
-	gdev->sram_vaddr = sram_alloc(sram_pool_sz, &(gdev->sram_paddr));
-	if (!gdev->sram_vaddr) {
-		dev_err(&dev->dev, "Could not allocate SRAM pool\n");
-		goto out_free;
+	if (pdata->sram_pool) {
+		gdev->sram_pool = pdata->sram_pool;
+		gdev->sram_vaddr =
+			gen_pool_alloc(gdev->sram_pool, sram_pool_sz);
+		if (!gdev->sram_vaddr) {
+			dev_err(&dev->dev, "Could not allocate SRAM pool\n");
+			goto out_free;
+		}
+		gdev->sram_paddr =
+			gen_pool_virt_to_phys(gdev->sram_pool,
+					      gdev->sram_vaddr);
 	}
 
 	gdev->ddr_vaddr = dma_alloc_coherent(&dev->dev, extram_pool_sz,
diff --git a/include/linux/platform_data/uio_pruss.h b/include/linux/platform_data/uio_pruss.h
index f39140aabc6f..3d47d219827f 100644
--- a/include/linux/platform_data/uio_pruss.h
+++ b/include/linux/platform_data/uio_pruss.h
@@ -20,6 +20,7 @@
 
 /* To configure the PRUSS INTC base offset for UIO driver */
 struct uio_pruss_pdata {
-	u32	pintc_base;
+	u32		pintc_base;
+	struct gen_pool *sram_pool;
 };
 #endif /* _UIO_PRUSS_H_ */
-- 
cgit v1.2.3-55-g7522


From 55c6659afaa6fd79a3b5a7c2b42bb87e0c11209d Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sat, 13 Oct 2012 01:14:16 +0800
Subject: srcu: Add DEFINE_SRCU()

In old days, we had two different API sets for dynamic-allocated per-CPU
data and DEFINE_PER_CPU()-defined per_cpu data, and because SRCU used
dynamic-allocated per-CPU data, its srcu_struct structures cannot be
declared statically.  This commit therefore introduces DEFINE_SRCU()
and DEFINE_STATIC_SRCU() to allow statically declared SRCU structures,
using the new static per-CPU interfaces.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Updated for __DELAYED_WORK_INITIALIZER() added argument,
	   fixed whitespace issue. ]
---
 include/linux/srcu.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 5cce128f196c..6eb691b08358 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -42,6 +42,8 @@ struct rcu_batch {
 	struct rcu_head *head, **tail;
 };
 
+#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
+
 struct srcu_struct {
 	unsigned completed;
 	struct srcu_struct_array __percpu *per_cpu_ref;
@@ -72,14 +74,42 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
 	__init_srcu_struct((sp), #sp, &__srcu_key); \
 })
 
+#define __SRCU_DEP_MAP_INIT(srcu_name)	.dep_map = { .name = #srcu_name },
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 int init_srcu_struct(struct srcu_struct *sp);
 
+#define __SRCU_DEP_MAP_INIT(srcu_name)
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 void process_srcu(struct work_struct *work);
 
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.completed = -300,					\
+		.per_cpu_ref = &name##_srcu_array,			\
+		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
+		.running = false,					\
+		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
+		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
+		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
+		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
+		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * define and init a srcu struct at build time.
+ * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ */
+#define DEFINE_SRCU(name)						\
+	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+	struct srcu_struct name = __SRCU_STRUCT_INIT(name);
+
+#define DEFINE_STATIC_SRCU(name)					\
+	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+	static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
+
 /**
  * call_srcu() - Queue a callback for invocation after an SRCU grace period
  * @sp: srcu_struct in queue the callback
-- 
cgit v1.2.3-55-g7522


From dcbf832e5823156e8f155359b47bd108cac8ad68 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 5 Oct 2012 23:07:19 +0200
Subject: vtime: Gather vtime declarations to their own header file

These APIs are scattered around and are going to expand a bit.
Let's create a dedicated header file for sanity.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/hardirq.h     | 11 +----------
 include/linux/kernel_stat.h |  9 +--------
 include/linux/vtime.h       | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 18 deletions(-)
 create mode 100644 include/linux/vtime.h

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index cab3da3d0949..b083a475423d 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
+#include <linux/vtime.h>
 #include <asm/hardirq.h>
 
 /*
@@ -129,16 +130,6 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq)	barrier()
 #endif
 
-struct task_struct;
-
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
-#else
-extern void vtime_account(struct task_struct *tsk);
-#endif
-
 #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 
 static inline void rcu_nmi_enter(void)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 36d12f0884c3..1865b1f29770 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -7,6 +7,7 @@
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/vtime.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -130,12 +131,4 @@ extern void account_process_tick(struct task_struct *, int user);
 extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void vtime_task_switch(struct task_struct *prev);
-extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
-#else
-static inline void vtime_task_switch(struct task_struct *prev) { }
-#endif
-
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
new file mode 100644
index 000000000000..7199c24c8204
--- /dev/null
+++ b/include/linux/vtime.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_KERNEL_VTIME_H
+#define _LINUX_KERNEL_VTIME_H
+
+struct task_struct;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_task_switch(struct task_struct *prev);
+extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#else
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
+
+#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
+static inline void vtime_account(struct task_struct *tsk)
+{
+}
+#else
+extern void vtime_account(struct task_struct *tsk);
+#endif
+
+#endif /* _LINUX_KERNEL_VTIME_H */
-- 
cgit v1.2.3-55-g7522


From 11113334d1c5dd5355c86e531c29f1202a855c86 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 24 Oct 2012 18:05:51 +0200
Subject: vtime: Make vtime_account_system() irqsafe

vtime_account_system() currently has only one caller with
vtime_account() which is irq safe.

Now we are going to call it from other places like kvm where
irqs are not always disabled by the time we account the cputime.

So let's make it irqsafe. The arch implementation part is now
prefixed with "__".

vtime_account_idle() arch implementation is prefixed accordingly
to stay consistent.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/ia64/kernel/time.c    |  8 ++++----
 arch/powerpc/kernel/time.c |  4 ++--
 arch/s390/kernel/vtime.c   |  4 ++++
 include/linux/vtime.h      |  4 +++-
 kernel/sched/cputime.c     | 16 +++++++++++++---
 5 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index f6388216080d..5e4850305d3f 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -106,9 +106,9 @@ void vtime_task_switch(struct task_struct *prev)
 	struct thread_info *ni = task_thread_info(current);
 
 	if (idle_task(smp_processor_id()) != prev)
-		vtime_account_system(prev);
+		__vtime_account_system(prev);
 	else
-		vtime_account_idle(prev);
+		__vtime_account_idle(prev);
 
 	vtime_account_user(prev);
 
@@ -135,14 +135,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 	return delta_stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void __vtime_account_system(struct task_struct *tsk)
 {
 	cputime_t delta = vtime_delta(tsk);
 
 	account_system_time(tsk, 0, delta, delta);
 }
 
-void vtime_account_idle(struct task_struct *tsk)
+void __vtime_account_idle(struct task_struct *tsk)
 {
 	account_idle_time(vtime_delta(tsk));
 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ce4cb772dc78..0db456f30d45 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -336,7 +336,7 @@ static u64 vtime_delta(struct task_struct *tsk,
 	return delta;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void __vtime_account_system(struct task_struct *tsk)
 {
 	u64 delta, sys_scaled, stolen;
 
@@ -346,7 +346,7 @@ void vtime_account_system(struct task_struct *tsk)
 		account_steal_time(stolen);
 }
 
-void vtime_account_idle(struct task_struct *tsk)
+void __vtime_account_idle(struct task_struct *tsk)
 {
 	u64 delta, sys_scaled, stolen;
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 790334427895..783e988c4e1e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -140,6 +140,10 @@ void vtime_account(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 
+void __vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account")));
+EXPORT_SYMBOL_GPL(__vtime_account_system);
+
 void __kprobes vtime_stop_cpu(void)
 {
 	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 7199c24c8204..b9fc4f9ab470 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -5,10 +5,12 @@ struct task_struct;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
+extern void __vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
+extern void __vtime_account_idle(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
+static inline void vtime_account_system(struct task_struct *tsk) { }
 #endif
 
 #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 81b763ba58a6..0359f47b0ae4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -433,10 +433,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*st = cputime.stime;
 }
 
+void vtime_account_system(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__vtime_account_system(tsk);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * __vtime_account_system() and __vtime_account_idle(). Archs that
  * have other meaning of the idle time (s390 only includes the
  * time spent by the CPU when it's in low power mode) must override
  * vtime_account().
@@ -449,9 +459,9 @@ void vtime_account(struct task_struct *tsk)
 	local_irq_save(flags);
 
 	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
+		__vtime_account_system(tsk);
 	else
-		vtime_account_idle(tsk);
+		__vtime_account_idle(tsk);
 
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3-55-g7522


From b080935c8638e08134629d0a9ebdf35669bec14d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 5 Oct 2012 23:07:19 +0200
Subject: kvm: Directly account vtime to system on guest switch

Switching to or from guest context is done on ioctl context.
So by the time we call kvm_guest_enter() or kvm_guest_exit()
we know we are not running the idle task.

As a result, we can directly account the cputime using
vtime_account_system().

There are two good reasons to do this:

* We avoid some useless checks on guest switch. It optimizes
a bit this fast path.

* In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account()
checks for irq time to account. This is pointless since we know
we are not in an irq on guest switch. This is wasting cpu cycles
for no good reason. vtime_account_system() OTOH is a no-op in
this config option.

* We can remove the irq disable/enable around kvm guest switch in s390.

A further optimization may consist in introducing a vtime_account_guest()
that directly calls account_guest_time().

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Xiantao Zhang <xiantao.zhang@intel.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/s390/kvm/kvm-s390.c |  4 ----
 include/linux/kvm_host.h | 12 ++++++++++--
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ecced9d18986..d91a95568002 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -608,9 +608,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		kvm_s390_deliver_pending_interrupts(vcpu);
 
 	vcpu->arch.sie_block->icptcode = 0;
-	local_irq_disable();
 	kvm_guest_enter();
-	local_irq_enable();
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
 		   atomic_read(&vcpu->arch.sie_block->cpuflags));
 	trace_kvm_s390_sie_enter(vcpu,
@@ -629,9 +627,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
 		   vcpu->arch.sie_block->icptcode);
 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
-	local_irq_disable();
 	kvm_guest_exit();
-	local_irq_enable();
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 	return rc;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 93bfc9f9815c..0e2212fe4784 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -737,7 +737,11 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 static inline void kvm_guest_enter(void)
 {
 	BUG_ON(preemptible());
-	vtime_account(current);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
 	current->flags |= PF_VCPU;
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
@@ -751,7 +755,11 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	vtime_account(current);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
 	current->flags &= ~PF_VCPU;
 }
 
-- 
cgit v1.2.3-55-g7522


From fa5058f3b63153e0147ef65bcdb3a4ee63581346 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 6 Oct 2012 04:07:19 +0200
Subject: cputime: Specialize irq vtime hooks

With CONFIG_VIRT_CPU_ACCOUNTING, when vtime_account()
is called in irq entry/exit, we perform a check on the
context: if we are interrupting the idle task we
account the pending cputime to idle, otherwise account
to system time or its sub-areas: tsk->stime, hardirq time,
softirq time, ...

However this check for idle only concerns the hardirq entry
and softirq entry:

* Hardirq may directly interrupt the idle task, in which case
we need to flush the pending CPU time to idle.

* The idle task may be directly interrupted by a softirq if
it calls local_bh_enable(). There is probably no such call
in any idle task but we need to cover every case. Ksoftirqd
is not concerned because the idle time is flushed on context
switch and softirq in the end of hardirq have the idle time
already flushed from the hardirq entry.

In the other cases we always account to system/irq time:

* On hardirq exit we account the time to hardirq time.
* On softirq exit we account the time to softirq time.

To optimize this and avoid the indirect call to vtime_account()
and the checks it performs, specialize the vtime irq APIs and
only perform the check on irq entry. Irq exit can directly call
vtime_account_system().

CONFIG_IRQ_TIME_ACCOUNTING behaviour doesn't change and directly
maps to its own vtime_account() implementation. One may want
to take benefits from the new APIs to optimize irq time accounting
as well in the future.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/hardirq.h |  4 ++--
 include/linux/vtime.h   | 25 +++++++++++++++++++++++++
 kernel/softirq.c        |  6 +++---
 3 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index b083a475423d..624ef3f45c8e 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		vtime_account(current);		\
+		vtime_account_irq_enter(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		vtime_account(current);		\
+		vtime_account_irq_exit(current);	\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index b9fc4f9ab470..c35c02223da8 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -21,4 +21,29 @@ static inline void vtime_account(struct task_struct *tsk)
 extern void vtime_account(struct task_struct *tsk);
 #endif
 
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+	/*
+	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
+	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
+	 * Softirq can also interrupt idle task directly if it calls
+	 * local_bh_enable(). Such case probably don't exist but we never know.
+	 * Ksoftirqd is not concerned because idle time is flushed on context
+	 * switch. Softirqs in the end of hardirqs are also not a problem because
+	 * the idle time is flushed on hardirq time already.
+	 */
+	vtime_account(tsk);
+}
+
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	__vtime_account_system(tsk);
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	vtime_account(tsk);
+#endif
+}
+
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index cc96bdc0c2c9..ed567babe789 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	vtime_account(current);
+	vtime_account_irq_enter(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
 
 	lockdep_softirq_exit();
 
-	vtime_account(current);
+	vtime_account_irq_exit(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	vtime_account(current);
+	vtime_account_irq_exit(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
-- 
cgit v1.2.3-55-g7522


From 3e1df4f506836e6bea1ab61cf88c75c8b1840643 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 6 Oct 2012 05:23:22 +0200
Subject: cputime: Separate irqtime accounting from generic vtime

vtime_account() doesn't have the same role in
CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_IRQ_TIME_ACCOUNTING.

In the first case it handles time accounting in any context. In
the second case it only handles irq time accounting.

So when vtime_account() is called from outside vtime_account_irq_*()
this call is pointless to CONFIG_IRQ_TIME_ACCOUNTING.

To fix the confusion, change vtime_account() to irqtime_account_irq()
in CONFIG_IRQ_TIME_ACCOUNTING. This way we ensure future account_vtime()
calls won't waste useless cycles in the irqtime APIs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/vtime.h  | 18 ++++++++----------
 kernel/sched/cputime.c |  4 ++--
 2 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index c35c02223da8..0c2a2d303020 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,17 +8,18 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void __vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void __vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
+static inline void __vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+extern void irqtime_account_irq(struct task_struct *tsk);
 #else
-extern void vtime_account(struct task_struct *tsk);
+static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
 static inline void vtime_account_irq_enter(struct task_struct *tsk)
@@ -33,17 +34,14 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk)
 	 * the idle time is flushed on hardirq time already.
 	 */
 	vtime_account(tsk);
+	irqtime_account_irq(tsk);
 }
 
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	/* On hard|softirq exit we always account to hard|softirq cputime */
 	__vtime_account_system(tsk);
-#endif
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	vtime_account(tsk);
-#endif
+	irqtime_account_irq(tsk);
 }
 
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 0359f47b0ae4..8d859dae5bed 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -43,7 +43,7 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  * Called before incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
  */
-void vtime_account(struct task_struct *curr)
+void irqtime_account_irq(struct task_struct *curr)
 {
 	unsigned long flags;
 	s64 delta;
@@ -73,7 +73,7 @@ void vtime_account(struct task_struct *curr)
 	irq_time_write_end();
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
 static int irqtime_account_hi_update(void)
 {
-- 
cgit v1.2.3-55-g7522


From 81c52c56e2b43589091ee29038bcf793d3f184ab Mon Sep 17 00:00:00 2001
From: Xiao Guangrong
Date: Tue, 16 Oct 2012 20:10:59 +0800
Subject: KVM: do not treat noslot pfn as a error pfn

This patch filters noslot pfn out from error pfns based on Marcelo comment:
noslot pfn is not a error pfn

After this patch,
- is_noslot_pfn indicates that the gfn is not in slot
- is_error_pfn indicates that the gfn is in slot but the error is occurred
  when translate the gfn to pfn
- is_error_noslot_pfn indicates that the pfn either it is error pfns or it
  is noslot pfn
And is_invalid_pfn can be removed, it makes the code more clean

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/book3s_32_mmu_host.c |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c |  2 +-
 arch/powerpc/kvm/e500_tlb.c           |  2 +-
 arch/x86/kvm/mmu.c                    |  4 ++--
 arch/x86/kvm/paging_tmpl.h            |  2 +-
 arch/x86/kvm/x86.c                    |  2 +-
 include/linux/kvm_host.h              | 28 ++++++++++++++++++++--------
 virt/kvm/iommu.c                      |  4 ++--
 virt/kvm/kvm_main.c                   |  6 +++---
 9 files changed, 32 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index d1107a9b5d13..00e619bf608e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -155,7 +155,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 
 	/* Get host physical address for gpa */
 	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
-	if (is_error_pfn(hpaddr)) {
+	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
 				 orig_pte->eaddr);
 		r = -EINVAL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index d0205a545a81..ead58e317294 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,7 +93,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 
 	/* Get host physical address for gpa */
 	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
-	if (is_error_pfn(hpaddr)) {
+	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
 		r = -EINVAL;
 		goto out;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c73389477d17..6305ee692ef7 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -524,7 +524,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	if (likely(!pfnmap)) {
 		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
 		pfn = gfn_to_pfn_memslot(slot, gfn);
-		if (is_error_pfn(pfn)) {
+		if (is_error_noslot_pfn(pfn)) {
 			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
 					(long)gfn);
 			return;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aabb1289ff04..b875a9ed9b8e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2699,7 +2699,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
 	 * here.
 	 */
-	if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
 	    level == PT_PAGE_TABLE_LEVEL &&
 	    PageTransCompound(pfn_to_page(pfn)) &&
 	    !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
@@ -2733,7 +2733,7 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	bool ret = true;
 
 	/* The pfn is invalid, report the error! */
-	if (unlikely(is_invalid_pfn(pfn))) {
+	if (unlikely(is_error_pfn(pfn))) {
 		*ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
 		goto exit;
 	}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d17decaf1db9..891eb6d93b8b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -323,7 +323,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	protect_clean_gpte(&pte_access, gpte);
 	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 			no_dirty_log && (pte_access & ACC_WRITE_MASK));
-	if (is_invalid_pfn(pfn))
+	if (is_error_pfn(pfn))
 		return false;
 
 	/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6e5f069bee30..49fa1f0e59bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4504,7 +4504,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	 * instruction -> ...
 	 */
 	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-	if (!is_error_pfn(pfn)) {
+	if (!is_error_noslot_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return true;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 82e2c783a21e..99a47627e046 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -58,28 +58,40 @@
 
 /*
  * For the normal pfn, the highest 12 bits should be zero,
- * so we can mask these bits to indicate the error.
+ * so we can mask bit 62 ~ bit 52  to indicate the error pfn,
+ * mask bit 63 to indicate the noslot pfn.
  */
-#define KVM_PFN_ERR_MASK	(0xfffULL << 52)
+#define KVM_PFN_ERR_MASK	(0x7ffULL << 52)
+#define KVM_PFN_ERR_NOSLOT_MASK	(0xfffULL << 52)
+#define KVM_PFN_NOSLOT		(0x1ULL << 63)
 
 #define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
 #define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
-#define KVM_PFN_ERR_BAD		(KVM_PFN_ERR_MASK + 2)
-#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 3)
+#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
 
+/*
+ * error pfns indicate that the gfn is in slot but faild to
+ * translate it to pfn on host.
+ */
 static inline bool is_error_pfn(pfn_t pfn)
 {
 	return !!(pfn & KVM_PFN_ERR_MASK);
 }
 
-static inline bool is_noslot_pfn(pfn_t pfn)
+/*
+ * error_noslot pfns indicate that the gfn can not be
+ * translated to pfn - it is not in slot or failed to
+ * translate it to pfn.
+ */
+static inline bool is_error_noslot_pfn(pfn_t pfn)
 {
-	return pfn == KVM_PFN_ERR_BAD;
+	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
 }
 
-static inline bool is_invalid_pfn(pfn_t pfn)
+/* noslot pfn indicates that the gfn is not in slot. */
+static inline bool is_noslot_pfn(pfn_t pfn)
 {
-	return !is_noslot_pfn(pfn) && is_error_pfn(pfn);
+	return pfn == KVM_PFN_NOSLOT;
 }
 
 #define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 18e1e30019e3..4a340cb23013 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -52,7 +52,7 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
 	end_gfn = gfn + (size >> PAGE_SHIFT);
 	gfn    += 1;
 
-	if (is_error_pfn(pfn))
+	if (is_error_noslot_pfn(pfn))
 		return pfn;
 
 	while (gfn < end_gfn)
@@ -106,7 +106,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 * important because we unmap and unpin in 4kb steps later.
 		 */
 		pfn = kvm_pin_pages(slot, gfn, page_size);
-		if (is_error_pfn(pfn)) {
+		if (is_error_noslot_pfn(pfn)) {
 			gfn += 1;
 			continue;
 		}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be70035fd42a..2fb73191801f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1208,7 +1208,7 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
 		return KVM_PFN_ERR_RO_FAULT;
 
 	if (kvm_is_error_hva(addr))
-		return KVM_PFN_ERR_BAD;
+		return KVM_PFN_NOSLOT;
 
 	/* Do not map writable pfn in the readonly memslot. */
 	if (writable && memslot_is_readonly(slot)) {
@@ -1290,7 +1290,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
 
 static struct page *kvm_pfn_to_page(pfn_t pfn)
 {
-	if (is_error_pfn(pfn))
+	if (is_error_noslot_pfn(pfn))
 		return KVM_ERR_PTR_BAD_PAGE;
 
 	if (kvm_is_mmio_pfn(pfn)) {
@@ -1322,7 +1322,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
 		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
-- 
cgit v1.2.3-55-g7522


From 37c67d03989eca60b28d67398d9388f653454c5d Mon Sep 17 00:00:00 2001
From: Paul Walmsley
Date: Mon, 29 Oct 2012 20:49:44 -0600
Subject: ARM: OMAP2+: WDT: move init; add read_reset_sources pdata function
 pointer

The OMAP watchdog timer driver directly calls a function exported by
code in arch/arm/mach-omap2.  This is not good; it tightly couples
this driver to the mach-omap2 integration code.  Instead, add a
temporary platform_data function pointer to abstract this function
call.  A subsequent patch will convert the watchdog driver to use this
function pointer.

This patch also moves the device creation code out of
arch/arm/mach-omap2/devices.c and into arch/arm/mach-omap2/wd_timer.c.
This is another step towards the removal of
arch/arm/mach-omap2/devices.c.

Cc: Wim Van Sebroeck <wim@iguana.be>
Acked-by: Wim Van Sebroeck <wim@iguana.be>
[paul@pwsan.com: skip wd_timer device creation when DT blob is present]
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap1/devices.c               | 21 +++++++++++++---
 arch/arm/mach-omap2/devices.c               | 26 --------------------
 arch/arm/mach-omap2/wd_timer.c              | 35 +++++++++++++++++++++++++-
 include/linux/platform_data/omap-wd-timer.h | 38 +++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/platform_data/omap-wd-timer.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c
index 645668e2b1d5..745031870ce4 100644
--- a/arch/arm/mach-omap1/devices.c
+++ b/arch/arm/mach-omap1/devices.c
@@ -17,6 +17,8 @@
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 
+#include <linux/platform_data/omap-wd-timer.h>
+
 #include <asm/mach/map.h>
 
 #include <mach/tc.h>
@@ -448,18 +450,31 @@ static struct resource wdt_resources[] = {
 };
 
 static struct platform_device omap_wdt_device = {
-	.name	   = "omap_wdt",
-	.id	     = -1,
+	.name		= "omap_wdt",
+	.id		= -1,
 	.num_resources	= ARRAY_SIZE(wdt_resources),
 	.resource	= wdt_resources,
 };
 
 static int __init omap_init_wdt(void)
 {
+	struct omap_wd_timer_platform_data pdata;
+	int ret;
+
 	if (!cpu_is_omap16xx())
 		return -ENODEV;
 
-	return platform_device_register(&omap_wdt_device);
+	pdata.read_reset_sources = omap1_get_reset_sources;
+
+	ret = platform_device_register(&omap_wdt_device);
+	if (!ret) {
+		ret = platform_device_add_data(&omap_wdt_device, &pdata,
+					       sizeof(pdata));
+		if (ret)
+			platform_device_del(&omap_wdt_device);
+	}
+
+	return ret;
 }
 subsys_initcall(omap_init_wdt);
 #endif
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 2ad491d6910b..cf365c387c06 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -646,29 +646,3 @@ static int __init omap2_init_devices(void)
 	return 0;
 }
 arch_initcall(omap2_init_devices);
-
-#if defined(CONFIG_OMAP_WATCHDOG) || defined(CONFIG_OMAP_WATCHDOG_MODULE)
-static int __init omap_init_wdt(void)
-{
-	int id = -1;
-	struct platform_device *pdev;
-	struct omap_hwmod *oh;
-	char *oh_name = "wd_timer2";
-	char *dev_name = "omap_wdt";
-
-	if (!cpu_class_is_omap2() || of_have_populated_dt())
-		return 0;
-
-	oh = omap_hwmod_lookup(oh_name);
-	if (!oh) {
-		pr_err("Could not look up wd_timer%d hwmod\n", id);
-		return -EINVAL;
-	}
-
-	pdev = omap_device_build(dev_name, id, oh, NULL, 0, NULL, 0, 0);
-	WARN(IS_ERR(pdev), "Can't build omap_device for %s:%s.\n",
-				dev_name, oh->name);
-	return 0;
-}
-subsys_initcall(omap_init_wdt);
-#endif
diff --git a/arch/arm/mach-omap2/wd_timer.c b/arch/arm/mach-omap2/wd_timer.c
index f6b6c37ac3f4..5a8629ff0ab0 100644
--- a/arch/arm/mach-omap2/wd_timer.c
+++ b/arch/arm/mach-omap2/wd_timer.c
@@ -11,10 +11,14 @@
 #include <linux/io.h>
 #include <linux/err.h>
 
-#include "omap_hwmod.h"
+#include <linux/platform_data/omap-wd-timer.h>
 
+#include "omap_hwmod.h"
+#include "omap_device.h"
 #include "wd_timer.h"
 #include "common.h"
+#include "prm.h"
+#include "soc.h"
 
 /*
  * In order to avoid any assumptions from bootloader regarding WDT
@@ -99,3 +103,32 @@ int omap2_wd_timer_reset(struct omap_hwmod *oh)
 	return (c == MAX_MODULE_SOFTRESET_WAIT) ? -ETIMEDOUT :
 		omap2_wd_timer_disable(oh);
 }
+
+static int __init omap_init_wdt(void)
+{
+	int id = -1;
+	struct platform_device *pdev;
+	struct omap_hwmod *oh;
+	char *oh_name = "wd_timer2";
+	char *dev_name = "omap_wdt";
+	struct omap_wd_timer_platform_data pdata;
+
+	if (!cpu_class_is_omap2() || of_have_populated_dt())
+		return 0;
+
+	oh = omap_hwmod_lookup(oh_name);
+	if (!oh) {
+		pr_err("Could not look up wd_timer%d hwmod\n", id);
+		return -EINVAL;
+	}
+
+	pdata.read_reset_sources = prm_read_reset_sources;
+
+	pdev = omap_device_build(dev_name, id, oh, &pdata,
+				 sizeof(struct omap_wd_timer_platform_data),
+				 NULL, 0, 0);
+	WARN(IS_ERR(pdev), "Can't build omap_device for %s:%s.\n",
+	     dev_name, oh->name);
+	return 0;
+}
+subsys_initcall(omap_init_wdt);
diff --git a/include/linux/platform_data/omap-wd-timer.h b/include/linux/platform_data/omap-wd-timer.h
new file mode 100644
index 000000000000..d75f5f802d98
--- /dev/null
+++ b/include/linux/platform_data/omap-wd-timer.h
@@ -0,0 +1,38 @@
+/*
+ * OMAP2+ WDTIMER-specific function prototypes
+ *
+ * Copyright (C) 2012 Texas Instruments, Inc.
+ * Paul Walmsley
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_OMAP_WD_TIMER_H
+#define __LINUX_PLATFORM_DATA_OMAP_WD_TIMER_H
+
+#include <linux/types.h>
+
+/*
+ * Standardized OMAP reset source bits
+ *
+ * This is a subset of the ones listed in arch/arm/mach-omap2/prm.h
+ * and are the only ones needed in the watchdog driver.
+ */
+#define OMAP_MPU_WD_RST_SRC_ID_SHIFT				3
+
+/**
+ * struct omap_wd_timer_platform_data - WDTIMER integration to the host SoC
+ * @read_reset_sources - fn ptr for the SoC to indicate the last reset cause
+ *
+ * The function pointed to by @read_reset_sources must return its data
+ * in a standard format - search for RST_SRC_ID_SHIFT in
+ * arch/arm/mach-omap2
+ */
+struct omap_wd_timer_platform_data {
+	u32 (*read_reset_sources)(void);
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From b3343a2a2c95b3b7ed4f6596e860c4276ba46217 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Tue, 18 Sep 2012 00:01:12 +0000
Subject: net, ixgbe: handle link local multicast addresses in SR-IOV mode

In SR-IOV mode the PF driver acts as the uplink port and is
used to send control packets e.g. lldpad, stp, etc.

   eth0.1     eth0.2     eth0
   VF         VF         PF
   |          |          |   <-- stand-in for uplink
   |          |          |
  --------------------------
  |  Embedded Switch       |
  --------------------------
              |
             MAC   <-- uplink

But the embedded switch is setup to forward multicast addresses
to all interfaces both VFs and PF and onto the physical link.
This results in reserved MAC addresses used by control protocols
to be forwarded over the switch onto the VF.

In the LLDP case the PF sends an LLDPDU and it is currently
being forwarded to all the VFs who then see the PF as a peer.
This is incorrect.

This patch adds the multicast addresses to the RAR table in the
hardware to prevent this behavior.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  2 +-
 drivers/net/ethernet/intel/ixgbevf/vf.c       |  3 +++
 include/linux/etherdevice.h                   | 19 +++++++++++++++++++
 net/bridge/br_device.c                        |  2 +-
 net/bridge/br_input.c                         | 15 ---------------
 net/bridge/br_private.h                       |  1 -
 net/bridge/br_sysfs_br.c                      |  3 ++-
 7 files changed, 26 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 88d636a7459c..97bab45ed95f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6967,7 +6967,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}
 
-	if (is_unicast_ether_addr(addr)) {
+	if (is_unicast_ether_addr(addr) || is_link_local(addr)) {
 		u32 rar_uc_entries = IXGBE_MAX_PF_MACVLANS;
 
 		if (netdev_uc_count(dev) < rar_uc_entries)
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 5fa397b953eb..c0fd1ef49400 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -331,6 +331,9 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == cnt)
 			break;
+		if (is_link_local(ha->addr))
+			continue;
+
 		vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr);
 	}
 
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index b006ba0a9f42..45ded4be7b43 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -51,6 +51,25 @@ extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
+/* Reserved Ethernet Addresses per IEEE 802.1Q */
+static const u8 br_reserved_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+/**
+ * is_link_local -  Determine if given Eth addr is a link local mcast address.
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per
+ * IEEE 802.1Q 8.6.3 Frame filtering.
+ */
+static inline int is_link_local(const unsigned char *dest)
+{
+	__be16 *a = (__be16 *)dest;
+	static const __be16 *b = (const __be16 *)br_reserved_address;
+	static const __be16 m = cpu_to_be16(0xfff0);
+
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
+}
+
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
  * @addr: Pointer to a six-byte array containing the Ethernet address
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 070e8a68cfc6..d41b6f9698d7 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -356,7 +356,7 @@ void br_dev_setup(struct net_device *dev)
 	br->bridge_id.prio[0] = 0x80;
 	br->bridge_id.prio[1] = 0x00;
 
-	memcpy(br->group_addr, br_group_address, ETH_ALEN);
+	memcpy(br->group_addr, br_reserved_address, ETH_ALEN);
 
 	br->stp_enabled = BR_NO_STP;
 	br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 76f15fda0212..d047978bf025 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -19,9 +19,6 @@
 #include <linux/export.h>
 #include "br_private.h"
 
-/* Bridge group multicast address 802.1d (pg 51). */
-const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
-
 /* Hook for brouter */
 br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
 EXPORT_SYMBOL(br_should_route_hook);
@@ -127,18 +124,6 @@ static int br_handle_local_finish(struct sk_buff *skb)
 	return 0;	 /* process further */
 }
 
-/* Does address match the link local multicast address.
- * 01:80:c2:00:00:0X
- */
-static inline int is_link_local(const unsigned char *dest)
-{
-	__be16 *a = (__be16 *)dest;
-	static const __be16 *b = (const __be16 *)br_group_address;
-	static const __be16 m = cpu_to_be16(0xfff0);
-
-	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
-}
-
 /*
  * Return NULL if skb is handled
  * note: already called with rcu_read_lock
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9b278c4ebee1..65b191a05dd6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -288,7 +288,6 @@ struct br_input_skb_cb {
 	pr_debug("%s: " format,  (br)->dev->name, ##args)
 
 extern struct notifier_block br_device_notifier;
-extern const u8 br_group_address[ETH_ALEN];
 
 /* called under bridge lock */
 static inline int br_is_root_bridge(const struct net_bridge *br)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c5c059333eab..e157b0dbcd82 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -14,6 +14,7 @@
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 #include <linux/if_bridge.h>
 #include <linux/rtnetlink.h>
 #include <linux/spinlock.h>
@@ -310,7 +311,7 @@ static ssize_t store_group_addr(struct device *d,
 
 	/* Must be 01:80:c2:00:00:0X */
 	for (i = 0; i < 5; i++)
-		if (new_addr[i] != br_group_address[i])
+		if (new_addr[i] != br_reserved_address[i])
 			return -EINVAL;
 
 	if (new_addr[5] & ~0xf)
-- 
cgit v1.2.3-55-g7522


From 02b91b55260f7a1bdc8da25866cf27f726f5788f Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Thu, 28 Jun 2012 18:26:52 +0200
Subject: drbd: introduce stop-sector to online verify

We now can schedule only a specific range of sectors for online verify,
or interrupt a running verify without interrupting the connection.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_int.h      |  1 +
 drivers/block/drbd/drbd_main.c     | 17 +++++++++++++----
 drivers/block/drbd/drbd_nl.c       | 11 +++++++----
 drivers/block/drbd/drbd_proc.c     | 12 +++++++++---
 drivers/block/drbd/drbd_receiver.c |  8 ++++++++
 drivers/block/drbd/drbd_worker.c   | 33 +++++++++++++++++++++++++++------
 include/linux/drbd.h               |  2 +-
 include/linux/drbd_nl.h            |  1 +
 8 files changed, 67 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9a6d3a4a739a..3cce7357402b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1052,6 +1052,7 @@ struct drbd_conf {
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
+	sector_t ov_stop_sector;
 	/* where are we now? (sector) */
 	sector_t ov_position;
 	/* Start sector of out of sync range (to merge printk reporting). */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index dfa08b7411c0..df9965d820c9 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1231,13 +1231,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 	wake_up(&mdev->misc_wait);
 	wake_up(&mdev->state_wait);
 
-	/* aborted verify run. log the last position */
+	/* Aborted verify run, or we reached the stop sector.
+	 * Log the last position, unless end-of-device. */
 	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
-	    ns.conn < C_CONNECTED) {
+	    ns.conn <= C_CONNECTED) {
 		mdev->ov_start_sector =
 			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
-		dev_info(DEV, "Online Verify reached sector %llu\n",
-			(unsigned long long)mdev->ov_start_sector);
+		if (mdev->ov_left)
+			dev_info(DEV, "Online Verify reached sector %llu\n",
+				(unsigned long long)mdev->ov_start_sector);
 	}
 
 	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
@@ -1703,6 +1705,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
 		drbd_send_state(mdev, ns);
 
+	/* Verify finished, or reached stop sector.  Peer did not know about
+	 * the stop sector, and we may even have changed the stop sector during
+	 * verify to interrupt/stop early.  Send the new state. */
+	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
+	&& mdev->agreed_pro_version >= 97)
+		drbd_send_state(mdev, ns);
+
 	/* Wake up role changes, that were delayed because of connection establishing */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
 		clear_bit(STATE_SENT, &mdev->flags);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ab660556a001..e2d368f1747e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2211,8 +2211,10 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 				    struct drbd_nl_cfg_reply *reply)
 {
 	/* default to resume from last known position, if possible */
-	struct start_ov args =
-		{ .start_sector = mdev->ov_start_sector };
+	struct start_ov args = {
+		.start_sector = mdev->ov_start_sector,
+		.stop_sector = ULLONG_MAX,
+	};
 
 	if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
 		reply->ret_code = ERR_MANDATORY_TAG;
@@ -2224,8 +2226,9 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
 
-	/* w_make_ov_request expects position to be aligned */
-	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
+	/* w_make_ov_request expects start position to be aligned */
+	mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = args.stop_sector;
 	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
 	drbd_resume_io(mdev);
 	return 0;
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 5496104f90b9..a5a453b4355f 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 		 * we convert to sectors in the display below. */
 		unsigned long bm_bits = drbd_bm_bits(mdev);
 		unsigned long bit_pos;
+		unsigned long long stop_sector = 0;
 		if (mdev->state.conn == C_VERIFY_S ||
-		    mdev->state.conn == C_VERIFY_T)
+		    mdev->state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - mdev->ov_left;
-		else
+			if (mdev->agreed_pro_version >= 97)
+				stop_sector = mdev->ov_stop_sector;
+		} else
 			bit_pos = mdev->bm_resync_fo;
 		/* Total sectors may be slightly off for oddly
 		 * sized devices. So what. */
 		seq_printf(seq,
-			"\t%3d%% sector pos: %llu/%llu\n",
+			"\t%3d%% sector pos: %llu/%llu",
 			(int)(bit_pos / (bm_bits/100+1)),
 			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
 			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
+			seq_printf(seq, " stop sector: %llu", stop_sector);
+		seq_printf(seq, "\n");
 	}
 }
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 434adf75259a..280735da1963 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3255,6 +3255,14 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 		}
 	}
 
+	/* explicit verify finished notification, stop sector reached. */
+	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
+	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
+		ov_oos_print(mdev);
+		drbd_resync_finished(mdev);
+		return true;
+	}
+
 	/* peer says his disk is inconsistent, while we think it is uptodate,
 	 * and this happens while the peer still thinks we have a sync going on,
 	 * but we think we are already done with the sync.
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 6bce2cc179d4..1352455dd7dd 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -691,6 +691,7 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 	int number, i, size;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel))
 		return 1;
@@ -699,9 +700,17 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 
 	sector = mdev->ov_position;
 	for (i = 0; i < number; i++) {
-		if (sector >= capacity) {
+		if (sector >= capacity)
 			return 1;
-		}
+
+		/* We check for "finished" only in the reply path:
+		 * w_e_end_ov_reply().
+		 * We need to send at least one request out. */
+		stop_sector_reached = i > 0
+			&& mdev->agreed_pro_version >= 97
+			&& sector >= mdev->ov_stop_sector;
+		if (stop_sector_reached)
+			break;
 
 		size = BM_BLOCK_SIZE;
 
@@ -725,7 +734,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 
  requeue:
 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
-	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	if (i == 0 || !stop_sector_reached)
+		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	return 1;
 }
 
@@ -808,7 +818,12 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	if (dt <= 0)
 		dt = 1;
+	
 	db = mdev->rs_total;
+	/* adjust for verify start and stop sectors, respective reached position */
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		db -= mdev->ov_left;
+
 	dbdt = Bit2KB(db/dt);
 	mdev->rs_paused /= HZ;
 
@@ -831,7 +846,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	ns.conn = C_CONNECTED;
 
 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-	     verify_done ? "Online verify " : "Resync",
+	     verify_done ? "Online verify" : "Resync",
 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
 	n_oos = drbd_bm_total_weight(mdev);
@@ -912,7 +927,9 @@ out:
 	mdev->rs_total  = 0;
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
-	if (verify_done)
+
+	/* reset start sector, if we reached end of device */
+	if (verify_done && mdev->ov_left == 0)
 		mdev->ov_start_sector = 0;
 
 	drbd_md_sync(mdev);
@@ -1158,6 +1175,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	unsigned int size = e->size;
 	int digest_size;
 	int ok, eq = 0;
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel)) {
 		drbd_free_ee(mdev, e);
@@ -1208,7 +1226,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	if ((mdev->ov_left & 0x200) == 0x200)
 		drbd_advance_rs_marks(mdev, mdev->ov_left);
 
-	if (mdev->ov_left == 0) {
+	stop_sector_reached = mdev->agreed_pro_version >= 97 &&
+		(sector + (size>>9)) >= mdev->ov_stop_sector;
+
+	if (mdev->ov_left == 0 || stop_sector_reached) {
 		ov_oos_print(mdev);
 		drbd_resync_finished(mdev);
 	}
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 47e3d4850584..4a7eccbd1292 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.13"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 97
 
 
 enum drbd_io_error_p {
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index a8706f08ab36..f6a576df19e0 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -145,6 +145,7 @@ NL_PACKET(dump_ee, 24,
 
 NL_PACKET(start_ov, 25,
 	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
+	NL_INT64(	90,	T_MANDATORY,	stop_sector)
 )
 
 NL_PACKET(new_c_uuid, 26,
-- 
cgit v1.2.3-55-g7522


From ccae7868b0c5697508a541c531cf96b361d62c1c Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Wed, 26 Sep 2012 14:07:04 +0200
Subject: drbd: log request sector offset and size for IO errors

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_req.c | 19 ++++++++++++++++++-
 include/linux/drbd.h          |  2 +-
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d9e5962a9a8c..135ea76ed502 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -387,6 +387,20 @@ out_conflict:
 	return 1;
 }
 
+static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req)
+{
+        char b[BDEVNAME_SIZE];
+
+	if (__ratelimit(&drbd_ratelimit_state))
+		return;
+
+	dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n",
+			(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
+			(unsigned long long)req->sector,
+			req->size >> 9,
+			bdevname(mdev->ldev->backing_bdev, b));
+}
+
 /* obviously this could be coded as many single functions
  * instead of one huge switch,
  * or by putting the code directly in the respective locations
@@ -455,6 +469,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
+		drbd_report_io_error(mdev, req);
 		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 		_req_may_be_done_not_susp(req, m);
 		break;
@@ -477,6 +492,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 			break;
 		}
 
+		drbd_report_io_error(mdev, req);
 		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 
 	goto_queue_for_net_read:
@@ -900,7 +916,8 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 
 	if (!(local || remote) && !is_susp(mdev->state)) {
 		if (__ratelimit(&drbd_ratelimit_state))
-			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+			dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
+					(unsigned long long)req->sector, req->size >> 9);
 		goto fail_free_complete;
 	}
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 4a7eccbd1292..94f58a102bbb 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.13"
+#define REL_VERSION "8.3.14"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 97
-- 
cgit v1.2.3-55-g7522


From 5d9db883761ad1bc2245fd3018715549b974203d Mon Sep 17 00:00:00 2001
From: Matthew Garrett
Date: Fri, 5 Oct 2012 13:54:56 +0800
Subject: efi: Add support for a UEFI variable filesystem

The existing EFI variables code only supports variables of up to 1024
bytes. This limitation existed in version 0.99 of the EFI specification,
but was removed before any full releases. Since variables can now be
larger than a single page, sysfs isn't the best interface for this. So,
instead, let's add a filesystem. Variables can be read, written and
created, with the first 4 bytes of each variable representing its UEFI
attributes. The create() method doesn't actually commit to flash since
zero-length variables can't exist per-spec.

Updates from Jeremy Kerr <jeremy.kerr@canonical.com>.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efivars.c | 384 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/efi.h        |   5 +
 2 files changed, 383 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index d10c9873dd9a..b605c4849772 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -80,6 +80,10 @@
 #include <linux/slab.h>
 #include <linux/pstore.h>
 
+#include <linux/fs.h>
+#include <linux/ramfs.h>
+#include <linux/pagemap.h>
+
 #include <asm/uaccess.h>
 
 #define EFIVARS_VERSION "0.08"
@@ -91,6 +95,7 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(EFIVARS_VERSION);
 
 #define DUMP_NAME_LEN 52
+#define GUID_LEN 37
 
 /*
  * The maximum size of VariableName + Data = 1024
@@ -108,7 +113,6 @@ struct efi_variable {
 	__u32         Attributes;
 } __attribute__((packed));
 
-
 struct efivar_entry {
 	struct efivars *efivars;
 	struct efi_variable var;
@@ -122,6 +126,9 @@ struct efivar_attribute {
 	ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
 };
 
+static struct efivars __efivars;
+static struct efivar_operations ops;
+
 #define PSTORE_EFI_ATTRIBUTES \
 	(EFI_VARIABLE_NON_VOLATILE | \
 	 EFI_VARIABLE_BOOTSERVICE_ACCESS | \
@@ -629,14 +636,380 @@ static struct kobj_type efivar_ktype = {
 	.default_attrs = def_attrs,
 };
 
-static struct pstore_info efi_pstore_info;
-
 static inline void
 efivar_unregister(struct efivar_entry *var)
 {
 	kobject_put(&var->kobj);
 }
 
+static int efivarfs_file_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t efivarfs_file_write(struct file *file,
+		const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct efivar_entry *var = file->private_data;
+	struct efivars *efivars;
+	efi_status_t status;
+	void *data;
+	u32 attributes;
+	struct inode *inode = file->f_mapping->host;
+	int datasize = count - sizeof(attributes);
+
+	if (count < sizeof(attributes))
+		return -EINVAL;
+
+	data = kmalloc(datasize, GFP_KERNEL);
+
+	if (!data)
+		return -ENOMEM;
+
+	efivars = var->efivars;
+
+	if (copy_from_user(&attributes, userbuf, sizeof(attributes))) {
+		count = -EFAULT;
+		goto out;
+	}
+
+	if (attributes & ~(EFI_VARIABLE_MASK)) {
+		count = -EINVAL;
+		goto out;
+	}
+
+	if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
+		count = -EFAULT;
+		goto out;
+	}
+
+	if (validate_var(&var->var, data, datasize) == false) {
+		count = -EINVAL;
+		goto out;
+	}
+
+	status = efivars->ops->set_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    attributes, datasize,
+					    data);
+
+	switch (status) {
+	case EFI_SUCCESS:
+		mutex_lock(&inode->i_mutex);
+		i_size_write(inode, count);
+		mutex_unlock(&inode->i_mutex);
+		break;
+	case EFI_INVALID_PARAMETER:
+		count = -EINVAL;
+		break;
+	case EFI_OUT_OF_RESOURCES:
+		count = -ENOSPC;
+		break;
+	case EFI_DEVICE_ERROR:
+		count = -EIO;
+		break;
+	case EFI_WRITE_PROTECTED:
+		count = -EROFS;
+		break;
+	case EFI_SECURITY_VIOLATION:
+		count = -EACCES;
+		break;
+	case EFI_NOT_FOUND:
+		count = -ENOENT;
+		break;
+	default:
+		count = -EINVAL;
+		break;
+	}
+out:
+	kfree(data);
+
+	return count;
+}
+
+static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
+		size_t count, loff_t *ppos)
+{
+	struct efivar_entry *var = file->private_data;
+	struct efivars *efivars = var->efivars;
+	efi_status_t status;
+	unsigned long datasize = 0;
+	u32 attributes;
+	void *data;
+	ssize_t size;
+
+	status = efivars->ops->get_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    &attributes, &datasize, NULL);
+
+	if (status != EFI_BUFFER_TOO_SMALL)
+		return 0;
+
+	data = kmalloc(datasize + 4, GFP_KERNEL);
+
+	if (!data)
+		return 0;
+
+	status = efivars->ops->get_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    &attributes, &datasize,
+					    (data + 4));
+
+	if (status != EFI_SUCCESS)
+		return 0;
+
+	memcpy(data, &attributes, 4);
+	size = simple_read_from_buffer(userbuf, count, ppos,
+					data, datasize + 4);
+	kfree(data);
+
+	return size;
+}
+
+static void efivarfs_evict_inode(struct inode *inode)
+{
+	clear_inode(inode);
+}
+
+static const struct super_operations efivarfs_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+	.evict_inode = efivarfs_evict_inode,
+	.show_options = generic_show_options,
+};
+
+static struct super_block *efivarfs_sb;
+
+static const struct inode_operations efivarfs_dir_inode_operations;
+
+static const struct file_operations efivarfs_file_operations = {
+	.open	= efivarfs_file_open,
+	.read	= efivarfs_file_read,
+	.write	= efivarfs_file_write,
+	.llseek	= no_llseek,
+};
+
+static struct inode *efivarfs_get_inode(struct super_block *sb,
+				const struct inode *dir, int mode, dev_t dev)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_ino = get_next_ino();
+		inode->i_uid = inode->i_gid = 0;
+		inode->i_mode = mode;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		switch (mode & S_IFMT) {
+		case S_IFREG:
+			inode->i_fop = &efivarfs_file_operations;
+			break;
+		case S_IFDIR:
+			inode->i_op = &efivarfs_dir_inode_operations;
+			inode->i_fop = &simple_dir_operations;
+			inc_nlink(inode);
+			break;
+		}
+	}
+	return inode;
+}
+
+static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
+{
+	guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]);
+	guid->b[1] = hex_to_bin(str[4]) << 4 | hex_to_bin(str[5]);
+	guid->b[2] = hex_to_bin(str[2]) << 4 | hex_to_bin(str[3]);
+	guid->b[3] = hex_to_bin(str[0]) << 4 | hex_to_bin(str[1]);
+	guid->b[4] = hex_to_bin(str[11]) << 4 | hex_to_bin(str[12]);
+	guid->b[5] = hex_to_bin(str[9]) << 4 | hex_to_bin(str[10]);
+	guid->b[6] = hex_to_bin(str[16]) << 4 | hex_to_bin(str[17]);
+	guid->b[7] = hex_to_bin(str[14]) << 4 | hex_to_bin(str[15]);
+	guid->b[8] = hex_to_bin(str[19]) << 4 | hex_to_bin(str[20]);
+	guid->b[9] = hex_to_bin(str[21]) << 4 | hex_to_bin(str[22]);
+	guid->b[10] = hex_to_bin(str[24]) << 4 | hex_to_bin(str[25]);
+	guid->b[11] = hex_to_bin(str[26]) << 4 | hex_to_bin(str[27]);
+	guid->b[12] = hex_to_bin(str[28]) << 4 | hex_to_bin(str[29]);
+	guid->b[13] = hex_to_bin(str[30]) << 4 | hex_to_bin(str[31]);
+	guid->b[14] = hex_to_bin(str[32]) << 4 | hex_to_bin(str[33]);
+	guid->b[15] = hex_to_bin(str[34]) << 4 | hex_to_bin(str[35]);
+}
+
+static int efivarfs_create(struct inode *dir, struct dentry *dentry,
+			  umode_t mode, bool excl)
+{
+	struct inode *inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
+	struct efivars *efivars = &__efivars;
+	struct efivar_entry *var;
+	int namelen, i = 0, err = 0;
+
+	if (dentry->d_name.len < 38)
+		return -EINVAL;
+
+	if (!inode)
+		return -ENOSPC;
+
+	var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
+
+	if (!var)
+		return -ENOMEM;
+
+	namelen = dentry->d_name.len - GUID_LEN;
+
+	efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1,
+			&var->var.VendorGuid);
+
+	for (i = 0; i < namelen; i++)
+		var->var.VariableName[i] = dentry->d_name.name[i];
+
+	var->var.VariableName[i] = '\0';
+
+	inode->i_private = var;
+	var->efivars = efivars;
+	var->kobj.kset = efivars->kset;
+
+	err = kobject_init_and_add(&var->kobj, &efivar_ktype, NULL, "%s",
+			     dentry->d_name.name);
+	if (err)
+		goto out;
+
+	kobject_uevent(&var->kobj, KOBJ_ADD);
+	spin_lock(&efivars->lock);
+	list_add(&var->list, &efivars->list);
+	spin_unlock(&efivars->lock);
+	d_instantiate(dentry, inode);
+	dget(dentry);
+out:
+	if (err)
+		kfree(var);
+	return err;
+}
+
+static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct efivar_entry *var = dentry->d_inode->i_private;
+	struct efivars *efivars = var->efivars;
+	efi_status_t status;
+
+	spin_lock(&efivars->lock);
+
+	status = efivars->ops->set_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    0, 0, NULL);
+
+	if (status == EFI_SUCCESS || status == EFI_NOT_FOUND) {
+		list_del(&var->list);
+		spin_unlock(&efivars->lock);
+		efivar_unregister(var);
+		drop_nlink(dir);
+		dput(dentry);
+		return 0;
+	}
+
+	spin_unlock(&efivars->lock);
+	return -EINVAL;
+};
+
+int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode = NULL;
+	struct dentry *root;
+	struct efivar_entry *entry, *n;
+	struct efivars *efivars = &__efivars;
+	int err;
+
+	efivarfs_sb = sb;
+
+	sb->s_maxbytes          = MAX_LFS_FILESIZE;
+	sb->s_blocksize         = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
+	sb->s_magic             = PSTOREFS_MAGIC;
+	sb->s_op                = &efivarfs_ops;
+	sb->s_time_gran         = 1;
+
+	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+	if (!inode) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	inode->i_op = &efivarfs_dir_inode_operations;
+
+	root = d_make_root(inode);
+	sb->s_root = root;
+	if (!root) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	list_for_each_entry_safe(entry, n, &efivars->list, list) {
+		struct inode *inode;
+		struct dentry *dentry, *root = efivarfs_sb->s_root;
+		char *name;
+		unsigned long size = 0;
+		int len, i;
+
+		len = utf16_strlen(entry->var.VariableName);
+
+		/* GUID plus trailing NULL */
+		name = kmalloc(len + 38, GFP_ATOMIC);
+
+		for (i = 0; i < len; i++)
+			name[i] = entry->var.VariableName[i] & 0xFF;
+
+		name[len] = '-';
+
+		efi_guid_unparse(&entry->var.VendorGuid, name + len + 1);
+
+		name[len+GUID_LEN] = '\0';
+
+		inode = efivarfs_get_inode(efivarfs_sb, root->d_inode,
+					  S_IFREG | 0644, 0);
+		dentry = d_alloc_name(root, name);
+
+		efivars->ops->get_variable(entry->var.VariableName,
+					   &entry->var.VendorGuid,
+					   &entry->var.Attributes,
+					   &size,
+					   NULL);
+
+		mutex_lock(&inode->i_mutex);
+		inode->i_private = entry;
+		i_size_write(inode, size+4);
+		mutex_unlock(&inode->i_mutex);
+		d_add(dentry, inode);
+	}
+
+	return 0;
+fail:
+	iput(inode);
+	return err;
+}
+
+static struct dentry *efivarfs_mount(struct file_system_type *fs_type,
+				    int flags, const char *dev_name, void *data)
+{
+	return mount_single(fs_type, flags, data, efivarfs_fill_super);
+}
+
+static void efivarfs_kill_sb(struct super_block *sb)
+{
+	kill_litter_super(sb);
+	efivarfs_sb = NULL;
+}
+
+static struct file_system_type efivarfs_type = {
+	.name    = "efivarfs",
+	.mount   = efivarfs_mount,
+	.kill_sb = efivarfs_kill_sb,
+};
+
+static const struct inode_operations efivarfs_dir_inode_operations = {
+	.lookup = simple_lookup,
+	.unlink = efivarfs_unlink,
+	.create = efivarfs_create,
+};
+
+static struct pstore_info efi_pstore_info;
+
 #ifdef CONFIG_PSTORE
 
 static int efi_pstore_open(struct pstore_info *psi)
@@ -1198,6 +1571,8 @@ int register_efivars(struct efivars *efivars,
 		pstore_register(&efivars->efi_pstore_info);
 	}
 
+	register_filesystem(&efivarfs_type);
+
 out:
 	kfree(variable_name);
 
@@ -1205,9 +1580,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(register_efivars);
 
-static struct efivars __efivars;
-static struct efivar_operations ops;
-
 /*
  * For now we register the efi subsystem with the firmware subsystem
  * and the vars subsystem with the efi subsystem.  In the future, it
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8670eb1eb8cd..b2af1571592b 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -29,7 +29,12 @@
 #define EFI_UNSUPPORTED		( 3 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_BAD_BUFFER_SIZE     ( 4 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_BUFFER_TOO_SMALL	( 5 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_NOT_READY		( 6 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_DEVICE_ERROR	( 7 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_WRITE_PROTECTED	( 8 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_OUT_OF_RESOURCES	( 9 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_NOT_FOUND		(14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_SECURITY_VIOLATION	(26 | (1UL << (BITS_PER_LONG-1)))
 
 typedef unsigned long efi_status_t;
 typedef u8 efi_bool_t;
-- 
cgit v1.2.3-55-g7522


From 605e70c7aa1b7b0d554baf945630c1d606bbfbc3 Mon Sep 17 00:00:00 2001
From: Lee, Chun-Yi
Date: Fri, 5 Oct 2012 13:54:56 +0800
Subject: efi: add efivars kobject to efi sysfs folder

UEFI variable filesystem need a new mount point, so this patch add
efivars kobject to efi_kobj for create a /sys/firmware/efi/efivars
folder.

Cc: Matthew Garrett <mjg@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Lee, Chun-Yi <jlee@suse.com>
Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efivars.c | 9 +++++++++
 include/linux/efi.h        | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index d7658b4a5010..6793965b7c8b 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -1527,6 +1527,7 @@ void unregister_efivars(struct efivars *efivars)
 		sysfs_remove_bin_file(&efivars->kset->kobj, efivars->del_var);
 	kfree(efivars->new_var);
 	kfree(efivars->del_var);
+	kobject_put(efivars->kobject);
 	kset_unregister(efivars->kset);
 }
 EXPORT_SYMBOL_GPL(unregister_efivars);
@@ -1558,6 +1559,14 @@ int register_efivars(struct efivars *efivars,
 		goto out;
 	}
 
+	efivars->kobject = kobject_create_and_add("efivars", parent_kobj);
+	if (!efivars->kobject) {
+		pr_err("efivars: Subsystem registration failed.\n");
+		error = -ENOMEM;
+		kset_unregister(efivars->kset);
+		goto out;
+	}
+
 	/*
 	 * Per EFI spec, the maximum storage allocated for both
 	 * the variable name and variable data is 1024 bytes.
diff --git a/include/linux/efi.h b/include/linux/efi.h
index b2af1571592b..337aefbfb003 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -662,6 +662,7 @@ struct efivars {
 	spinlock_t lock;
 	struct list_head list;
 	struct kset *kset;
+	struct kobject *kobject;
 	struct bin_attribute *new_var, *del_var;
 	const struct efivar_operations *ops;
 	struct efivar_entry *walk_entry;
-- 
cgit v1.2.3-55-g7522


From bd52276fa1d420c3a504b76ffaaa1642cc79d4c4 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 25 May 2012 16:20:31 +0100
Subject: x86-64/efi: Use EFI to deal with platform wall clock (again)

Other than ix86, x86-64 on EFI so far didn't set the
{g,s}et_wallclock accessors to the EFI routines, thus
incorrectly using raw RTC accesses instead.

Simply removing the #ifdef around the respective code isn't
enough, however: While so far early get-time calls were done in
physical mode, this doesn't work properly for x86-64, as virtual
addresses would still need to be set up for all runtime regions
(which wasn't the case on the system I have access to), so
instead the patch moves the call to efi_enter_virtual_mode()
ahead (which in turn allows to drop all code related to calling
efi-get-time in physical mode).

Additionally the earlier calling of efi_set_executable()
requires the CPA code to cope, i.e. during early boot it must be
avoided to call cpa_flush_array(), as the first thing this
function does is a BUG_ON(irqs_disabled()).

Also make the two EFI functions in question here static -
they're not being referenced elsewhere.

History:

    This commit was originally merged as bacef661acdb ("x86-64/efi:
    Use EFI to deal with platform wall clock") but it resulted in some
    ASUS machines no longer booting due to a firmware bug, and so was
    reverted in f026cfa82f62. A pre-emptive fix for the buggy ASUS
    firmware was merged in 03a1c254975e ("x86, efi: 1:1 pagetable
    mapping for virtual EFI calls") so now this patch can be
    reapplied.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Matt Fleming <matt.fleming@intel.com>
Acked-by: Matthew Garrett <mjg@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com> [added commit history]
---
 arch/x86/mm/pageattr.c      | 10 ++++++----
 arch/x86/platform/efi/efi.c | 30 ++++--------------------------
 include/linux/efi.h         |  2 --
 init/main.c                 |  8 ++++----
 4 files changed, 14 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d23503..931930a96160 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-	 * avoid the wbindv. If the CPU does not support it and in the
-	 * error case we fall back to cpa_flush_all (which uses
-	 * wbindv):
+	 * avoid the wbindv. If the CPU does not support it, in the
+	 * error case, and during early boot (for EFI) we fall back
+	 * to cpa_flush_all (which uses wbinvd):
 	 */
-	if (!ret && cpu_has_clflush) {
+	if (early_boot_irqs_disabled)
+		__cpa_flush_all((void *)(long)cache);
+	else if (!ret && cpu_has_clflush) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index aded2a91162a..757834434e59 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,22 +235,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	return status;
 }
 
-static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
-					     efi_time_cap_t *tc)
-{
-	unsigned long flags;
-	efi_status_t status;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	efi_call_phys_prelog();
-	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
-				virt_to_phys(tc));
-	efi_call_phys_epilog();
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return status;
-}
-
-int efi_set_rtc_mmss(unsigned long nowtime)
+static int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
 	efi_status_t 	status;
@@ -279,7 +264,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
 	return 0;
 }
 
-unsigned long efi_get_time(void)
+static unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -635,18 +620,13 @@ static int __init efi_runtime_init(void)
 	}
 	/*
 	 * We will only need *early* access to the following
-	 * two EFI runtime services before set_virtual_address_map
+	 * EFI runtime service before set_virtual_address_map
 	 * is invoked.
 	 */
-	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
 		runtime->set_virtual_address_map;
-	/*
-	 * Make efi_get_time can be called before entering
-	 * virtual mode.
-	 */
-	efi.get_time = phys_efi_get_time;
+
 	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	return 0;
@@ -734,12 +714,10 @@ void __init efi_init(void)
 		efi_enabled = 0;
 		return;
 	}
-#ifdef CONFIG_X86_32
 	if (efi_native) {
 		x86_platform.get_wallclock = efi_get_time;
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
-#endif
 
 #if EFI_DEBUG
 	print_efi_memmap();
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 337aefbfb003..5e2308d9c6be 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -516,8 +516,6 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
-extern unsigned long efi_get_time(void);
-extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
diff --git a/init/main.c b/init/main.c
index 9cf77ab138a6..ae70b647b4d9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -461,6 +461,10 @@ static void __init mm_init(void)
 	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
+#ifdef CONFIG_X86
+	if (efi_enabled)
+		efi_enter_virtual_mode();
+#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -601,10 +605,6 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
-#ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
-#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);
-- 
cgit v1.2.3-55-g7522


From e05ed4d1fad9e730995abb08cb9bc3bffac5018b Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Mon, 15 Oct 2012 10:19:39 -0700
Subject: swiotlb: Return physical addresses when calling
 swiotlb_tbl_map_single

This change makes it so that swiotlb_tbl_map_single will return a physical
address instead of a virtual address when called.  The advantage to this once
again is that we are avoiding a number of virt_to_phys and phys_to_virt
translations by working with everything as a physical address.

One change I had to make in order to support using physical addresses is that
I could no longer trust 0 to be a invalid physical address on all platforms.
So instead I made it so that ~0 is returned on error.  This should never be a
valid return value as it implies that only one byte would be available for
use.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_map_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr.  This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c | 22 ++++++-------
 include/linux/swiotlb.h   | 11 +++++--
 lib/swiotlb.c             | 78 ++++++++++++++++++++++++-----------------------
 3 files changed, 59 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 58db6df866ef..8a6035aa69c9 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -338,9 +338,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 				enum dma_data_direction dir,
 				struct dma_attrs *attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = xen_phys_to_bus(phys);
-	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -356,16 +355,16 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
 	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
-	if (!map)
+	if (map == SWIOTLB_MAP_ERROR)
 		return DMA_ERROR_CODE;
 
-	dev_addr = xen_virt_to_bus(map);
+	dev_addr = xen_phys_to_bus(map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
 		dev_addr = 0;
 	}
 	return dev_addr;
@@ -494,11 +493,12 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length) ||
 		    range_straddles_page_boundary(paddr, sg->length)) {
-			void *map = swiotlb_tbl_map_single(hwdev,
-							   start_dma_addr,
-							   sg_phys(sg),
-							   sg->length, dir);
-			if (!map) {
+			phys_addr_t map = swiotlb_tbl_map_single(hwdev,
+								 start_dma_addr,
+								 sg_phys(sg),
+								 sg->length,
+								 dir);
+			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
@@ -506,7 +506,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 				sgl[0].dma_length = 0;
 				return DMA_ERROR_CODE;
 			}
-			sg->dma_address = xen_virt_to_bus(map);
+			sg->dma_address = xen_phys_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 8d08b3ed406d..1995f3e04fed 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -34,9 +34,14 @@ enum dma_sync_target {
 	SYNC_FOR_CPU = 0,
 	SYNC_FOR_DEVICE = 1,
 };
-extern void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-				    phys_addr_t phys, size_t size,
-				    enum dma_data_direction dir);
+
+/* define the last possible byte of physical address space as a mapping error */
+#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0)
+
+extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+					  dma_addr_t tbl_dma_addr,
+					  phys_addr_t phys, size_t size,
+					  enum dma_data_direction dir);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
 				     size_t size, enum dma_data_direction dir);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index f8c0d4e1d1d3..3adc148bb8d8 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -393,12 +393,13 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 }
 EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
-void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-			     phys_addr_t phys, size_t size,
-			     enum dma_data_direction dir)
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+				   dma_addr_t tbl_dma_addr,
+				   phys_addr_t orig_addr, size_t size,
+				   enum dma_data_direction dir)
 {
 	unsigned long flags;
-	char *dma_addr;
+	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
 	unsigned long mask;
@@ -462,7 +463,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
 				io_tlb_list[i] = 0;
 			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
 				io_tlb_list[i] = ++count;
-			dma_addr = (char *)phys_to_virt(io_tlb_start) + (index << IO_TLB_SHIFT);
+			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
@@ -480,7 +481,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
 
 not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	return NULL;
+	return SWIOTLB_MAP_ERROR;
 found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 
@@ -490,11 +491,12 @@ found:
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
+		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size,
+			       DMA_TO_DEVICE);
 
-	return dma_addr;
+	return tlb_addr;
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
 
@@ -502,9 +504,8 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir)
+phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+		       enum dma_data_direction dir)
 {
 	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 
@@ -598,12 +599,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
-		/*
-		 * The allocated memory isn't reachable by the device.
-		 */
-		free_pages((unsigned long) ret, order);
-		ret = NULL;
+	if (ret) {
+		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+		if (dev_addr + size - 1 > dma_mask) {
+			/*
+			 * The allocated memory isn't reachable by the device.
+			 */
+			free_pages((unsigned long) ret, order);
+			ret = NULL;
+		}
 	}
 	if (!ret) {
 		/*
@@ -611,13 +615,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * GFP_DMA memory; fall back on map_single(), which
 		 * will grab memory from the lowest available address range.
 		 */
-		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
-		if (!ret)
+		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+		if (paddr == SWIOTLB_MAP_ERROR)
 			return NULL;
-	}
 
-	memset(ret, 0, size);
-	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+		ret = phys_to_virt(paddr);
+		dev_addr = phys_to_dma(hwdev, paddr);
+	}
 
 	/* Confirm address can be DMA'd by device */
 	if (dev_addr + size - 1 > dma_mask) {
@@ -629,7 +633,10 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
 		return NULL;
 	}
+
 	*dma_handle = dev_addr;
+	memset(ret, 0, size);
+
 	return ret;
 }
 EXPORT_SYMBOL(swiotlb_alloc_coherent);
@@ -686,9 +693,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    enum dma_data_direction dir,
 			    struct dma_attrs *attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = phys_to_dma(dev, phys);
-	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -699,22 +705,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
-	/*
-	 * Oh well, have to allocate and map a bounce buffer.
-	 */
+	/* Oh well, have to allocate and map a bounce buffer. */
 	map = map_single(dev, phys, size, dir);
-	if (!map) {
+	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
-	dev_addr = swiotlb_virt_to_bus(dev, map);
+	dev_addr = phys_to_dma(dev, map);
 
-	/*
-	 * Ensure that the address returned is DMA'ble
-	 */
+	/* Ensure that the address returned is DMA'ble */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
@@ -840,9 +842,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length)) {
-			void *map = map_single(hwdev, sg_phys(sg),
-					       sg->length, dir);
-			if (!map) {
+			phys_addr_t map = map_single(hwdev, sg_phys(sg),
+						     sg->length, dir);
+			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				swiotlb_full(hwdev, sg->length, dir, 0);
@@ -851,7 +853,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
+			sg->dma_address = phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
-- 
cgit v1.2.3-55-g7522


From 61ca08c3220032dd88815b3465d56cb779258168 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Mon, 15 Oct 2012 10:19:44 -0700
Subject: swiotlb: Use physical addresses for swiotlb_tbl_unmap_single

This change makes it so that the unmap functionality also uses physical
addresses.  This helps to further reduce the use of virt_to_phys and
phys_to_virt functions.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_unmap_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr.  This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c |  4 ++--
 include/linux/swiotlb.h   |  3 ++-
 lib/swiotlb.c             | 37 +++++++++++++++++++------------------
 3 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8a6035aa69c9..4cedc284b5df 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -364,7 +364,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * Ensure that the address returned is DMA'ble
 	 */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
 		dev_addr = 0;
 	}
 	return dev_addr;
@@ -388,7 +388,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
 		return;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 1995f3e04fed..291643c6b88b 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -43,7 +43,8 @@ extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 					  phys_addr_t phys, size_t size,
 					  enum dma_data_direction dir);
 
-extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_unmap_single(struct device *hwdev,
+				     phys_addr_t tlb_addr,
 				     size_t size, enum dma_data_direction dir);
 
 extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3adc148bb8d8..d7701dcf407f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -515,20 +515,20 @@ phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 /*
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
-void
-swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
-			enum dma_data_direction dir)
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+			      size_t size, enum dma_data_direction dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (dma_addr - (char *)phys_to_virt(io_tlb_start)) >> IO_TLB_SHIFT;
-	phys_addr_t phys = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
 	if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+			       size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -621,17 +621,18 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 
 		ret = phys_to_virt(paddr);
 		dev_addr = phys_to_dma(hwdev, paddr);
-	}
 
-	/* Confirm address can be DMA'd by device */
-	if (dev_addr + size - 1 > dma_mask) {
-		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		       (unsigned long long)dma_mask,
-		       (unsigned long long)dev_addr);
+		/* Confirm address can be DMA'd by device */
+		if (dev_addr + size - 1 > dma_mask) {
+			printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+			       (unsigned long long)dma_mask,
+			       (unsigned long long)dev_addr);
 
-		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
-		return NULL;
+			/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+			swiotlb_tbl_unmap_single(hwdev, paddr,
+						 size, DMA_TO_DEVICE);
+			return NULL;
+		}
 	}
 
 	*dma_handle = dev_addr;
@@ -652,7 +653,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		free_pages((unsigned long)vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -716,7 +717,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 
 	/* Ensure that the address returned is DMA'ble */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
@@ -740,7 +741,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
 		return;
 	}
 
-- 
cgit v1.2.3-55-g7522


From fbfda893eb570bbe9e9ad9128b6e9cf2a1e48c87 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Mon, 15 Oct 2012 10:19:49 -0700
Subject: swiotlb: Use physical addresses instead of virtual in
 swiotlb_tbl_sync_single

This change makes it so that the sync functionality also uses physical
addresses.  This helps to further reduce the use of virt_to_phys and
phys_to_virt functions.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_sync_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr.  This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c |  3 +--
 include/linux/swiotlb.h   |  3 ++-
 lib/swiotlb.c             | 22 +++++++++++-----------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4cedc284b5df..af47e7594460 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -433,8 +433,7 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
-				       target);
+		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
 		return;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 291643c6b88b..e0ac98fd81a9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -47,7 +47,8 @@ extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 				     phys_addr_t tlb_addr,
 				     size_t size, enum dma_data_direction dir);
 
-extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_sync_single(struct device *hwdev,
+				    phys_addr_t tlb_addr,
 				    size_t size, enum dma_data_direction dir,
 				    enum dma_sync_target target);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d7701dcf407f..16a548dc91ac 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -557,26 +557,27 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
 
-void
-swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-			enum dma_data_direction dir,
-			enum dma_sync_target target)
+void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
+			     size_t size, enum dma_data_direction dir,
+			     enum dma_sync_target target)
 {
-	int index = (dma_addr - (char *)phys_to_virt(io_tlb_start)) >> IO_TLB_SHIFT;
-	phys_addr_t phys = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
-	phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
 
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+				       size, DMA_FROM_DEVICE);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+				       size, DMA_TO_DEVICE);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
 		break;
@@ -785,8 +786,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
-				       target);
+		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
 		return;
 	}
 
-- 
cgit v1.2.3-55-g7522


From af51a9f1848ff50079a10def56a2c064f326af22 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Mon, 15 Oct 2012 10:19:55 -0700
Subject: swiotlb: Do not export swiotlb_bounce since there are no external
 consumers

Currently swiotlb is the only consumer for swiotlb_bounce.  Since that is the
case it doesn't make much sense to be exporting it so make it a static
function only.

In addition we can save a few more lines of code by making it so that it
accepts the DMA address as a physical address instead of a virtual one.  This
is the last piece in essentially pushing all of the DMA address values to use
physical addresses in swiotlb.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_bounce I am renaming phys to orig_addr, and dma_addr to
tlb_addr.  This way is should be clear that orig_addr is contained within
io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h |  3 ---
 lib/swiotlb.c           | 35 ++++++++++++++++-------------------
 2 files changed, 16 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e0ac98fd81a9..071d62c214a6 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -53,9 +53,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 				    enum dma_sync_target target);
 
 /* Accessory functions. */
-extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-			   enum dma_data_direction dir);
-
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 16a548dc91ac..196b06984dec 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -355,14 +355,15 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
 /*
  * Bounce: copy the swiotlb buffer back to the original dma location
  */
-void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-		    enum dma_data_direction dir)
+static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
+			   size_t size, enum dma_data_direction dir)
 {
-	unsigned long pfn = PFN_DOWN(phys);
+	unsigned long pfn = PFN_DOWN(orig_addr);
+	unsigned char *vaddr = phys_to_virt(tlb_addr);
 
 	if (PageHighMem(pfn_to_page(pfn))) {
 		/* The buffer does not have a mapping.  Map it in and copy */
-		unsigned int offset = phys & ~PAGE_MASK;
+		unsigned int offset = orig_addr & ~PAGE_MASK;
 		char *buffer;
 		unsigned int sz = 0;
 		unsigned long flags;
@@ -373,25 +374,23 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn));
 			if (dir == DMA_TO_DEVICE)
-				memcpy(dma_addr, buffer + offset, sz);
+				memcpy(vaddr, buffer + offset, sz);
 			else
-				memcpy(buffer + offset, dma_addr, sz);
+				memcpy(buffer + offset, vaddr, sz);
 			kunmap_atomic(buffer);
 			local_irq_restore(flags);
 
 			size -= sz;
 			pfn++;
-			dma_addr += sz;
+			vaddr += sz;
 			offset = 0;
 		}
+	} else if (dir == DMA_TO_DEVICE) {
+		memcpy(vaddr, phys_to_virt(orig_addr), size);
 	} else {
-		if (dir == DMA_TO_DEVICE)
-			memcpy(dma_addr, phys_to_virt(phys), size);
-		else
-			memcpy(phys_to_virt(phys), dma_addr, size);
+		memcpy(phys_to_virt(orig_addr), vaddr, size);
 	}
 }
-EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 				   dma_addr_t tbl_dma_addr,
@@ -493,8 +492,7 @@ found:
 	for (i = 0; i < nslots; i++)
 		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size,
-			       DMA_TO_DEVICE);
+		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
 
 	return tlb_addr;
 }
@@ -526,9 +524,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
-	if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
-			       size, DMA_FROM_DEVICE);
+	if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -569,14 +566,14 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+			swiotlb_bounce(orig_addr, tlb_addr,
 				       size, DMA_FROM_DEVICE);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+			swiotlb_bounce(orig_addr, tlb_addr,
 				       size, DMA_TO_DEVICE);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
-- 
cgit v1.2.3-55-g7522


From 5e3b08749951e5746d3e747f1ffae37eff2d08d5 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Mon, 29 Oct 2012 09:31:12 +0100
Subject: staging: drm/omap: add support for ARCH_MULTIPLATFORM

Remove usage of plat/cpu.h and get information from platform data
instead.  This enables omapdrm to be built with ARCH_MULTIPLATFORM.

Signed-off-by: Rob Clark <rob@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/drm.c                | 7 +++++++
 drivers/staging/omapdrm/Kconfig          | 2 +-
 drivers/staging/omapdrm/omap_dmm_tiler.h | 1 -
 drivers/staging/omapdrm/omap_drv.c       | 6 +++++-
 drivers/staging/omapdrm/omap_drv.h       | 2 ++
 include/linux/platform_data/omap_drm.h   | 1 +
 6 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/drm.c b/arch/arm/mach-omap2/drm.c
index 72e0f01b715c..49a7ffb716a5 100644
--- a/arch/arm/mach-omap2/drm.c
+++ b/arch/arm/mach-omap2/drm.c
@@ -23,15 +23,20 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_data/omap_drm.h>
 
 #include <plat/omap_device.h>
 #include <plat/omap_hwmod.h>
+#include <plat/cpu.h>
 
 #if defined(CONFIG_DRM_OMAP) || (CONFIG_DRM_OMAP_MODULE)
 
+static struct omap_drm_platform_data platform_data;
+
 static struct platform_device omap_drm_device = {
 	.dev = {
 		.coherent_dma_mask = DMA_BIT_MASK(32),
+		.platform_data = &platform_data,
 	},
 	.name = "omapdrm",
 	.id = 0,
@@ -52,6 +57,8 @@ static int __init omap_init_drm(void)
 			oh->name);
 	}
 
+	platform_data.omaprev = GET_OMAP_REVISION();
+
 	return platform_device_register(&omap_drm_device);
 
 }
diff --git a/drivers/staging/omapdrm/Kconfig b/drivers/staging/omapdrm/Kconfig
index 81a7cba4a0c5..b724a4131435 100644
--- a/drivers/staging/omapdrm/Kconfig
+++ b/drivers/staging/omapdrm/Kconfig
@@ -2,7 +2,7 @@
 config DRM_OMAP
 	tristate "OMAP DRM"
 	depends on DRM && !CONFIG_FB_OMAP2
-	depends on ARCH_OMAP2PLUS
+	depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM
 	select DRM_KMS_HELPER
 	select OMAP2_DSS
 	select FB_SYS_FILLRECT
diff --git a/drivers/staging/omapdrm/omap_dmm_tiler.h b/drivers/staging/omapdrm/omap_dmm_tiler.h
index c0271a2ac877..4fdd61e54bd2 100644
--- a/drivers/staging/omapdrm/omap_dmm_tiler.h
+++ b/drivers/staging/omapdrm/omap_dmm_tiler.h
@@ -16,7 +16,6 @@
 #ifndef OMAP_DMM_TILER_H
 #define OMAP_DMM_TILER_H
 
-#include <plat/cpu.h>
 #include "omap_drv.h"
 #include "tcm.h"
 
diff --git a/drivers/staging/omapdrm/omap_drv.c b/drivers/staging/omapdrm/omap_drv.c
index 09653b88e0ec..6ae2f763b27a 100644
--- a/drivers/staging/omapdrm/omap_drv.c
+++ b/drivers/staging/omapdrm/omap_drv.c
@@ -417,13 +417,14 @@ static void omap_modeset_free(struct drm_device *dev)
 static int ioctl_get_param(struct drm_device *dev, void *data,
 		struct drm_file *file_priv)
 {
+	struct omap_drm_private *priv = dev->dev_private;
 	struct drm_omap_param *args = data;
 
 	DBG("%p: param=%llu", dev, args->param);
 
 	switch (args->param) {
 	case OMAP_PARAM_CHIPSET_ID:
-		args->value = GET_OMAP_TYPE;
+		args->value = priv->omaprev;
 		break;
 	default:
 		DBG("unknown parameter %lld", args->param);
@@ -555,6 +556,7 @@ struct drm_ioctl_desc ioctls[DRM_COMMAND_END - DRM_COMMAND_BASE] = {
  */
 static int dev_load(struct drm_device *dev, unsigned long flags)
 {
+	struct omap_drm_platform_data *pdata = dev->dev->platform_data;
 	struct omap_drm_private *priv;
 	int ret;
 
@@ -566,6 +568,8 @@ static int dev_load(struct drm_device *dev, unsigned long flags)
 		return -ENOMEM;
 	}
 
+	priv->omaprev = pdata->omaprev;
+
 	dev->dev_private = priv;
 
 	priv->wq = alloc_ordered_workqueue("omapdrm", 0);
diff --git a/drivers/staging/omapdrm/omap_drv.h b/drivers/staging/omapdrm/omap_drv.h
index 9dc72d143ff3..8f7c447930fa 100644
--- a/drivers/staging/omapdrm/omap_drv.h
+++ b/drivers/staging/omapdrm/omap_drv.h
@@ -40,6 +40,8 @@
 #define MAX_MAPPERS 2
 
 struct omap_drm_private {
+	uint32_t omaprev;
+
 	unsigned int num_crtcs;
 	struct drm_crtc *crtcs[8];
 
diff --git a/include/linux/platform_data/omap_drm.h b/include/linux/platform_data/omap_drm.h
index 3da73bdc2031..f4e4a237ebd2 100644
--- a/include/linux/platform_data/omap_drm.h
+++ b/include/linux/platform_data/omap_drm.h
@@ -46,6 +46,7 @@ struct omap_kms_platform_data {
 };
 
 struct omap_drm_platform_data {
+	uint32_t omaprev;
 	struct omap_kms_platform_data *kms_pdata;
 };
 
-- 
cgit v1.2.3-55-g7522


From dc4dc36056392c0b0b1ca9e81bebff964b9297e0 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 30 Oct 2012 12:34:05 +0530
Subject: spi: tegra: add spi driver for SLINK controller

Tegra20/Tegra30 supports the spi interface through its SLINK
controller. Add spi driver for SLINK controller.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 .../bindings/spi/nvidia,tegra20-slink.txt          |   26 +
 drivers/spi/Kconfig                                |    6 +
 drivers/spi/Makefile                               |    2 +-
 drivers/spi/spi-tegra20-slink.c                    | 1359 ++++++++++++++++++++
 include/linux/spi/spi-tegra.h                      |   40 +
 5 files changed, 1432 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
 create mode 100644 drivers/spi/spi-tegra20-slink.c
 create mode 100644 include/linux/spi/spi-tegra.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
new file mode 100644
index 000000000000..f5b1ad1a1ec3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
@@ -0,0 +1,26 @@
+NVIDIA Tegra20/Tegra30 SLINK controller.
+
+Required properties:
+- compatible : should be "nvidia,tegra20-slink", "nvidia,tegra30-slink".
+- reg: Should contain SLINK registers location and length.
+- interrupts: Should contain SLINK interrupts.
+- nvidia,dma-request-selector : The Tegra DMA controller's phandle and
+  request selector for this SLINK controller.
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+                     Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+slink@7000d600 {
+	compatible = "nvidia,tegra20-slink";
+	reg = <0x7000d600 0x200>;
+	interrupts = <0 82 0x04>;
+	nvidia,dma-request-selector = <&apbdma 16>;
+	spi-max-frequency = <25000000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "disabled";
+};
+
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1acae359cabe..25290d9780b2 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -385,6 +385,12 @@ config SPI_MXS
 	help
 	  SPI driver for Freescale MXS devices.
 
+config SPI_TEGRA20_SLINK
+	tristate "Nvidia Tegra20/Tegra30 SLINK Controller"
+	depends on ARCH_TEGRA && TEGRA20_APB_DMA
+	help
+	  SPI driver for Nvidia Tegra20/Tegra30 SLINK Controller interface.
+
 config SPI_TI_SSP
 	tristate "TI Sequencer Serial Port - SPI Support"
 	depends on MFD_TI_SSP
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index c48df47e4b0f..f87c0f142e5a 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -60,10 +60,10 @@ obj-$(CONFIG_SPI_SH_MSIOF)		+= spi-sh-msiof.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi-sh-sci.o
 obj-$(CONFIG_SPI_SIRF)		+= spi-sirf.o
 obj-$(CONFIG_SPI_STMP3XXX)		+= spi-stmp.o
+obj-$(CONFIG_SPI_TEGRA20_SLINK)		+= spi-tegra20-slink.o
 obj-$(CONFIG_SPI_TI_SSP)		+= spi-ti-ssp.o
 obj-$(CONFIG_SPI_TLE62X0)		+= spi-tle62x0.o
 obj-$(CONFIG_SPI_TOPCLIFF_PCH)		+= spi-topcliff-pch.o
 obj-$(CONFIG_SPI_TXX9)			+= spi-txx9.o
 obj-$(CONFIG_SPI_XCOMM)		+= spi-xcomm.o
 obj-$(CONFIG_SPI_XILINX)		+= spi-xilinx.o
-
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
new file mode 100644
index 000000000000..b8985be81d96
--- /dev/null
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -0,0 +1,1359 @@
+/*
+ * SPI driver for Nvidia's Tegra20/Tegra30 SLINK Controller.
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-tegra.h>
+#include <mach/clk.h>
+
+#define SLINK_COMMAND			0x000
+#define SLINK_BIT_LENGTH(x)		(((x) & 0x1f) << 0)
+#define SLINK_WORD_SIZE(x)		(((x) & 0x1f) << 5)
+#define SLINK_BOTH_EN			(1 << 10)
+#define SLINK_CS_SW			(1 << 11)
+#define SLINK_CS_VALUE			(1 << 12)
+#define SLINK_CS_POLARITY		(1 << 13)
+#define SLINK_IDLE_SDA_DRIVE_LOW	(0 << 16)
+#define SLINK_IDLE_SDA_DRIVE_HIGH	(1 << 16)
+#define SLINK_IDLE_SDA_PULL_LOW		(2 << 16)
+#define SLINK_IDLE_SDA_PULL_HIGH	(3 << 16)
+#define SLINK_IDLE_SDA_MASK		(3 << 16)
+#define SLINK_CS_POLARITY1		(1 << 20)
+#define SLINK_CK_SDA			(1 << 21)
+#define SLINK_CS_POLARITY2		(1 << 22)
+#define SLINK_CS_POLARITY3		(1 << 23)
+#define SLINK_IDLE_SCLK_DRIVE_LOW	(0 << 24)
+#define SLINK_IDLE_SCLK_DRIVE_HIGH	(1 << 24)
+#define SLINK_IDLE_SCLK_PULL_LOW	(2 << 24)
+#define SLINK_IDLE_SCLK_PULL_HIGH	(3 << 24)
+#define SLINK_IDLE_SCLK_MASK		(3 << 24)
+#define SLINK_M_S			(1 << 28)
+#define SLINK_WAIT			(1 << 29)
+#define SLINK_GO			(1 << 30)
+#define SLINK_ENB			(1 << 31)
+
+#define SLINK_MODES			(SLINK_IDLE_SCLK_MASK | SLINK_CK_SDA)
+
+#define SLINK_COMMAND2			0x004
+#define SLINK_LSBFE			(1 << 0)
+#define SLINK_SSOE			(1 << 1)
+#define SLINK_SPIE			(1 << 4)
+#define SLINK_BIDIROE			(1 << 6)
+#define SLINK_MODFEN			(1 << 7)
+#define SLINK_INT_SIZE(x)		(((x) & 0x1f) << 8)
+#define SLINK_CS_ACTIVE_BETWEEN		(1 << 17)
+#define SLINK_SS_EN_CS(x)		(((x) & 0x3) << 18)
+#define SLINK_SS_SETUP(x)		(((x) & 0x3) << 20)
+#define SLINK_FIFO_REFILLS_0		(0 << 22)
+#define SLINK_FIFO_REFILLS_1		(1 << 22)
+#define SLINK_FIFO_REFILLS_2		(2 << 22)
+#define SLINK_FIFO_REFILLS_3		(3 << 22)
+#define SLINK_FIFO_REFILLS_MASK		(3 << 22)
+#define SLINK_WAIT_PACK_INT(x)		(((x) & 0x7) << 26)
+#define SLINK_SPC0			(1 << 29)
+#define SLINK_TXEN			(1 << 30)
+#define SLINK_RXEN			(1 << 31)
+
+#define SLINK_STATUS			0x008
+#define SLINK_COUNT(val)		(((val) >> 0) & 0x1f)
+#define SLINK_WORD(val)			(((val) >> 5) & 0x1f)
+#define SLINK_BLK_CNT(val)		(((val) >> 0) & 0xffff)
+#define SLINK_MODF			(1 << 16)
+#define SLINK_RX_UNF			(1 << 18)
+#define SLINK_TX_OVF			(1 << 19)
+#define SLINK_TX_FULL			(1 << 20)
+#define SLINK_TX_EMPTY			(1 << 21)
+#define SLINK_RX_FULL			(1 << 22)
+#define SLINK_RX_EMPTY			(1 << 23)
+#define SLINK_TX_UNF			(1 << 24)
+#define SLINK_RX_OVF			(1 << 25)
+#define SLINK_TX_FLUSH			(1 << 26)
+#define SLINK_RX_FLUSH			(1 << 27)
+#define SLINK_SCLK			(1 << 28)
+#define SLINK_ERR			(1 << 29)
+#define SLINK_RDY			(1 << 30)
+#define SLINK_BSY			(1 << 31)
+#define SLINK_FIFO_ERROR		(SLINK_TX_OVF | SLINK_RX_UNF |	\
+					SLINK_TX_UNF | SLINK_RX_OVF)
+
+#define SLINK_FIFO_EMPTY		(SLINK_TX_EMPTY | SLINK_RX_EMPTY)
+
+#define SLINK_MAS_DATA			0x010
+#define SLINK_SLAVE_DATA		0x014
+
+#define SLINK_DMA_CTL			0x018
+#define SLINK_DMA_BLOCK_SIZE(x)		(((x) & 0xffff) << 0)
+#define SLINK_TX_TRIG_1			(0 << 16)
+#define SLINK_TX_TRIG_4			(1 << 16)
+#define SLINK_TX_TRIG_8			(2 << 16)
+#define SLINK_TX_TRIG_16		(3 << 16)
+#define SLINK_TX_TRIG_MASK		(3 << 16)
+#define SLINK_RX_TRIG_1			(0 << 18)
+#define SLINK_RX_TRIG_4			(1 << 18)
+#define SLINK_RX_TRIG_8			(2 << 18)
+#define SLINK_RX_TRIG_16		(3 << 18)
+#define SLINK_RX_TRIG_MASK		(3 << 18)
+#define SLINK_PACKED			(1 << 20)
+#define SLINK_PACK_SIZE_4		(0 << 21)
+#define SLINK_PACK_SIZE_8		(1 << 21)
+#define SLINK_PACK_SIZE_16		(2 << 21)
+#define SLINK_PACK_SIZE_32		(3 << 21)
+#define SLINK_PACK_SIZE_MASK		(3 << 21)
+#define SLINK_IE_TXC			(1 << 26)
+#define SLINK_IE_RXC			(1 << 27)
+#define SLINK_DMA_EN			(1 << 31)
+
+#define SLINK_STATUS2			0x01c
+#define SLINK_TX_FIFO_EMPTY_COUNT(val)	(((val) & 0x3f) >> 0)
+#define SLINK_RX_FIFO_FULL_COUNT(val)	(((val) & 0x3f0000) >> 16)
+#define SLINK_SS_HOLD_TIME(val)		(((val) & 0xF) << 6)
+
+#define SLINK_TX_FIFO			0x100
+#define SLINK_RX_FIFO			0x180
+
+#define DATA_DIR_TX			(1 << 0)
+#define DATA_DIR_RX			(1 << 1)
+
+#define SLINK_DMA_TIMEOUT		(msecs_to_jiffies(1000))
+
+#define DEFAULT_SPI_DMA_BUF_LEN		(16*1024)
+#define TX_FIFO_EMPTY_COUNT_MAX		SLINK_TX_FIFO_EMPTY_COUNT(0x20)
+#define RX_FIFO_FULL_COUNT_ZERO		SLINK_RX_FIFO_FULL_COUNT(0)
+
+#define SLINK_STATUS2_RESET \
+	(TX_FIFO_EMPTY_COUNT_MAX | RX_FIFO_FULL_COUNT_ZERO << 16)
+
+#define MAX_CHIP_SELECT			4
+#define SLINK_FIFO_DEPTH		32
+
+struct tegra_slink_chip_data {
+	bool cs_hold_time;
+};
+
+struct tegra_slink_data {
+	struct device				*dev;
+	struct spi_master			*master;
+	const struct tegra_slink_chip_data	*chip_data;
+	spinlock_t				lock;
+
+	struct clk				*clk;
+	void __iomem				*base;
+	phys_addr_t				phys;
+	unsigned				irq;
+	int					dma_req_sel;
+	u32					spi_max_frequency;
+	u32					cur_speed;
+
+	struct spi_device			*cur_spi;
+	unsigned				cur_pos;
+	unsigned				cur_len;
+	unsigned				words_per_32bit;
+	unsigned				bytes_per_word;
+	unsigned				curr_dma_words;
+	unsigned				cur_direction;
+
+	unsigned				cur_rx_pos;
+	unsigned				cur_tx_pos;
+
+	unsigned				dma_buf_size;
+	unsigned				max_buf_size;
+	bool					is_curr_dma_xfer;
+	bool					is_hw_based_cs;
+
+	struct completion			rx_dma_complete;
+	struct completion			tx_dma_complete;
+
+	u32					tx_status;
+	u32					rx_status;
+	u32					status_reg;
+	bool					is_packed;
+	unsigned long				packed_size;
+
+	u32					command_reg;
+	u32					command2_reg;
+	u32					dma_control_reg;
+	u32					def_command_reg;
+	u32					def_command2_reg;
+
+	struct completion			xfer_completion;
+	struct spi_transfer			*curr_xfer;
+	struct dma_chan				*rx_dma_chan;
+	u32					*rx_dma_buf;
+	dma_addr_t				rx_dma_phys;
+	struct dma_async_tx_descriptor		*rx_dma_desc;
+
+	struct dma_chan				*tx_dma_chan;
+	u32					*tx_dma_buf;
+	dma_addr_t				tx_dma_phys;
+	struct dma_async_tx_descriptor		*tx_dma_desc;
+};
+
+static int tegra_slink_runtime_suspend(struct device *dev);
+static int tegra_slink_runtime_resume(struct device *dev);
+
+static inline unsigned long tegra_slink_readl(struct tegra_slink_data *tspi,
+		unsigned long reg)
+{
+	return readl(tspi->base + reg);
+}
+
+static inline void tegra_slink_writel(struct tegra_slink_data *tspi,
+		unsigned long val, unsigned long reg)
+{
+	writel(val, tspi->base + reg);
+
+	/* Read back register to make sure that register writes completed */
+	if (reg != SLINK_TX_FIFO)
+		readl(tspi->base + SLINK_MAS_DATA);
+}
+
+static void tegra_slink_clear_status(struct tegra_slink_data *tspi)
+{
+	unsigned long val;
+	unsigned long val_write = 0;
+
+	val = tegra_slink_readl(tspi, SLINK_STATUS);
+
+	/* Write 1 to clear status register */
+	val_write = SLINK_RDY | SLINK_FIFO_ERROR;
+	tegra_slink_writel(tspi, val_write, SLINK_STATUS);
+}
+
+static unsigned long tegra_slink_get_packed_size(struct tegra_slink_data *tspi,
+				  struct spi_transfer *t)
+{
+	unsigned long val;
+
+	switch (tspi->bytes_per_word) {
+	case 0:
+		val = SLINK_PACK_SIZE_4;
+		break;
+	case 1:
+		val = SLINK_PACK_SIZE_8;
+		break;
+	case 2:
+		val = SLINK_PACK_SIZE_16;
+		break;
+	case 4:
+		val = SLINK_PACK_SIZE_32;
+		break;
+	default:
+		val = 0;
+	}
+	return val;
+}
+
+static unsigned tegra_slink_calculate_curr_xfer_param(
+	struct spi_device *spi, struct tegra_slink_data *tspi,
+	struct spi_transfer *t)
+{
+	unsigned remain_len = t->len - tspi->cur_pos;
+	unsigned max_word;
+	unsigned bits_per_word ;
+	unsigned max_len;
+	unsigned total_fifo_words;
+
+	bits_per_word = t->bits_per_word ? t->bits_per_word :
+						spi->bits_per_word;
+	tspi->bytes_per_word = (bits_per_word - 1) / 8 + 1;
+
+	if (bits_per_word == 8 || bits_per_word == 16) {
+		tspi->is_packed = 1;
+		tspi->words_per_32bit = 32/bits_per_word;
+	} else {
+		tspi->is_packed = 0;
+		tspi->words_per_32bit = 1;
+	}
+	tspi->packed_size = tegra_slink_get_packed_size(tspi, t);
+
+	if (tspi->is_packed) {
+		max_len = min(remain_len, tspi->max_buf_size);
+		tspi->curr_dma_words = max_len/tspi->bytes_per_word;
+		total_fifo_words = max_len/4;
+	} else {
+		max_word = (remain_len - 1) / tspi->bytes_per_word + 1;
+		max_word = min(max_word, tspi->max_buf_size/4);
+		tspi->curr_dma_words = max_word;
+		total_fifo_words = max_word;
+	}
+	return total_fifo_words;
+}
+
+static unsigned tegra_slink_fill_tx_fifo_from_client_txbuf(
+	struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned nbytes;
+	unsigned tx_empty_count;
+	unsigned long fifo_status;
+	unsigned max_n_32bit;
+	unsigned i, count;
+	unsigned long x;
+	unsigned int written_words;
+	unsigned fifo_words_left;
+	u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
+
+	fifo_status = tegra_slink_readl(tspi, SLINK_STATUS2);
+	tx_empty_count = SLINK_TX_FIFO_EMPTY_COUNT(fifo_status);
+
+	if (tspi->is_packed) {
+		fifo_words_left = tx_empty_count * tspi->words_per_32bit;
+		written_words = min(fifo_words_left, tspi->curr_dma_words);
+		nbytes = written_words * tspi->bytes_per_word;
+		max_n_32bit = DIV_ROUND_UP(nbytes, 4);
+		for (count = 0; count < max_n_32bit; count++) {
+			x = 0;
+			for (i = 0; (i < 4) && nbytes; i++, nbytes--)
+				x |= (*tx_buf++) << (i*8);
+			tegra_slink_writel(tspi, x, SLINK_TX_FIFO);
+		}
+	} else {
+		max_n_32bit = min(tspi->curr_dma_words,  tx_empty_count);
+		written_words = max_n_32bit;
+		nbytes = written_words * tspi->bytes_per_word;
+		for (count = 0; count < max_n_32bit; count++) {
+			x = 0;
+			for (i = 0; nbytes && (i < tspi->bytes_per_word);
+							i++, nbytes--)
+				x |= ((*tx_buf++) << i*8);
+			tegra_slink_writel(tspi, x, SLINK_TX_FIFO);
+		}
+	}
+	tspi->cur_tx_pos += written_words * tspi->bytes_per_word;
+	return written_words;
+}
+
+static unsigned int tegra_slink_read_rx_fifo_to_client_rxbuf(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned rx_full_count;
+	unsigned long fifo_status;
+	unsigned i, count;
+	unsigned long x;
+	unsigned int read_words = 0;
+	unsigned len;
+	u8 *rx_buf = (u8 *)t->rx_buf + tspi->cur_rx_pos;
+
+	fifo_status = tegra_slink_readl(tspi, SLINK_STATUS2);
+	rx_full_count = SLINK_RX_FIFO_FULL_COUNT(fifo_status);
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		for (count = 0; count < rx_full_count; count++) {
+			x = tegra_slink_readl(tspi, SLINK_RX_FIFO);
+			for (i = 0; len && (i < 4); i++, len--)
+				*rx_buf++ = (x >> i*8) & 0xFF;
+		}
+		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+		read_words += tspi->curr_dma_words;
+	} else {
+		unsigned int bits_per_word;
+
+		bits_per_word = t->bits_per_word ? t->bits_per_word :
+						tspi->cur_spi->bits_per_word;
+		for (count = 0; count < rx_full_count; count++) {
+			x = tegra_slink_readl(tspi, SLINK_RX_FIFO);
+			for (i = 0; (i < tspi->bytes_per_word); i++)
+				*rx_buf++ = (x >> (i*8)) & 0xFF;
+		}
+		tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word;
+		read_words += rx_full_count;
+	}
+	return read_words;
+}
+
+static void tegra_slink_copy_client_txbuf_to_spi_txbuf(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned len;
+
+	/* Make the dma buffer to read by cpu */
+	dma_sync_single_for_cpu(tspi->dev, tspi->tx_dma_phys,
+				tspi->dma_buf_size, DMA_TO_DEVICE);
+
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len);
+	} else {
+		unsigned int i;
+		unsigned int count;
+		u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
+		unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word;
+		unsigned int x;
+
+		for (count = 0; count < tspi->curr_dma_words; count++) {
+			x = 0;
+			for (i = 0; consume && (i < tspi->bytes_per_word);
+							i++, consume--)
+				x |= ((*tx_buf++) << i * 8);
+			tspi->tx_dma_buf[count] = x;
+		}
+	}
+	tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+
+	/* Make the dma buffer to read by dma */
+	dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys,
+				tspi->dma_buf_size, DMA_TO_DEVICE);
+}
+
+static void tegra_slink_copy_spi_rxbuf_to_client_rxbuf(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned len;
+
+	/* Make the dma buffer to read by cpu */
+	dma_sync_single_for_cpu(tspi->dev, tspi->rx_dma_phys,
+		tspi->dma_buf_size, DMA_FROM_DEVICE);
+
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len);
+	} else {
+		unsigned int i;
+		unsigned int count;
+		unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos;
+		unsigned int x;
+		unsigned int rx_mask, bits_per_word;
+
+		bits_per_word = t->bits_per_word ? t->bits_per_word :
+						tspi->cur_spi->bits_per_word;
+		rx_mask = (1 << bits_per_word) - 1;
+		for (count = 0; count < tspi->curr_dma_words; count++) {
+			x = tspi->rx_dma_buf[count];
+			x &= rx_mask;
+			for (i = 0; (i < tspi->bytes_per_word); i++)
+				*rx_buf++ = (x >> (i*8)) & 0xFF;
+		}
+	}
+	tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+
+	/* Make the dma buffer to read by dma */
+	dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
+		tspi->dma_buf_size, DMA_FROM_DEVICE);
+}
+
+static void tegra_slink_dma_complete(void *args)
+{
+	struct completion *dma_complete = args;
+
+	complete(dma_complete);
+}
+
+static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len)
+{
+	INIT_COMPLETION(tspi->tx_dma_complete);
+	tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan,
+				tspi->tx_dma_phys, len, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
+	if (!tspi->tx_dma_desc) {
+		dev_err(tspi->dev, "Not able to get desc for Tx\n");
+		return -EIO;
+	}
+
+	tspi->tx_dma_desc->callback = tegra_slink_dma_complete;
+	tspi->tx_dma_desc->callback_param = &tspi->tx_dma_complete;
+
+	dmaengine_submit(tspi->tx_dma_desc);
+	dma_async_issue_pending(tspi->tx_dma_chan);
+	return 0;
+}
+
+static int tegra_slink_start_rx_dma(struct tegra_slink_data *tspi, int len)
+{
+	INIT_COMPLETION(tspi->rx_dma_complete);
+	tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan,
+				tspi->rx_dma_phys, len, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
+	if (!tspi->rx_dma_desc) {
+		dev_err(tspi->dev, "Not able to get desc for Rx\n");
+		return -EIO;
+	}
+
+	tspi->rx_dma_desc->callback = tegra_slink_dma_complete;
+	tspi->rx_dma_desc->callback_param = &tspi->rx_dma_complete;
+
+	dmaengine_submit(tspi->rx_dma_desc);
+	dma_async_issue_pending(tspi->rx_dma_chan);
+	return 0;
+}
+
+static int tegra_slink_start_dma_based_transfer(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned long val;
+	unsigned long test_val;
+	unsigned int len;
+	int ret = 0;
+	unsigned long status;
+
+	/* Make sure that Rx and Tx fifo are empty */
+	status = tegra_slink_readl(tspi, SLINK_STATUS);
+	if ((status & SLINK_FIFO_EMPTY) != SLINK_FIFO_EMPTY) {
+		dev_err(tspi->dev,
+			"Rx/Tx fifo are not empty status 0x%08lx\n", status);
+		return -EIO;
+	}
+
+	val = SLINK_DMA_BLOCK_SIZE(tspi->curr_dma_words - 1);
+	val |= tspi->packed_size;
+	if (tspi->is_packed)
+		len = DIV_ROUND_UP(tspi->curr_dma_words * tspi->bytes_per_word,
+					4) * 4;
+	else
+		len = tspi->curr_dma_words * 4;
+
+	/* Set attention level based on length of transfer */
+	if (len & 0xF)
+		val |= SLINK_TX_TRIG_1 | SLINK_RX_TRIG_1;
+	else if (((len) >> 4) & 0x1)
+		val |= SLINK_TX_TRIG_4 | SLINK_RX_TRIG_4;
+	else
+		val |= SLINK_TX_TRIG_8 | SLINK_RX_TRIG_8;
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		val |= SLINK_IE_TXC;
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		val |= SLINK_IE_RXC;
+
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	if (tspi->cur_direction & DATA_DIR_TX) {
+		tegra_slink_copy_client_txbuf_to_spi_txbuf(tspi, t);
+		wmb();
+		ret = tegra_slink_start_tx_dma(tspi, len);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"Starting tx dma failed, err %d\n", ret);
+			return ret;
+		}
+
+		/* Wait for tx fifo to be fill before starting slink */
+		test_val = tegra_slink_readl(tspi, SLINK_STATUS);
+		while (!(test_val & SLINK_TX_FULL))
+			test_val = tegra_slink_readl(tspi, SLINK_STATUS);
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX) {
+		/* Make the dma buffer to read by dma */
+		dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
+				tspi->dma_buf_size, DMA_FROM_DEVICE);
+
+		ret = tegra_slink_start_rx_dma(tspi, len);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"Starting rx dma failed, err %d\n", ret);
+			if (tspi->cur_direction & DATA_DIR_TX)
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+			return ret;
+		}
+	}
+	tspi->is_curr_dma_xfer = true;
+	if (tspi->is_packed) {
+		val |= SLINK_PACKED;
+		tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+		/* HW need small delay after settign Packed mode */
+		udelay(1);
+	}
+	tspi->dma_control_reg = val;
+
+	val |= SLINK_DMA_EN;
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	return ret;
+}
+
+static int tegra_slink_start_cpu_based_transfer(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned long val;
+	unsigned cur_words;
+
+	val = tspi->packed_size;
+	if (tspi->cur_direction & DATA_DIR_TX)
+		val |= SLINK_IE_TXC;
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		val |= SLINK_IE_RXC;
+
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		cur_words = tegra_slink_fill_tx_fifo_from_client_txbuf(tspi, t);
+	else
+		cur_words = tspi->curr_dma_words;
+	val |= SLINK_DMA_BLOCK_SIZE(cur_words - 1);
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	tspi->is_curr_dma_xfer = false;
+	if (tspi->is_packed) {
+		val |= SLINK_PACKED;
+		tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+		udelay(1);
+		wmb();
+	}
+	tspi->dma_control_reg = val;
+	val |= SLINK_DMA_EN;
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	return 0;
+}
+
+static int tegra_slink_init_dma_param(struct tegra_slink_data *tspi,
+			bool dma_to_memory)
+{
+	struct dma_chan *dma_chan;
+	u32 *dma_buf;
+	dma_addr_t dma_phys;
+	int ret;
+	struct dma_slave_config dma_sconfig;
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_chan = dma_request_channel(mask, NULL, NULL);
+	if (!dma_chan) {
+		dev_err(tspi->dev,
+			"Dma channel is not available, will try later\n");
+		return -EPROBE_DEFER;
+	}
+
+	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
+				&dma_phys, GFP_KERNEL);
+	if (!dma_buf) {
+		dev_err(tspi->dev, " Not able to allocate the dma buffer\n");
+		dma_release_channel(dma_chan);
+		return -ENOMEM;
+	}
+
+	dma_sconfig.slave_id = tspi->dma_req_sel;
+	if (dma_to_memory) {
+		dma_sconfig.src_addr = tspi->phys + SLINK_RX_FIFO;
+		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.src_maxburst = 0;
+	} else {
+		dma_sconfig.dst_addr = tspi->phys + SLINK_TX_FIFO;
+		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.dst_maxburst = 0;
+	}
+
+	ret = dmaengine_slave_config(dma_chan, &dma_sconfig);
+	if (ret)
+		goto scrub;
+	if (dma_to_memory) {
+		tspi->rx_dma_chan = dma_chan;
+		tspi->rx_dma_buf = dma_buf;
+		tspi->rx_dma_phys = dma_phys;
+	} else {
+		tspi->tx_dma_chan = dma_chan;
+		tspi->tx_dma_buf = dma_buf;
+		tspi->tx_dma_phys = dma_phys;
+	}
+	return 0;
+
+scrub:
+	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
+	dma_release_channel(dma_chan);
+	return ret;
+}
+
+static void tegra_slink_deinit_dma_param(struct tegra_slink_data *tspi,
+	bool dma_to_memory)
+{
+	u32 *dma_buf;
+	dma_addr_t dma_phys;
+	struct dma_chan *dma_chan;
+
+	if (dma_to_memory) {
+		dma_buf = tspi->rx_dma_buf;
+		dma_chan = tspi->rx_dma_chan;
+		dma_phys = tspi->rx_dma_phys;
+		tspi->rx_dma_chan = NULL;
+		tspi->rx_dma_buf = NULL;
+	} else {
+		dma_buf = tspi->tx_dma_buf;
+		dma_chan = tspi->tx_dma_chan;
+		dma_phys = tspi->tx_dma_phys;
+		tspi->tx_dma_buf = NULL;
+		tspi->tx_dma_chan = NULL;
+	}
+	if (!dma_chan)
+		return;
+
+	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
+	dma_release_channel(dma_chan);
+}
+
+static int tegra_slink_start_transfer_one(struct spi_device *spi,
+		struct spi_transfer *t, bool is_first_of_msg,
+		bool is_single_xfer)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(spi->master);
+	u32 speed;
+	u8 bits_per_word;
+	unsigned total_fifo_words;
+	int ret;
+	struct tegra_spi_device_controller_data *cdata = spi->controller_data;
+	unsigned long command;
+	unsigned long command2;
+
+	bits_per_word = t->bits_per_word ? t->bits_per_word :
+					spi->bits_per_word;
+	speed = t->speed_hz ? t->speed_hz : spi->max_speed_hz;
+	if (!speed)
+		speed = tspi->spi_max_frequency;
+	if (speed != tspi->cur_speed) {
+		clk_set_rate(tspi->clk, speed * 4);
+		tspi->cur_speed = speed;
+	}
+
+	tspi->cur_spi = spi;
+	tspi->cur_pos = 0;
+	tspi->cur_rx_pos = 0;
+	tspi->cur_tx_pos = 0;
+	tspi->curr_xfer = t;
+	total_fifo_words = tegra_slink_calculate_curr_xfer_param(spi, tspi, t);
+
+	if (is_first_of_msg) {
+		tegra_slink_clear_status(tspi);
+
+		command = tspi->def_command_reg;
+		command |= SLINK_BIT_LENGTH(bits_per_word - 1);
+
+		command2 = tspi->def_command2_reg;
+		command2 |= SLINK_SS_EN_CS(spi->chip_select);
+
+		/* possibly use the hw based chip select */
+		tspi->is_hw_based_cs = false;
+		if (cdata && cdata->is_hw_based_cs && is_single_xfer &&
+			((tspi->curr_dma_words * tspi->bytes_per_word) ==
+						(t->len - tspi->cur_pos))) {
+			int setup_count;
+			int sts2;
+
+			setup_count = cdata->cs_setup_clk_count >> 1;
+			setup_count = max(setup_count, 3);
+			command2 |= SLINK_SS_SETUP(setup_count);
+			if (tspi->chip_data->cs_hold_time) {
+				int hold_count;
+
+				hold_count = cdata->cs_hold_clk_count;
+				hold_count = max(hold_count, 0xF);
+				sts2 = tegra_slink_readl(tspi, SLINK_STATUS2);
+				sts2 &= ~SLINK_SS_HOLD_TIME(0xF);
+				sts2 |= SLINK_SS_HOLD_TIME(hold_count);
+				tegra_slink_writel(tspi, sts2, SLINK_STATUS2);
+			}
+			tspi->is_hw_based_cs = true;
+		}
+
+		if (tspi->is_hw_based_cs)
+			command &= ~SLINK_CS_SW;
+		else
+			command |= SLINK_CS_SW | SLINK_CS_VALUE;
+
+		command &= ~SLINK_MODES;
+		if (spi->mode & SPI_CPHA)
+			command |= SLINK_CK_SDA;
+
+		if (spi->mode & SPI_CPOL)
+			command |= SLINK_IDLE_SCLK_DRIVE_HIGH;
+		else
+			command |= SLINK_IDLE_SCLK_DRIVE_LOW;
+	} else {
+		command = tspi->command_reg;
+		command &= ~SLINK_BIT_LENGTH(~0);
+		command |= SLINK_BIT_LENGTH(bits_per_word - 1);
+
+		command2 = tspi->command2_reg;
+		command2 &= ~(SLINK_RXEN | SLINK_TXEN);
+	}
+
+	tegra_slink_writel(tspi, command, SLINK_COMMAND);
+	tspi->command_reg = command;
+
+	tspi->cur_direction = 0;
+	if (t->rx_buf) {
+		command2 |= SLINK_RXEN;
+		tspi->cur_direction |= DATA_DIR_RX;
+	}
+	if (t->tx_buf) {
+		command2 |= SLINK_TXEN;
+		tspi->cur_direction |= DATA_DIR_TX;
+	}
+	tegra_slink_writel(tspi, command2, SLINK_COMMAND2);
+	tspi->command2_reg = command2;
+
+	if (total_fifo_words > SLINK_FIFO_DEPTH)
+		ret = tegra_slink_start_dma_based_transfer(tspi, t);
+	else
+		ret = tegra_slink_start_cpu_based_transfer(tspi, t);
+	return ret;
+}
+
+static int tegra_slink_setup(struct spi_device *spi)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(spi->master);
+	unsigned long val;
+	unsigned long flags;
+	int ret;
+	unsigned int cs_pol_bit[MAX_CHIP_SELECT] = {
+			SLINK_CS_POLARITY,
+			SLINK_CS_POLARITY1,
+			SLINK_CS_POLARITY2,
+			SLINK_CS_POLARITY3,
+	};
+
+	dev_dbg(&spi->dev, "setup %d bpw, %scpol, %scpha, %dHz\n",
+		spi->bits_per_word,
+		spi->mode & SPI_CPOL ? "" : "~",
+		spi->mode & SPI_CPHA ? "" : "~",
+		spi->max_speed_hz);
+
+	BUG_ON(spi->chip_select >= MAX_CHIP_SELECT);
+
+	ret = pm_runtime_get_sync(tspi->dev);
+	if (ret < 0) {
+		dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret);
+		return ret;
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	val = tspi->def_command_reg;
+	if (spi->mode & SPI_CS_HIGH)
+		val |= cs_pol_bit[spi->chip_select];
+	else
+		val &= ~cs_pol_bit[spi->chip_select];
+	tspi->def_command_reg = val;
+	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
+	spin_unlock_irqrestore(&tspi->lock, flags);
+
+	pm_runtime_put(tspi->dev);
+	return 0;
+}
+
+static int tegra_slink_prepare_transfer(struct spi_master *master)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+	return pm_runtime_get_sync(tspi->dev);
+}
+
+static int tegra_slink_unprepare_transfer(struct spi_master *master)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+	pm_runtime_put(tspi->dev);
+	return 0;
+}
+
+static int tegra_slink_transfer_one_message(struct spi_master *master,
+			struct spi_message *msg)
+{
+	bool is_first_msg = true;
+	int single_xfer;
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+	struct spi_transfer *xfer;
+	struct spi_device *spi = msg->spi;
+	int ret;
+
+	msg->status = 0;
+	msg->actual_length = 0;
+	single_xfer = list_is_singular(&msg->transfers);
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		INIT_COMPLETION(tspi->xfer_completion);
+		ret = tegra_slink_start_transfer_one(spi, xfer,
+					is_first_msg, single_xfer);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"spi can not start transfer, err %d\n", ret);
+			goto exit;
+		}
+		is_first_msg = false;
+		ret = wait_for_completion_timeout(&tspi->xfer_completion,
+						SLINK_DMA_TIMEOUT);
+		if (WARN_ON(ret == 0)) {
+			dev_err(tspi->dev,
+				"spi trasfer timeout, err %d\n", ret);
+			ret = -EIO;
+			goto exit;
+		}
+
+		if (tspi->tx_status ||  tspi->rx_status) {
+			dev_err(tspi->dev, "Error in Transfer\n");
+			ret = -EIO;
+			goto exit;
+		}
+		msg->actual_length += xfer->len;
+		if (xfer->cs_change && xfer->delay_usecs) {
+			tegra_slink_writel(tspi, tspi->def_command_reg,
+					SLINK_COMMAND);
+			udelay(xfer->delay_usecs);
+		}
+	}
+	ret = 0;
+exit:
+	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
+	tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2);
+	msg->status = ret;
+	spi_finalize_current_message(master);
+	return ret;
+}
+
+static irqreturn_t handle_cpu_based_xfer(struct tegra_slink_data *tspi)
+{
+	struct spi_transfer *t = tspi->curr_xfer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	if (tspi->tx_status ||  tspi->rx_status ||
+				(tspi->status_reg & SLINK_BSY)) {
+		dev_err(tspi->dev,
+			"CpuXfer ERROR bit set 0x%x\n", tspi->status_reg);
+		dev_err(tspi->dev,
+			"CpuXfer 0x%08x:0x%08x:0x%08x\n", tspi->command_reg,
+				tspi->command2_reg, tspi->dma_control_reg);
+		tegra_periph_reset_assert(tspi->clk);
+		udelay(2);
+		tegra_periph_reset_deassert(tspi->clk);
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tegra_slink_read_rx_fifo_to_client_rxbuf(tspi, t);
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->cur_pos = tspi->cur_tx_pos;
+	else
+		tspi->cur_pos = tspi->cur_rx_pos;
+
+	if (tspi->cur_pos == t->len) {
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	tegra_slink_calculate_curr_xfer_param(tspi->cur_spi, tspi, t);
+	tegra_slink_start_cpu_based_transfer(tspi, t);
+exit:
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t handle_dma_based_xfer(struct tegra_slink_data *tspi)
+{
+	struct spi_transfer *t = tspi->curr_xfer;
+	long wait_status;
+	int err = 0;
+	unsigned total_fifo_words;
+	unsigned long flags;
+
+	/* Abort dmas if any error */
+	if (tspi->cur_direction & DATA_DIR_TX) {
+		if (tspi->tx_status) {
+			dmaengine_terminate_all(tspi->tx_dma_chan);
+			err += 1;
+		} else {
+			wait_status = wait_for_completion_interruptible_timeout(
+				&tspi->tx_dma_complete, SLINK_DMA_TIMEOUT);
+			if (wait_status <= 0) {
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+				dev_err(tspi->dev, "TxDma Xfer failed\n");
+				err += 1;
+			}
+		}
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX) {
+		if (tspi->rx_status) {
+			dmaengine_terminate_all(tspi->rx_dma_chan);
+			err += 2;
+		} else {
+			wait_status = wait_for_completion_interruptible_timeout(
+				&tspi->rx_dma_complete, SLINK_DMA_TIMEOUT);
+			if (wait_status <= 0) {
+				dmaengine_terminate_all(tspi->rx_dma_chan);
+				dev_err(tspi->dev, "RxDma Xfer failed\n");
+				err += 2;
+			}
+		}
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	if (err) {
+		dev_err(tspi->dev,
+			"DmaXfer: ERROR bit set 0x%x\n", tspi->status_reg);
+		dev_err(tspi->dev,
+			"DmaXfer 0x%08x:0x%08x:0x%08x\n", tspi->command_reg,
+				tspi->command2_reg, tspi->dma_control_reg);
+		tegra_periph_reset_assert(tspi->clk);
+		udelay(2);
+		tegra_periph_reset_deassert(tspi->clk);
+		complete(&tspi->xfer_completion);
+		spin_unlock_irqrestore(&tspi->lock, flags);
+		return IRQ_HANDLED;
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tegra_slink_copy_spi_rxbuf_to_client_rxbuf(tspi, t);
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->cur_pos = tspi->cur_tx_pos;
+	else
+		tspi->cur_pos = tspi->cur_rx_pos;
+
+	if (tspi->cur_pos == t->len) {
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	/* Continue transfer in current message */
+	total_fifo_words = tegra_slink_calculate_curr_xfer_param(tspi->cur_spi,
+							tspi, t);
+	if (total_fifo_words > SLINK_FIFO_DEPTH)
+		err = tegra_slink_start_dma_based_transfer(tspi, t);
+	else
+		err = tegra_slink_start_cpu_based_transfer(tspi, t);
+
+exit:
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t tegra_slink_isr_thread(int irq, void *context_data)
+{
+	struct tegra_slink_data *tspi = context_data;
+
+	if (!tspi->is_curr_dma_xfer)
+		return handle_cpu_based_xfer(tspi);
+	return handle_dma_based_xfer(tspi);
+}
+
+static irqreturn_t tegra_slink_isr(int irq, void *context_data)
+{
+	struct tegra_slink_data *tspi = context_data;
+
+	tspi->status_reg = tegra_slink_readl(tspi, SLINK_STATUS);
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->tx_status = tspi->status_reg &
+					(SLINK_TX_OVF | SLINK_TX_UNF);
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tspi->rx_status = tspi->status_reg &
+					(SLINK_RX_OVF | SLINK_RX_UNF);
+	tegra_slink_clear_status(tspi);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static struct tegra_spi_platform_data *tegra_slink_parse_dt(
+		struct platform_device *pdev)
+{
+	struct tegra_spi_platform_data *pdata;
+	const unsigned int *prop;
+	struct device_node *np = pdev->dev.of_node;
+	u32 of_dma[2];
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		dev_err(&pdev->dev, "Memory alloc for pdata failed\n");
+		return NULL;
+	}
+
+	if (of_property_read_u32_array(np, "nvidia,dma-request-selector",
+				of_dma, 2) >= 0)
+		pdata->dma_req_sel = of_dma[1];
+
+	prop = of_get_property(np, "spi-max-frequency", NULL);
+	if (prop)
+		pdata->spi_max_frequency = be32_to_cpup(prop);
+
+	return pdata;
+}
+
+const struct tegra_slink_chip_data tegra30_spi_cdata = {
+	.cs_hold_time = true,
+};
+
+const struct tegra_slink_chip_data tegra20_spi_cdata = {
+	.cs_hold_time = false,
+};
+
+static struct of_device_id tegra_slink_of_match[] __devinitconst = {
+	{ .compatible = "nvidia,tegra20-slink", .data = &tegra20_spi_cdata, },
+	{ .compatible = "nvidia,tegra30-slink", .data = &tegra30_spi_cdata, },
+	{}
+};
+MODULE_DEVICE_TABLE(of, tegra_slink_of_match);
+
+static int __devinit tegra_slink_probe(struct platform_device *pdev)
+{
+	struct spi_master	*master;
+	struct tegra_slink_data	*tspi;
+	struct resource		*r;
+	struct tegra_spi_platform_data *pdata = pdev->dev.platform_data;
+	int ret, spi_irq;
+	const struct tegra_slink_chip_data *cdata = NULL;
+	const struct of_device_id *match;
+
+	match = of_match_device(of_match_ptr(tegra_slink_of_match), &pdev->dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
+	}
+	cdata = match->data;
+	if (!pdata && pdev->dev.of_node)
+		pdata = tegra_slink_parse_dt(pdev);
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data, exiting\n");
+		return -ENODEV;
+	}
+
+	if (!pdata->spi_max_frequency)
+		pdata->spi_max_frequency = 25000000; /* 25MHz */
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
+	if (!master) {
+		dev_err(&pdev->dev, "master allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->setup = tegra_slink_setup;
+	master->prepare_transfer_hardware = tegra_slink_prepare_transfer;
+	master->transfer_one_message = tegra_slink_transfer_one_message;
+	master->unprepare_transfer_hardware = tegra_slink_unprepare_transfer;
+	master->num_chipselect = MAX_CHIP_SELECT;
+	master->bus_num = -1;
+
+	dev_set_drvdata(&pdev->dev, master);
+	tspi = spi_master_get_devdata(master);
+	tspi->master = master;
+	tspi->dma_req_sel = pdata->dma_req_sel;
+	tspi->dev = &pdev->dev;
+	tspi->chip_data = cdata;
+	spin_lock_init(&tspi->lock);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "No IO memory resource\n");
+		ret = -ENODEV;
+		goto exit_free_master;
+	}
+	tspi->phys = r->start;
+	tspi->base = devm_request_and_ioremap(&pdev->dev, r);
+	if (!tspi->base) {
+		dev_err(&pdev->dev,
+			"Cannot request memregion/iomap dma address\n");
+		ret = -EADDRNOTAVAIL;
+		goto exit_free_master;
+	}
+
+	spi_irq = platform_get_irq(pdev, 0);
+	tspi->irq = spi_irq;
+	ret = request_threaded_irq(tspi->irq, tegra_slink_isr,
+			tegra_slink_isr_thread, IRQF_ONESHOT,
+			dev_name(&pdev->dev), tspi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
+					tspi->irq);
+		goto exit_free_master;
+	}
+
+	tspi->clk = devm_clk_get(&pdev->dev, "slink");
+	if (IS_ERR(tspi->clk)) {
+		dev_err(&pdev->dev, "can not get clock\n");
+		ret = PTR_ERR(tspi->clk);
+		goto exit_free_irq;
+	}
+
+	tspi->max_buf_size = SLINK_FIFO_DEPTH << 2;
+	tspi->dma_buf_size = DEFAULT_SPI_DMA_BUF_LEN;
+	tspi->spi_max_frequency = pdata->spi_max_frequency;
+
+	if (pdata->dma_req_sel) {
+		ret = tegra_slink_init_dma_param(tspi, true);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "RxDma Init failed, err %d\n", ret);
+			goto exit_free_irq;
+		}
+
+		ret = tegra_slink_init_dma_param(tspi, false);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "TxDma Init failed, err %d\n", ret);
+			goto exit_rx_dma_free;
+		}
+		tspi->max_buf_size = tspi->dma_buf_size;
+		init_completion(&tspi->tx_dma_complete);
+		init_completion(&tspi->rx_dma_complete);
+	}
+
+	init_completion(&tspi->xfer_completion);
+
+	pm_runtime_enable(&pdev->dev);
+	if (!pm_runtime_enabled(&pdev->dev)) {
+		ret = tegra_slink_runtime_resume(&pdev->dev);
+		if (ret)
+			goto exit_pm_disable;
+	}
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+		goto exit_pm_disable;
+	}
+	tspi->def_command_reg  = SLINK_M_S;
+	tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN;
+	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
+	tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2);
+	pm_runtime_put(&pdev->dev);
+
+	master->dev.of_node = pdev->dev.of_node;
+	ret = spi_register_master(master);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "can not register to master err %d\n", ret);
+		goto exit_pm_disable;
+	}
+	return ret;
+
+exit_pm_disable:
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_slink_runtime_suspend(&pdev->dev);
+	tegra_slink_deinit_dma_param(tspi, false);
+exit_rx_dma_free:
+	tegra_slink_deinit_dma_param(tspi, true);
+exit_free_irq:
+	free_irq(spi_irq, tspi);
+exit_free_master:
+	spi_master_put(master);
+	return ret;
+}
+
+static int __devexit tegra_slink_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = dev_get_drvdata(&pdev->dev);
+	struct tegra_slink_data	*tspi = spi_master_get_devdata(master);
+
+	free_irq(tspi->irq, tspi);
+	spi_unregister_master(master);
+
+	if (tspi->tx_dma_chan)
+		tegra_slink_deinit_dma_param(tspi, false);
+
+	if (tspi->rx_dma_chan)
+		tegra_slink_deinit_dma_param(tspi, true);
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_slink_runtime_suspend(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra_slink_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+
+	return spi_master_suspend(master);
+}
+
+static int tegra_slink_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm runtime failed, e = %d\n", ret);
+		return ret;
+	}
+	tegra_slink_writel(tspi, tspi->command_reg, SLINK_COMMAND);
+	tegra_slink_writel(tspi, tspi->command2_reg, SLINK_COMMAND2);
+	pm_runtime_put(dev);
+
+	return spi_master_resume(master);
+}
+#endif
+
+static int tegra_slink_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+	/* Flush all write which are in PPSB queue by reading back */
+	tegra_slink_readl(tspi, SLINK_MAS_DATA);
+
+	clk_disable_unprepare(tspi->clk);
+	return 0;
+}
+
+static int tegra_slink_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(tspi->clk);
+	if (ret < 0) {
+		dev_err(tspi->dev, "clk_prepare failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops slink_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_slink_runtime_suspend,
+		tegra_slink_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_slink_suspend, tegra_slink_resume)
+};
+static struct platform_driver tegra_slink_driver = {
+	.driver = {
+		.name		= "spi-tegra-slink",
+		.owner		= THIS_MODULE,
+		.pm		= &slink_pm_ops,
+		.of_match_table	= of_match_ptr(tegra_slink_of_match),
+	},
+	.probe =	tegra_slink_probe,
+	.remove =	__devexit_p(tegra_slink_remove),
+};
+module_platform_driver(tegra_slink_driver);
+
+MODULE_ALIAS("platform:spi-tegra-slink");
+MODULE_DESCRIPTION("NVIDIA Tegra20/Tegra30 SLINK Controller Driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/spi-tegra.h b/include/linux/spi/spi-tegra.h
new file mode 100644
index 000000000000..786932c62edb
--- /dev/null
+++ b/include/linux/spi/spi-tegra.h
@@ -0,0 +1,40 @@
+/*
+ * spi-tegra.h: SPI interface for Nvidia Tegra20 SLINK controller.
+ *
+ * Copyright (C) 2011 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _LINUX_SPI_TEGRA_H
+#define _LINUX_SPI_TEGRA_H
+
+struct tegra_spi_platform_data {
+	int dma_req_sel;
+	unsigned int spi_max_frequency;
+};
+
+/*
+ * Controller data from device to pass some info like
+ * hw based chip select can be used or not and if yes
+ * then CS hold and setup time.
+ */
+struct tegra_spi_device_controller_data {
+	bool is_hw_based_cs;
+	int cs_setup_clk_count;
+	int cs_hold_clk_count;
+};
+
+#endif /* _LINUX_SPI_TEGRA_H */
-- 
cgit v1.2.3-55-g7522


From 1778794031aae75d4464904319d320edc3e77d39 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 30 Oct 2012 14:31:10 -0600
Subject: PCI: Separate out pci_assign_unassigned_bus_resources()

It is main portion of pci_rescan_bus().

Separate it out and prepare to use it for PCI root bus hot add later.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>---
 drivers/pci/setup-bus.c | 33 +++++++++++++++++++--------------
 include/linux/pci.h     |  1 +
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 1e808ca338f8..f64c071d6923 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1550,25 +1550,12 @@ enable_all:
 }
 EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);
 
-#ifdef CONFIG_HOTPLUG
-/**
- * pci_rescan_bus - scan a PCI bus for devices.
- * @bus: PCI bus to scan
- *
- * Scan a PCI bus and child buses for new devices, adds them,
- * and enables them.
- *
- * Returns the max number of subordinate bus discovered.
- */
-unsigned int __ref pci_rescan_bus(struct pci_bus *bus)
+void pci_assign_unassigned_bus_resources(struct pci_bus *bus)
 {
-	unsigned int max;
 	struct pci_dev *dev;
 	LIST_HEAD(add_list); /* list of resources that
 					want additional resources */
 
-	max = pci_scan_child_bus(bus);
-
 	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &bus->devices, bus_list)
 		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
@@ -1581,6 +1568,24 @@ unsigned int __ref pci_rescan_bus(struct pci_bus *bus)
 	BUG_ON(!list_empty(&add_list));
 
 	pci_enable_bridges(bus);
+}
+
+#ifdef CONFIG_HOTPLUG
+/**
+ * pci_rescan_bus - scan a PCI bus for devices.
+ * @bus: PCI bus to scan
+ *
+ * Scan a PCI bus and child buses for new devices, adds them,
+ * and enables them.
+ *
+ * Returns the max number of subordinate bus discovered.
+ */
+unsigned int __ref pci_rescan_bus(struct pci_bus *bus)
+{
+	unsigned int max;
+
+	max = pci_scan_child_bus(bus);
+	pci_assign_unassigned_bus_resources(bus);
 	pci_bus_add_devices(bus);
 
 	return max;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee2179546c63..f543eb3b1c0c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -958,6 +958,7 @@ void pci_bus_size_bridges(struct pci_bus *bus);
 int pci_claim_resource(struct pci_dev *, int);
 void pci_assign_unassigned_resources(void);
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
+void pci_assign_unassigned_bus_resources(struct pci_bus *bus);
 void pdev_enable_device(struct pci_dev *);
 int pci_enable_resources(struct pci_dev *, int mask);
 void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
-- 
cgit v1.2.3-55-g7522


From 242860a47a75b933a79a30f6a40bf4858f4a3ecc Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 19 Oct 2012 09:33:12 -0300
Subject: mm/sl[aou]b: Move common kmem_cache_size() to slab.h

This function is identically defined in all three allocators
and it's trivial to move it to slab.h

Since now it's static, inline, header-defined function
this patch also drops the EXPORT_SYMBOL tag.

Cc: Pekka Enberg <penberg@kernel.org>
Cc: Matt Mackall <mpm@selenic.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab.h | 9 ++++++++-
 mm/slab.c            | 6 ------
 mm/slob.c            | 6 ------
 mm/slub.c            | 9 ---------
 4 files changed, 8 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 83d1a1454b7e..743a10415122 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-unsigned int kmem_cache_size(struct kmem_cache *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+/*
+ * Determine the size of a slab object
+ */
+static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+	return s->object_size;
+}
+
 void __init kmem_cache_init_late(void);
 
 #endif	/* _LINUX_SLAB_H */
diff --git a/mm/slab.c b/mm/slab.c
index 6d5c83c6ddd5..1f7fd5f51f87 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3969,12 +3969,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-unsigned int kmem_cache_size(struct kmem_cache *cachep)
-{
-	return cachep->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index 287a88aa4a61..fffbc820774d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -604,12 +604,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-unsigned int kmem_cache_size(struct kmem_cache *c)
-{
-	return c->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 int __kmem_cache_shutdown(struct kmem_cache *c)
 {
 	/* No way to check for remaining objects */
diff --git a/mm/slub.c b/mm/slub.c
index 35483e0ab6bc..deee7c754a7d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3121,15 +3121,6 @@ error:
 	return -EINVAL;
 }
 
-/*
- * Determine the size of a slab object
- */
-unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-	return s->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
 							const char *text)
 {
-- 
cgit v1.2.3-55-g7522


From bcb2f99c6c43a8da6cb4002e8b0acf6f1275f3a8 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Mon, 22 Oct 2012 22:14:57 +0200
Subject: usb: gadget: use a computation macro for INT endpoint interval

The 5+4 magic for HS tries to aim 32ms which is also what is intended
with 1 << 5 for FS. This little macro should make this easier to
understand.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/f_acm.c    | 7 +++----
 drivers/usb/gadget/f_ecm.c    | 8 ++++----
 drivers/usb/gadget/f_ncm.c    | 6 +++---
 drivers/usb/gadget/f_rndis.c  | 8 ++++----
 include/linux/usb/composite.h | 2 ++
 5 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c
index d672250a61fa..7c30bb49850b 100644
--- a/drivers/usb/gadget/f_acm.c
+++ b/drivers/usb/gadget/f_acm.c
@@ -87,7 +87,7 @@ static inline struct f_acm *port_to_acm(struct gserial *p)
 
 /* notification endpoint uses smallish and infrequent fixed-size messages */
 
-#define GS_LOG2_NOTIFY_INTERVAL		5	/* 1 << 5 == 32 msec */
+#define GS_NOTIFY_INTERVAL_MS		32
 #define GS_NOTIFY_MAXPACKET		10	/* notification + 2 bytes */
 
 /* interface and class descriptors: */
@@ -167,7 +167,7 @@ static struct usb_endpoint_descriptor acm_fs_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(GS_NOTIFY_MAXPACKET),
-	.bInterval =		1 << GS_LOG2_NOTIFY_INTERVAL,
+	.bInterval =		GS_NOTIFY_INTERVAL_MS,
 };
 
 static struct usb_endpoint_descriptor acm_fs_in_desc = {
@@ -199,14 +199,13 @@ static struct usb_descriptor_header *acm_fs_function[] = {
 };
 
 /* high speed support: */
-
 static struct usb_endpoint_descriptor acm_hs_notify_desc = {
 	.bLength =		USB_DT_ENDPOINT_SIZE,
 	.bDescriptorType =	USB_DT_ENDPOINT,
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(GS_NOTIFY_MAXPACKET),
-	.bInterval =		GS_LOG2_NOTIFY_INTERVAL+4,
+	.bInterval =		USB_MS_TO_HS_INTERVAL(GS_NOTIFY_INTERVAL_MS),
 };
 
 static struct usb_endpoint_descriptor acm_hs_in_desc = {
diff --git a/drivers/usb/gadget/f_ecm.c b/drivers/usb/gadget/f_ecm.c
index 8ab9e9638f91..789242749df5 100644
--- a/drivers/usb/gadget/f_ecm.c
+++ b/drivers/usb/gadget/f_ecm.c
@@ -91,7 +91,7 @@ static inline unsigned ecm_bitrate(struct usb_gadget *g)
  * encapsulated commands (vendor-specific, using control-OUT).
  */
 
-#define LOG2_STATUS_INTERVAL_MSEC	5	/* 1 << 5 == 32 msec */
+#define ECM_STATUS_INTERVAL_MS		32
 #define ECM_STATUS_BYTECOUNT		16	/* 8 byte header + data */
 
 
@@ -192,7 +192,7 @@ static struct usb_endpoint_descriptor fs_ecm_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(ECM_STATUS_BYTECOUNT),
-	.bInterval =		1 << LOG2_STATUS_INTERVAL_MSEC,
+	.bInterval =		ECM_STATUS_INTERVAL_MS,
 };
 
 static struct usb_endpoint_descriptor fs_ecm_in_desc = {
@@ -239,7 +239,7 @@ static struct usb_endpoint_descriptor hs_ecm_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(ECM_STATUS_BYTECOUNT),
-	.bInterval =		LOG2_STATUS_INTERVAL_MSEC + 4,
+	.bInterval =		USB_MS_TO_HS_INTERVAL(ECM_STATUS_INTERVAL_MS),
 };
 
 static struct usb_endpoint_descriptor hs_ecm_in_desc = {
@@ -288,7 +288,7 @@ static struct usb_endpoint_descriptor ss_ecm_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(ECM_STATUS_BYTECOUNT),
-	.bInterval =		LOG2_STATUS_INTERVAL_MSEC + 4,
+	.bInterval =		USB_MS_TO_HS_INTERVAL(ECM_STATUS_INTERVAL_MS),
 };
 
 static struct usb_ss_ep_comp_descriptor ss_ecm_intr_comp_desc = {
diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c
index b651b529c67f..4f0950069a43 100644
--- a/drivers/usb/gadget/f_ncm.c
+++ b/drivers/usb/gadget/f_ncm.c
@@ -121,7 +121,7 @@ static struct usb_cdc_ncm_ntb_parameters ntb_parameters = {
  * waste less bandwidth.
  */
 
-#define LOG2_STATUS_INTERVAL_MSEC	5	/* 1 << 5 == 32 msec */
+#define NCM_STATUS_INTERVAL_MS		32
 #define NCM_STATUS_BYTECOUNT		16	/* 8 byte header + data */
 
 static struct usb_interface_assoc_descriptor ncm_iad_desc __initdata = {
@@ -230,7 +230,7 @@ static struct usb_endpoint_descriptor fs_ncm_notify_desc __initdata = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(NCM_STATUS_BYTECOUNT),
-	.bInterval =		1 << LOG2_STATUS_INTERVAL_MSEC,
+	.bInterval =		NCM_STATUS_INTERVAL_MS,
 };
 
 static struct usb_endpoint_descriptor fs_ncm_in_desc __initdata = {
@@ -275,7 +275,7 @@ static struct usb_endpoint_descriptor hs_ncm_notify_desc __initdata = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(NCM_STATUS_BYTECOUNT),
-	.bInterval =		LOG2_STATUS_INTERVAL_MSEC + 4,
+	.bInterval =		USB_MS_TO_HS_INTERVAL(NCM_STATUS_INTERVAL_MS),
 };
 static struct usb_endpoint_descriptor hs_ncm_in_desc __initdata = {
 	.bLength =		USB_DT_ENDPOINT_SIZE,
diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index b1681e45aca7..61f4b13c6cf5 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -101,7 +101,7 @@ static unsigned int bitrate(struct usb_gadget *g)
 /*
  */
 
-#define LOG2_STATUS_INTERVAL_MSEC	5	/* 1 << 5 == 32 msec */
+#define RNDIS_STATUS_INTERVAL_MS	32
 #define STATUS_BYTECOUNT		8	/* 8 bytes data */
 
 
@@ -190,7 +190,7 @@ static struct usb_endpoint_descriptor fs_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(STATUS_BYTECOUNT),
-	.bInterval =		1 << LOG2_STATUS_INTERVAL_MSEC,
+	.bInterval =		RNDIS_STATUS_INTERVAL_MS,
 };
 
 static struct usb_endpoint_descriptor fs_in_desc = {
@@ -236,7 +236,7 @@ static struct usb_endpoint_descriptor hs_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(STATUS_BYTECOUNT),
-	.bInterval =		LOG2_STATUS_INTERVAL_MSEC + 4,
+	.bInterval =		USB_MS_TO_HS_INTERVAL(RNDIS_STATUS_INTERVAL_MS)
 };
 
 static struct usb_endpoint_descriptor hs_in_desc = {
@@ -284,7 +284,7 @@ static struct usb_endpoint_descriptor ss_notify_desc = {
 	.bEndpointAddress =	USB_DIR_IN,
 	.bmAttributes =		USB_ENDPOINT_XFER_INT,
 	.wMaxPacketSize =	cpu_to_le16(STATUS_BYTECOUNT),
-	.bInterval =		LOG2_STATUS_INTERVAL_MSEC + 4,
+	.bInterval =		USB_MS_TO_HS_INTERVAL(RNDIS_STATUS_INTERVAL_MS)
 };
 
 static struct usb_ss_ep_comp_descriptor ss_intr_comp_desc = {
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index f8dda0621800..8634a127bdd3 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -38,6 +38,7 @@
 #include <linux/version.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/log2.h>
 
 /*
  * USB function drivers should return USB_GADGET_DELAYED_STATUS if they
@@ -51,6 +52,7 @@
 /* big enough to hold our biggest descriptor */
 #define USB_COMP_EP0_BUFSIZ	1024
 
+#define USB_MS_TO_HS_INTERVAL(x)	(ilog2((x * 1000 / 125)) + 1)
 struct usb_configuration;
 
 /**
-- 
cgit v1.2.3-55-g7522


From 10287baec761d33f0a82d84b46e37a44030350d8 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Mon, 22 Oct 2012 22:15:06 +0200
Subject: usb: gadget: always update HS/SS descriptors and create a copy of
 them

HS and SS descriptors are staticaly created. They are updated during the
bind process with the endpoint address, string id or interface numbers.

After that, the descriptor chain is linked to struct usb_function which
is used by composite in order to serve the GET_DESCRIPTOR requests,
number of available configs and so on.

There is no need to assign the HS descriptor only if the UDC supports
HS speed because composite won't report those to the host if HS support
has not been reached. The same reasoning is valid for SS.

This patch makes sure each function updates HS/SS descriptors
unconditionally and uses the newly introduced helper function to create a
copy the descriptors for the speed which is supported by the UDC.

While at that, also rename f->descriptors to f->fs_descriptors in order
to make it more explicit what that means.

Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c      |   8 +--
 drivers/usb/gadget/config.c         |  39 +++++++++++++-
 drivers/usb/gadget/f_acm.c          |  45 +++++-----------
 drivers/usb/gadget/f_ecm.c          |  57 ++++++--------------
 drivers/usb/gadget/f_eem.c          |  46 ++++------------
 drivers/usb/gadget/f_fs.c           |   4 +-
 drivers/usb/gadget/f_hid.c          |  30 ++++-------
 drivers/usb/gadget/f_loopback.c     |  28 +++++-----
 drivers/usb/gadget/f_mass_storage.c |  59 +++++++-------------
 drivers/usb/gadget/f_midi.c         |   8 +--
 drivers/usb/gadget/f_ncm.c          |  32 +++--------
 drivers/usb/gadget/f_obex.c         |  23 ++++----
 drivers/usb/gadget/f_phonet.c       |  15 +++---
 drivers/usb/gadget/f_rndis.c        |  55 +++++--------------
 drivers/usb/gadget/f_serial.c       |  38 ++++---------
 drivers/usb/gadget/f_sourcesink.c   | 104 +++++++++++++++++-------------------
 drivers/usb/gadget/f_subset.c       |  48 +++++------------
 drivers/usb/gadget/f_uac1.c         |  22 +++-----
 drivers/usb/gadget/f_uac2.c         |  16 ++----
 drivers/usb/gadget/f_uvc.c          |  79 ++++++++++++---------------
 drivers/usb/gadget/printer.c        |  12 +++--
 drivers/usb/gadget/tcm_usb_gadget.c |  10 ++--
 include/linux/usb/composite.h       |   2 +-
 include/linux/usb/gadget.h          |   7 +++
 24 files changed, 307 insertions(+), 480 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 957f973dd96a..2a6bfe759c29 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -107,7 +107,7 @@ int config_ep_by_speed(struct usb_gadget *g,
 		}
 		/* else: fall through */
 	default:
-		speed_desc = f->descriptors;
+		speed_desc = f->fs_descriptors;
 	}
 	/* find descriptors */
 	for_each_ep_desc(speed_desc, d_spd) {
@@ -200,7 +200,7 @@ int usb_add_function(struct usb_configuration *config,
 	 * as full speed ... it's the function drivers that will need
 	 * to avoid bulk and ISO transfers.
 	 */
-	if (!config->fullspeed && function->descriptors)
+	if (!config->fullspeed && function->fs_descriptors)
 		config->fullspeed = true;
 	if (!config->highspeed && function->hs_descriptors)
 		config->highspeed = true;
@@ -363,7 +363,7 @@ static int config_buf(struct usb_configuration *config,
 			descriptors = f->hs_descriptors;
 			break;
 		default:
-			descriptors = f->descriptors;
+			descriptors = f->fs_descriptors;
 		}
 
 		if (!descriptors)
@@ -620,7 +620,7 @@ static int set_config(struct usb_composite_dev *cdev,
 			descriptors = f->hs_descriptors;
 			break;
 		default:
-			descriptors = f->descriptors;
+			descriptors = f->fs_descriptors;
 		}
 
 		for (; *descriptors; ++descriptors) {
diff --git a/drivers/usb/gadget/config.c b/drivers/usb/gadget/config.c
index e3a98929d346..34e12fc52c23 100644
--- a/drivers/usb/gadget/config.c
+++ b/drivers/usb/gadget/config.c
@@ -19,7 +19,7 @@
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
-
+#include <linux/usb/composite.h>
 
 /**
  * usb_descriptor_fillbuf - fill buffer with descriptors
@@ -158,3 +158,40 @@ usb_copy_descriptors(struct usb_descriptor_header **src)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(usb_copy_descriptors);
+
+int usb_assign_descriptors(struct usb_function *f,
+		struct usb_descriptor_header **fs,
+		struct usb_descriptor_header **hs,
+		struct usb_descriptor_header **ss)
+{
+	struct usb_gadget *g = f->config->cdev->gadget;
+
+	if (fs) {
+		f->fs_descriptors = usb_copy_descriptors(fs);
+		if (!f->fs_descriptors)
+			goto err;
+	}
+	if (hs && gadget_is_dualspeed(g)) {
+		f->hs_descriptors = usb_copy_descriptors(hs);
+		if (!f->hs_descriptors)
+			goto err;
+	}
+	if (ss && gadget_is_superspeed(g)) {
+		f->ss_descriptors = usb_copy_descriptors(ss);
+		if (!f->ss_descriptors)
+			goto err;
+	}
+	return 0;
+err:
+	usb_free_all_descriptors(f);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(usb_assign_descriptors);
+
+void usb_free_all_descriptors(struct usb_function *f)
+{
+	usb_free_descriptors(f->fs_descriptors);
+	usb_free_descriptors(f->hs_descriptors);
+	usb_free_descriptors(f->ss_descriptors);
+}
+EXPORT_SYMBOL_GPL(usb_free_all_descriptors);
diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c
index 7c30bb49850b..308242b5d6e0 100644
--- a/drivers/usb/gadget/f_acm.c
+++ b/drivers/usb/gadget/f_acm.c
@@ -658,37 +658,22 @@ acm_bind(struct usb_configuration *c, struct usb_function *f)
 	acm->notify_req->complete = acm_cdc_notify_complete;
 	acm->notify_req->context = acm;
 
-	/* copy descriptors */
-	f->descriptors = usb_copy_descriptors(acm_fs_function);
-	if (!f->descriptors)
-		goto fail;
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		acm_hs_in_desc.bEndpointAddress =
-				acm_fs_in_desc.bEndpointAddress;
-		acm_hs_out_desc.bEndpointAddress =
-				acm_fs_out_desc.bEndpointAddress;
-		acm_hs_notify_desc.bEndpointAddress =
-				acm_fs_notify_desc.bEndpointAddress;
-
-		/* copy descriptors */
-		f->hs_descriptors = usb_copy_descriptors(acm_hs_function);
-	}
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		acm_ss_in_desc.bEndpointAddress =
-			acm_fs_in_desc.bEndpointAddress;
-		acm_ss_out_desc.bEndpointAddress =
-			acm_fs_out_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->ss_descriptors = usb_copy_descriptors(acm_ss_function);
-		if (!f->ss_descriptors)
-			goto fail;
-	}
+	acm_hs_in_desc.bEndpointAddress = acm_fs_in_desc.bEndpointAddress;
+	acm_hs_out_desc.bEndpointAddress = acm_fs_out_desc.bEndpointAddress;
+	acm_hs_notify_desc.bEndpointAddress =
+		acm_fs_notify_desc.bEndpointAddress;
+
+	acm_ss_in_desc.bEndpointAddress = acm_fs_in_desc.bEndpointAddress;
+	acm_ss_out_desc.bEndpointAddress = acm_fs_out_desc.bEndpointAddress;
+
+	status = usb_assign_descriptors(f, acm_fs_function, acm_hs_function,
+			acm_ss_function);
+	if (status)
+		goto fail;
 
 	DBG(cdev, "acm ttyGS%d: %s speed IN/%s OUT/%s NOTIFY/%s\n",
 			acm->port_num,
@@ -720,11 +705,7 @@ acm_unbind(struct usb_configuration *c, struct usb_function *f)
 {
 	struct f_acm		*acm = func_to_acm(f);
 
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	if (gadget_is_superspeed(c->cdev->gadget))
-		usb_free_descriptors(f->ss_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 	gs_free_req(acm->notify, acm->notify_req);
 	kfree(acm);
 }
diff --git a/drivers/usb/gadget/f_ecm.c b/drivers/usb/gadget/f_ecm.c
index a158740255ce..2d3c5a46de8e 100644
--- a/drivers/usb/gadget/f_ecm.c
+++ b/drivers/usb/gadget/f_ecm.c
@@ -743,42 +743,24 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f)
 	ecm->notify_req->context = ecm;
 	ecm->notify_req->complete = ecm_notify_complete;
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(ecm_fs_function);
-	if (!f->descriptors)
-		goto fail;
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hs_ecm_in_desc.bEndpointAddress =
-				fs_ecm_in_desc.bEndpointAddress;
-		hs_ecm_out_desc.bEndpointAddress =
-				fs_ecm_out_desc.bEndpointAddress;
-		hs_ecm_notify_desc.bEndpointAddress =
-				fs_ecm_notify_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(ecm_hs_function);
-		if (!f->hs_descriptors)
-			goto fail;
-	}
-
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		ss_ecm_in_desc.bEndpointAddress =
-				fs_ecm_in_desc.bEndpointAddress;
-		ss_ecm_out_desc.bEndpointAddress =
-				fs_ecm_out_desc.bEndpointAddress;
-		ss_ecm_notify_desc.bEndpointAddress =
-				fs_ecm_notify_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->ss_descriptors = usb_copy_descriptors(ecm_ss_function);
-		if (!f->ss_descriptors)
-			goto fail;
-	}
+	hs_ecm_in_desc.bEndpointAddress = fs_ecm_in_desc.bEndpointAddress;
+	hs_ecm_out_desc.bEndpointAddress = fs_ecm_out_desc.bEndpointAddress;
+	hs_ecm_notify_desc.bEndpointAddress =
+		fs_ecm_notify_desc.bEndpointAddress;
+
+	ss_ecm_in_desc.bEndpointAddress = fs_ecm_in_desc.bEndpointAddress;
+	ss_ecm_out_desc.bEndpointAddress = fs_ecm_out_desc.bEndpointAddress;
+	ss_ecm_notify_desc.bEndpointAddress =
+		fs_ecm_notify_desc.bEndpointAddress;
+
+	status = usb_assign_descriptors(f, ecm_fs_function, ecm_hs_function,
+			ecm_ss_function);
+	if (status)
+		goto fail;
 
 	/* NOTE:  all that is done without knowing or caring about
 	 * the network link ... which is unavailable to this code
@@ -796,11 +778,6 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f)
 	return 0;
 
 fail:
-	if (f->descriptors)
-		usb_free_descriptors(f->descriptors);
-	if (f->hs_descriptors)
-		usb_free_descriptors(f->hs_descriptors);
-
 	if (ecm->notify_req) {
 		kfree(ecm->notify_req->buf);
 		usb_ep_free_request(ecm->notify, ecm->notify_req);
@@ -826,11 +803,7 @@ ecm_unbind(struct usb_configuration *c, struct usb_function *f)
 
 	DBG(c->cdev, "ecm unbind\n");
 
-	if (gadget_is_superspeed(c->cdev->gadget))
-		usb_free_descriptors(f->ss_descriptors);
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 
 	kfree(ecm->notify_req->buf);
 	usb_ep_free_request(ecm->notify, ecm->notify_req);
diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c
index a9cf20522ffa..cf0ebee85563 100644
--- a/drivers/usb/gadget/f_eem.c
+++ b/drivers/usb/gadget/f_eem.c
@@ -274,38 +274,20 @@ eem_bind(struct usb_configuration *c, struct usb_function *f)
 
 	status = -ENOMEM;
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(eem_fs_function);
-	if (!f->descriptors)
-		goto fail;
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		eem_hs_in_desc.bEndpointAddress =
-				eem_fs_in_desc.bEndpointAddress;
-		eem_hs_out_desc.bEndpointAddress =
-				eem_fs_out_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(eem_hs_function);
-		if (!f->hs_descriptors)
-			goto fail;
-	}
+	eem_hs_in_desc.bEndpointAddress = eem_fs_in_desc.bEndpointAddress;
+	eem_hs_out_desc.bEndpointAddress = eem_fs_out_desc.bEndpointAddress;
 
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		eem_ss_in_desc.bEndpointAddress =
-				eem_fs_in_desc.bEndpointAddress;
-		eem_ss_out_desc.bEndpointAddress =
-				eem_fs_out_desc.bEndpointAddress;
+	eem_ss_in_desc.bEndpointAddress = eem_fs_in_desc.bEndpointAddress;
+	eem_ss_out_desc.bEndpointAddress = eem_fs_out_desc.bEndpointAddress;
 
-		/* copy descriptors, and track endpoint copies */
-		f->ss_descriptors = usb_copy_descriptors(eem_ss_function);
-		if (!f->ss_descriptors)
-			goto fail;
-	}
+	status = usb_assign_descriptors(f, eem_fs_function, eem_hs_function,
+			eem_ss_function);
+	if (status)
+		goto fail;
 
 	DBG(cdev, "CDC Ethernet (EEM): %s speed IN/%s OUT/%s\n",
 			gadget_is_superspeed(c->cdev->gadget) ? "super" :
@@ -314,11 +296,7 @@ eem_bind(struct usb_configuration *c, struct usb_function *f)
 	return 0;
 
 fail:
-	if (f->descriptors)
-		usb_free_descriptors(f->descriptors);
-	if (f->hs_descriptors)
-		usb_free_descriptors(f->hs_descriptors);
-
+	usb_free_all_descriptors(f);
 	if (eem->port.out_ep)
 		eem->port.out_ep->driver_data = NULL;
 	if (eem->port.in_ep)
@@ -336,11 +314,7 @@ eem_unbind(struct usb_configuration *c, struct usb_function *f)
 
 	DBG(c->cdev, "eem unbind\n");
 
-	if (gadget_is_superspeed(c->cdev->gadget))
-		usb_free_descriptors(f->ss_descriptors);
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 	kfree(eem);
 }
 
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index 64c4ec10d1fc..4a6961c517f2 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -2097,7 +2097,7 @@ static int __ffs_func_bind_do_descs(enum ffs_entity_type type, u8 *valuep,
 	if (isHS)
 		func->function.hs_descriptors[(long)valuep] = desc;
 	else
-		func->function.descriptors[(long)valuep]    = desc;
+		func->function.fs_descriptors[(long)valuep]    = desc;
 
 	if (!desc || desc->bDescriptorType != USB_DT_ENDPOINT)
 		return 0;
@@ -2249,7 +2249,7 @@ static int ffs_func_bind(struct usb_configuration *c,
 	 * numbers without worrying that it may be described later on.
 	 */
 	if (likely(full)) {
-		func->function.descriptors = data->fs_descs;
+		func->function.fs_descriptors = data->fs_descs;
 		ret = ffs_do_descs(ffs->fs_descs_count,
 				   data->raw_descs,
 				   sizeof data->raw_descs,
diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c
index 511e527178e2..6e69a8e8d22a 100644
--- a/drivers/usb/gadget/f_hid.c
+++ b/drivers/usb/gadget/f_hid.c
@@ -573,7 +573,6 @@ static int __init hidg_bind(struct usb_configuration *c, struct usb_function *f)
 		goto fail;
 	hidg_interface_desc.bInterfaceNumber = status;
 
-
 	/* allocate instance-specific endpoints */
 	status = -ENODEV;
 	ep = usb_ep_autoconfig(c->cdev->gadget, &hidg_fs_in_ep_desc);
@@ -609,20 +608,15 @@ static int __init hidg_bind(struct usb_configuration *c, struct usb_function *f)
 	hidg_desc.desc[0].wDescriptorLength =
 		cpu_to_le16(hidg->report_desc_length);
 
-	/* copy descriptors */
-	f->descriptors = usb_copy_descriptors(hidg_fs_descriptors);
-	if (!f->descriptors)
-		goto fail;
+	hidg_hs_in_ep_desc.bEndpointAddress =
+		hidg_fs_in_ep_desc.bEndpointAddress;
+	hidg_hs_out_ep_desc.bEndpointAddress =
+		hidg_fs_out_ep_desc.bEndpointAddress;
 
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hidg_hs_in_ep_desc.bEndpointAddress =
-			hidg_fs_in_ep_desc.bEndpointAddress;
-		hidg_hs_out_ep_desc.bEndpointAddress =
-			hidg_fs_out_ep_desc.bEndpointAddress;
-		f->hs_descriptors = usb_copy_descriptors(hidg_hs_descriptors);
-		if (!f->hs_descriptors)
-			goto fail;
-	}
+	status = usb_assign_descriptors(f, hidg_fs_descriptors,
+			hidg_hs_descriptors, NULL);
+	if (status)
+		goto fail;
 
 	mutex_init(&hidg->lock);
 	spin_lock_init(&hidg->spinlock);
@@ -649,9 +643,7 @@ fail:
 			usb_ep_free_request(hidg->in_ep, hidg->req);
 	}
 
-	usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
-
+	usb_free_all_descriptors(f);
 	return status;
 }
 
@@ -668,9 +660,7 @@ static void hidg_unbind(struct usb_configuration *c, struct usb_function *f)
 	kfree(hidg->req->buf);
 	usb_ep_free_request(hidg->in_ep, hidg->req);
 
-	/* free descriptors copies */
-	usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 
 	kfree(hidg->report_desc);
 	kfree(hidg);
diff --git a/drivers/usb/gadget/f_loopback.c b/drivers/usb/gadget/f_loopback.c
index 7275706caeb0..bb39cb2bb3a3 100644
--- a/drivers/usb/gadget/f_loopback.c
+++ b/drivers/usb/gadget/f_loopback.c
@@ -177,6 +177,7 @@ loopback_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_composite_dev *cdev = c->cdev;
 	struct f_loopback	*loop = func_to_loop(f);
 	int			id;
+	int ret;
 
 	/* allocate interface ID(s) */
 	id = usb_interface_id(c, f);
@@ -201,22 +202,19 @@ autoconf_fail:
 	loop->out_ep->driver_data = cdev;	/* claim */
 
 	/* support high speed hardware */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hs_loop_source_desc.bEndpointAddress =
-				fs_loop_source_desc.bEndpointAddress;
-		hs_loop_sink_desc.bEndpointAddress =
-				fs_loop_sink_desc.bEndpointAddress;
-		f->hs_descriptors = hs_loopback_descs;
-	}
+	hs_loop_source_desc.bEndpointAddress =
+		fs_loop_source_desc.bEndpointAddress;
+	hs_loop_sink_desc.bEndpointAddress = fs_loop_sink_desc.bEndpointAddress;
 
 	/* support super speed hardware */
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		ss_loop_source_desc.bEndpointAddress =
-				fs_loop_source_desc.bEndpointAddress;
-		ss_loop_sink_desc.bEndpointAddress =
-				fs_loop_sink_desc.bEndpointAddress;
-		f->ss_descriptors = ss_loopback_descs;
-	}
+	ss_loop_source_desc.bEndpointAddress =
+		fs_loop_source_desc.bEndpointAddress;
+	ss_loop_sink_desc.bEndpointAddress = fs_loop_sink_desc.bEndpointAddress;
+
+	ret = usb_assign_descriptors(f, fs_loopback_descs, hs_loopback_descs,
+			ss_loopback_descs);
+	if (ret)
+		return ret;
 
 	DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
 	    (gadget_is_superspeed(c->cdev->gadget) ? "super" :
@@ -228,6 +226,7 @@ autoconf_fail:
 static void
 loopback_unbind(struct usb_configuration *c, struct usb_function *f)
 {
+	usb_free_all_descriptors(f);
 	kfree(func_to_loop(f));
 }
 
@@ -379,7 +378,6 @@ static int __init loopback_bind_config(struct usb_configuration *c)
 		return -ENOMEM;
 
 	loop->function.name = "loopback";
-	loop->function.descriptors = fs_loopback_descs;
 	loop->function.bind = loopback_bind;
 	loop->function.unbind = loopback_unbind;
 	loop->function.set_alt = loopback_set_alt;
diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c
index 3a7668bde3ef..3433e432a4ae 100644
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -2904,9 +2904,7 @@ static void fsg_unbind(struct usb_configuration *c, struct usb_function *f)
 	}
 
 	fsg_common_put(common);
-	usb_free_descriptors(fsg->function.descriptors);
-	usb_free_descriptors(fsg->function.hs_descriptors);
-	usb_free_descriptors(fsg->function.ss_descriptors);
+	usb_free_all_descriptors(&fsg->function);
 	kfree(fsg);
 }
 
@@ -2916,6 +2914,8 @@ static int fsg_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_gadget	*gadget = c->cdev->gadget;
 	int			i;
 	struct usb_ep		*ep;
+	unsigned		max_burst;
+	int			ret;
 
 	fsg->gadget = gadget;
 
@@ -2939,45 +2939,27 @@ static int fsg_bind(struct usb_configuration *c, struct usb_function *f)
 	ep->driver_data = fsg->common;	/* claim the endpoint */
 	fsg->bulk_out = ep;
 
-	/* Copy descriptors */
-	f->descriptors = usb_copy_descriptors(fsg_fs_function);
-	if (unlikely(!f->descriptors))
-		return -ENOMEM;
-
-	if (gadget_is_dualspeed(gadget)) {
-		/* Assume endpoint addresses are the same for both speeds */
-		fsg_hs_bulk_in_desc.bEndpointAddress =
-			fsg_fs_bulk_in_desc.bEndpointAddress;
-		fsg_hs_bulk_out_desc.bEndpointAddress =
-			fsg_fs_bulk_out_desc.bEndpointAddress;
-		f->hs_descriptors = usb_copy_descriptors(fsg_hs_function);
-		if (unlikely(!f->hs_descriptors)) {
-			usb_free_descriptors(f->descriptors);
-			return -ENOMEM;
-		}
-	}
-
-	if (gadget_is_superspeed(gadget)) {
-		unsigned	max_burst;
+	/* Assume endpoint addresses are the same for both speeds */
+	fsg_hs_bulk_in_desc.bEndpointAddress =
+		fsg_fs_bulk_in_desc.bEndpointAddress;
+	fsg_hs_bulk_out_desc.bEndpointAddress =
+		fsg_fs_bulk_out_desc.bEndpointAddress;
 
-		/* Calculate bMaxBurst, we know packet size is 1024 */
-		max_burst = min_t(unsigned, FSG_BUFLEN / 1024, 15);
+	/* Calculate bMaxBurst, we know packet size is 1024 */
+	max_burst = min_t(unsigned, FSG_BUFLEN / 1024, 15);
 
-		fsg_ss_bulk_in_desc.bEndpointAddress =
-			fsg_fs_bulk_in_desc.bEndpointAddress;
-		fsg_ss_bulk_in_comp_desc.bMaxBurst = max_burst;
+	fsg_ss_bulk_in_desc.bEndpointAddress =
+		fsg_fs_bulk_in_desc.bEndpointAddress;
+	fsg_ss_bulk_in_comp_desc.bMaxBurst = max_burst;
 
-		fsg_ss_bulk_out_desc.bEndpointAddress =
-			fsg_fs_bulk_out_desc.bEndpointAddress;
-		fsg_ss_bulk_out_comp_desc.bMaxBurst = max_burst;
+	fsg_ss_bulk_out_desc.bEndpointAddress =
+		fsg_fs_bulk_out_desc.bEndpointAddress;
+	fsg_ss_bulk_out_comp_desc.bMaxBurst = max_burst;
 
-		f->ss_descriptors = usb_copy_descriptors(fsg_ss_function);
-		if (unlikely(!f->ss_descriptors)) {
-			usb_free_descriptors(f->hs_descriptors);
-			usb_free_descriptors(f->descriptors);
-			return -ENOMEM;
-		}
-	}
+	ret = usb_assign_descriptors(f, fsg_fs_function, fsg_hs_function,
+			fsg_ss_function);
+	if (ret)
+		goto autoconf_fail;
 
 	return 0;
 
@@ -2986,7 +2968,6 @@ autoconf_fail:
 	return -ENOTSUPP;
 }
 
-
 /****************************** ADD FUNCTION ******************************/
 
 static struct usb_gadget_strings *fsg_strings_array[] = {
diff --git a/drivers/usb/gadget/f_midi.c b/drivers/usb/gadget/f_midi.c
index b265ee8fa5aa..263e721c2694 100644
--- a/drivers/usb/gadget/f_midi.c
+++ b/drivers/usb/gadget/f_midi.c
@@ -414,8 +414,7 @@ static void f_midi_unbind(struct usb_configuration *c, struct usb_function *f)
 	kfree(midi->id);
 	midi->id = NULL;
 
-	usb_free_descriptors(f->descriptors);
-	usb_free_descriptors(f->hs_descriptors);
+	usb_free_all_descriptors(f);
 	kfree(midi);
 }
 
@@ -882,9 +881,10 @@ f_midi_bind(struct usb_configuration *c, struct usb_function *f)
 	 * both speeds
 	 */
 	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(midi_function);
-	if (!f->descriptors)
+	f->fs_descriptors = usb_copy_descriptors(midi_function);
+	if (!f->fs_descriptors)
 		goto fail_f_midi;
+
 	if (gadget_is_dualspeed(c->cdev->gadget)) {
 		bulk_in_desc.wMaxPacketSize = cpu_to_le16(512);
 		bulk_out_desc.wMaxPacketSize = cpu_to_le16(512);
diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c
index 424fc3d1cc36..326d7e6c297c 100644
--- a/drivers/usb/gadget/f_ncm.c
+++ b/drivers/usb/gadget/f_ncm.c
@@ -1208,30 +1208,18 @@ ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	ncm->notify_req->context = ncm;
 	ncm->notify_req->complete = ncm_notify_complete;
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(ncm_fs_function);
-	if (!f->descriptors)
-		goto fail;
-
 	/*
 	 * support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hs_ncm_in_desc.bEndpointAddress =
-				fs_ncm_in_desc.bEndpointAddress;
-		hs_ncm_out_desc.bEndpointAddress =
-				fs_ncm_out_desc.bEndpointAddress;
-		hs_ncm_notify_desc.bEndpointAddress =
-				fs_ncm_notify_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(ncm_hs_function);
-		if (!f->hs_descriptors)
-			goto fail;
-	}
+	hs_ncm_in_desc.bEndpointAddress = fs_ncm_in_desc.bEndpointAddress;
+	hs_ncm_out_desc.bEndpointAddress = fs_ncm_out_desc.bEndpointAddress;
+	hs_ncm_notify_desc.bEndpointAddress =
+		fs_ncm_notify_desc.bEndpointAddress;
 
+	status = usb_assign_descriptors(f, ncm_fs_function, ncm_hs_function,
+			NULL);
 	/*
 	 * NOTE:  all that is done without knowing or caring about
 	 * the network link ... which is unavailable to this code
@@ -1248,9 +1236,7 @@ ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	return 0;
 
 fail:
-	if (f->descriptors)
-		usb_free_descriptors(f->descriptors);
-
+	usb_free_all_descriptors(f);
 	if (ncm->notify_req) {
 		kfree(ncm->notify_req->buf);
 		usb_ep_free_request(ncm->notify, ncm->notify_req);
@@ -1276,9 +1262,7 @@ ncm_unbind(struct usb_configuration *c, struct usb_function *f)
 
 	DBG(c->cdev, "ncm unbind\n");
 
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 
 	kfree(ncm->notify_req->buf);
 	usb_ep_free_request(ncm->notify, ncm->notify_req);
diff --git a/drivers/usb/gadget/f_obex.c b/drivers/usb/gadget/f_obex.c
index 5f400f66aa9b..d74491ad82cb 100644
--- a/drivers/usb/gadget/f_obex.c
+++ b/drivers/usb/gadget/f_obex.c
@@ -331,23 +331,19 @@ obex_bind(struct usb_configuration *c, struct usb_function *f)
 	obex->port.out = ep;
 	ep->driver_data = cdev;	/* claim */
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(fs_function);
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
 
-		obex_hs_ep_in_desc.bEndpointAddress =
-				obex_fs_ep_in_desc.bEndpointAddress;
-		obex_hs_ep_out_desc.bEndpointAddress =
-				obex_fs_ep_out_desc.bEndpointAddress;
+	obex_hs_ep_in_desc.bEndpointAddress =
+		obex_fs_ep_in_desc.bEndpointAddress;
+	obex_hs_ep_out_desc.bEndpointAddress =
+		obex_fs_ep_out_desc.bEndpointAddress;
 
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(hs_function);
-	}
+	status = usb_assign_descriptors(f, fs_function, hs_function, NULL);
+	if (status)
+		goto fail;
 
 	/* Avoid letting this gadget enumerate until the userspace
 	 * OBEX server is active.
@@ -368,6 +364,7 @@ obex_bind(struct usb_configuration *c, struct usb_function *f)
 	return 0;
 
 fail:
+	usb_free_all_descriptors(f);
 	/* we might as well release our claims on endpoints */
 	if (obex->port.out)
 		obex->port.out->driver_data = NULL;
@@ -382,9 +379,7 @@ fail:
 static void
 obex_unbind(struct usb_configuration *c, struct usb_function *f)
 {
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 	kfree(func_to_obex(f));
 }
 
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index a6c19a486d53..b21ab558b6c0 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -515,14 +515,14 @@ int pn_bind(struct usb_configuration *c, struct usb_function *f)
 	fp->in_ep = ep;
 	ep->driver_data = fp; /* Claim */
 
-	pn_hs_sink_desc.bEndpointAddress =
-		pn_fs_sink_desc.bEndpointAddress;
-	pn_hs_source_desc.bEndpointAddress =
-		pn_fs_source_desc.bEndpointAddress;
+	pn_hs_sink_desc.bEndpointAddress = pn_fs_sink_desc.bEndpointAddress;
+	pn_hs_source_desc.bEndpointAddress = pn_fs_source_desc.bEndpointAddress;
 
 	/* Do not try to bind Phonet twice... */
-	fp->function.descriptors = fs_pn_function;
-	fp->function.hs_descriptors = hs_pn_function;
+	status = usb_assign_descriptors(f, fs_pn_function, hs_pn_function,
+			NULL);
+	if (status)
+		goto err;
 
 	/* Incoming USB requests */
 	status = -ENOMEM;
@@ -551,7 +551,7 @@ err_req:
 	for (i = 0; i < phonet_rxq_size && fp->out_reqv[i]; i++)
 		usb_ep_free_request(fp->out_ep, fp->out_reqv[i]);
 err:
-
+	usb_free_all_descriptors(f);
 	if (fp->out_ep)
 		fp->out_ep->driver_data = NULL;
 	if (fp->in_ep)
@@ -573,6 +573,7 @@ pn_unbind(struct usb_configuration *c, struct usb_function *f)
 		if (fp->out_reqv[i])
 			usb_ep_free_request(fp->out_ep, fp->out_reqv[i]);
 
+	usb_free_all_descriptors(f);
 	kfree(fp);
 }
 
diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 7c27626f0235..e7c25105bd8e 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -722,42 +722,22 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f)
 	rndis->notify_req->context = rndis;
 	rndis->notify_req->complete = rndis_response_complete;
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(eth_fs_function);
-	if (!f->descriptors)
-		goto fail;
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hs_in_desc.bEndpointAddress =
-				fs_in_desc.bEndpointAddress;
-		hs_out_desc.bEndpointAddress =
-				fs_out_desc.bEndpointAddress;
-		hs_notify_desc.bEndpointAddress =
-				fs_notify_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(eth_hs_function);
-		if (!f->hs_descriptors)
-			goto fail;
-	}
+	hs_in_desc.bEndpointAddress = fs_in_desc.bEndpointAddress;
+	hs_out_desc.bEndpointAddress = fs_out_desc.bEndpointAddress;
+	hs_notify_desc.bEndpointAddress = fs_notify_desc.bEndpointAddress;
 
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		ss_in_desc.bEndpointAddress =
-				fs_in_desc.bEndpointAddress;
-		ss_out_desc.bEndpointAddress =
-				fs_out_desc.bEndpointAddress;
-		ss_notify_desc.bEndpointAddress =
-				fs_notify_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->ss_descriptors = usb_copy_descriptors(eth_ss_function);
-		if (!f->ss_descriptors)
-			goto fail;
-	}
+	ss_in_desc.bEndpointAddress = fs_in_desc.bEndpointAddress;
+	ss_out_desc.bEndpointAddress = fs_out_desc.bEndpointAddress;
+	ss_notify_desc.bEndpointAddress = fs_notify_desc.bEndpointAddress;
+
+	status = usb_assign_descriptors(f, eth_fs_function, eth_hs_function,
+			eth_ss_function);
+	if (status)
+		goto fail;
 
 	rndis->port.open = rndis_open;
 	rndis->port.close = rndis_close;
@@ -788,12 +768,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f)
 	return 0;
 
 fail:
-	if (gadget_is_superspeed(c->cdev->gadget) && f->ss_descriptors)
-		usb_free_descriptors(f->ss_descriptors);
-	if (gadget_is_dualspeed(c->cdev->gadget) && f->hs_descriptors)
-		usb_free_descriptors(f->hs_descriptors);
-	if (f->descriptors)
-		usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 
 	if (rndis->notify_req) {
 		kfree(rndis->notify_req->buf);
@@ -822,11 +797,7 @@ rndis_unbind(struct usb_configuration *c, struct usb_function *f)
 	rndis_exit();
 	rndis_string_defs[0].id = 0;
 
-	if (gadget_is_superspeed(c->cdev->gadget))
-		usb_free_descriptors(f->ss_descriptors);
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 
 	kfree(rndis->notify_req->buf);
 	usb_ep_free_request(rndis->notify, rndis->notify_req);
diff --git a/drivers/usb/gadget/f_serial.c b/drivers/usb/gadget/f_serial.c
index 07197d63d9b1..98fa7795df5f 100644
--- a/drivers/usb/gadget/f_serial.c
+++ b/drivers/usb/gadget/f_serial.c
@@ -213,34 +213,20 @@ gser_bind(struct usb_configuration *c, struct usb_function *f)
 	gser->port.out = ep;
 	ep->driver_data = cdev;	/* claim */
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(gser_fs_function);
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		gser_hs_in_desc.bEndpointAddress =
-				gser_fs_in_desc.bEndpointAddress;
-		gser_hs_out_desc.bEndpointAddress =
-				gser_fs_out_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(gser_hs_function);
-	}
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		gser_ss_in_desc.bEndpointAddress =
-			gser_fs_in_desc.bEndpointAddress;
-		gser_ss_out_desc.bEndpointAddress =
-			gser_fs_out_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->ss_descriptors = usb_copy_descriptors(gser_ss_function);
-		if (!f->ss_descriptors)
-			goto fail;
-	}
+	gser_hs_in_desc.bEndpointAddress = gser_fs_in_desc.bEndpointAddress;
+	gser_hs_out_desc.bEndpointAddress = gser_fs_out_desc.bEndpointAddress;
 
+	gser_ss_in_desc.bEndpointAddress = gser_fs_in_desc.bEndpointAddress;
+	gser_ss_out_desc.bEndpointAddress = gser_fs_out_desc.bEndpointAddress;
+
+	status = usb_assign_descriptors(f, gser_fs_function, gser_hs_function,
+			gser_ss_function);
+	if (status)
+		goto fail;
 	DBG(cdev, "generic ttyGS%d: %s speed IN/%s OUT/%s\n",
 			gser->port_num,
 			gadget_is_superspeed(c->cdev->gadget) ? "super" :
@@ -263,11 +249,7 @@ fail:
 static void
 gser_unbind(struct usb_configuration *c, struct usb_function *f)
 {
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	if (gadget_is_superspeed(c->cdev->gadget))
-		usb_free_descriptors(f->ss_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 	kfree(func_to_gser(f));
 }
 
diff --git a/drivers/usb/gadget/f_sourcesink.c b/drivers/usb/gadget/f_sourcesink.c
index 3c126fde6e7e..102d49beb9df 100644
--- a/drivers/usb/gadget/f_sourcesink.c
+++ b/drivers/usb/gadget/f_sourcesink.c
@@ -319,6 +319,7 @@ sourcesink_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_composite_dev *cdev = c->cdev;
 	struct f_sourcesink	*ss = func_to_ss(f);
 	int	id;
+	int ret;
 
 	/* allocate interface ID(s) */
 	id = usb_interface_id(c, f);
@@ -387,64 +388,57 @@ no_iso:
 		isoc_maxpacket = 1024;
 
 	/* support high speed hardware */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hs_source_desc.bEndpointAddress =
-				fs_source_desc.bEndpointAddress;
-		hs_sink_desc.bEndpointAddress =
-				fs_sink_desc.bEndpointAddress;
+	hs_source_desc.bEndpointAddress = fs_source_desc.bEndpointAddress;
+	hs_sink_desc.bEndpointAddress = fs_sink_desc.bEndpointAddress;
 
-		/*
-		 * Fill in the HS isoc descriptors from the module parameters.
-		 * We assume that the user knows what they are doing and won't
-		 * give parameters that their UDC doesn't support.
-		 */
-		hs_iso_source_desc.wMaxPacketSize = isoc_maxpacket;
-		hs_iso_source_desc.wMaxPacketSize |= isoc_mult << 11;
-		hs_iso_source_desc.bInterval = isoc_interval;
-		hs_iso_source_desc.bEndpointAddress =
-				fs_iso_source_desc.bEndpointAddress;
-
-		hs_iso_sink_desc.wMaxPacketSize = isoc_maxpacket;
-		hs_iso_sink_desc.wMaxPacketSize |= isoc_mult << 11;
-		hs_iso_sink_desc.bInterval = isoc_interval;
-		hs_iso_sink_desc.bEndpointAddress =
-				fs_iso_sink_desc.bEndpointAddress;
-
-		f->hs_descriptors = hs_source_sink_descs;
-	}
+	/*
+	 * Fill in the HS isoc descriptors from the module parameters.
+	 * We assume that the user knows what they are doing and won't
+	 * give parameters that their UDC doesn't support.
+	 */
+	hs_iso_source_desc.wMaxPacketSize = isoc_maxpacket;
+	hs_iso_source_desc.wMaxPacketSize |= isoc_mult << 11;
+	hs_iso_source_desc.bInterval = isoc_interval;
+	hs_iso_source_desc.bEndpointAddress =
+		fs_iso_source_desc.bEndpointAddress;
+
+	hs_iso_sink_desc.wMaxPacketSize = isoc_maxpacket;
+	hs_iso_sink_desc.wMaxPacketSize |= isoc_mult << 11;
+	hs_iso_sink_desc.bInterval = isoc_interval;
+	hs_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress;
 
 	/* support super speed hardware */
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		ss_source_desc.bEndpointAddress =
-				fs_source_desc.bEndpointAddress;
-		ss_sink_desc.bEndpointAddress =
-				fs_sink_desc.bEndpointAddress;
+	ss_source_desc.bEndpointAddress =
+		fs_source_desc.bEndpointAddress;
+	ss_sink_desc.bEndpointAddress =
+		fs_sink_desc.bEndpointAddress;
 
-		/*
-		 * Fill in the SS isoc descriptors from the module parameters.
-		 * We assume that the user knows what they are doing and won't
-		 * give parameters that their UDC doesn't support.
-		 */
-		ss_iso_source_desc.wMaxPacketSize = isoc_maxpacket;
-		ss_iso_source_desc.bInterval = isoc_interval;
-		ss_iso_source_comp_desc.bmAttributes = isoc_mult;
-		ss_iso_source_comp_desc.bMaxBurst = isoc_maxburst;
-		ss_iso_source_comp_desc.wBytesPerInterval =
-			isoc_maxpacket * (isoc_mult + 1) * (isoc_maxburst + 1);
-		ss_iso_source_desc.bEndpointAddress =
-				fs_iso_source_desc.bEndpointAddress;
-
-		ss_iso_sink_desc.wMaxPacketSize = isoc_maxpacket;
-		ss_iso_sink_desc.bInterval = isoc_interval;
-		ss_iso_sink_comp_desc.bmAttributes = isoc_mult;
-		ss_iso_sink_comp_desc.bMaxBurst = isoc_maxburst;
-		ss_iso_sink_comp_desc.wBytesPerInterval =
-			isoc_maxpacket * (isoc_mult + 1) * (isoc_maxburst + 1);
-		ss_iso_sink_desc.bEndpointAddress =
-				fs_iso_sink_desc.bEndpointAddress;
-
-		f->ss_descriptors = ss_source_sink_descs;
-	}
+	/*
+	 * Fill in the SS isoc descriptors from the module parameters.
+	 * We assume that the user knows what they are doing and won't
+	 * give parameters that their UDC doesn't support.
+	 */
+	ss_iso_source_desc.wMaxPacketSize = isoc_maxpacket;
+	ss_iso_source_desc.bInterval = isoc_interval;
+	ss_iso_source_comp_desc.bmAttributes = isoc_mult;
+	ss_iso_source_comp_desc.bMaxBurst = isoc_maxburst;
+	ss_iso_source_comp_desc.wBytesPerInterval =
+		isoc_maxpacket * (isoc_mult + 1) * (isoc_maxburst + 1);
+	ss_iso_source_desc.bEndpointAddress =
+		fs_iso_source_desc.bEndpointAddress;
+
+	ss_iso_sink_desc.wMaxPacketSize = isoc_maxpacket;
+	ss_iso_sink_desc.bInterval = isoc_interval;
+	ss_iso_sink_comp_desc.bmAttributes = isoc_mult;
+	ss_iso_sink_comp_desc.bMaxBurst = isoc_maxburst;
+	ss_iso_sink_comp_desc.wBytesPerInterval =
+		isoc_maxpacket * (isoc_mult + 1) * (isoc_maxburst + 1);
+	ss_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress;
+
+	ret = usb_assign_descriptors(f, fs_source_sink_descs,
+			hs_source_sink_descs, ss_source_sink_descs);
+	if (ret)
+		return ret;
 
 	DBG(cdev, "%s speed %s: IN/%s, OUT/%s, ISO-IN/%s, ISO-OUT/%s\n",
 	    (gadget_is_superspeed(c->cdev->gadget) ? "super" :
@@ -458,6 +452,7 @@ no_iso:
 static void
 sourcesink_unbind(struct usb_configuration *c, struct usb_function *f)
 {
+	usb_free_all_descriptors(f);
 	kfree(func_to_ss(f));
 }
 
@@ -773,7 +768,6 @@ static int __init sourcesink_bind_config(struct usb_configuration *c)
 		return -ENOMEM;
 
 	ss->function.name = "source/sink";
-	ss->function.descriptors = fs_source_sink_descs;
 	ss->function.bind = sourcesink_bind;
 	ss->function.unbind = sourcesink_unbind;
 	ss->function.set_alt = sourcesink_set_alt;
diff --git a/drivers/usb/gadget/f_subset.c b/drivers/usb/gadget/f_subset.c
index deb437c3b53e..856dbae586f1 100644
--- a/drivers/usb/gadget/f_subset.c
+++ b/drivers/usb/gadget/f_subset.c
@@ -319,38 +319,22 @@ geth_bind(struct usb_configuration *c, struct usb_function *f)
 	geth->port.out_ep = ep;
 	ep->driver_data = cdev;	/* claim */
 
-	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(fs_eth_function);
-	if (!f->descriptors)
-		goto fail;
-
 	/* support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		hs_subset_in_desc.bEndpointAddress =
-				fs_subset_in_desc.bEndpointAddress;
-		hs_subset_out_desc.bEndpointAddress =
-				fs_subset_out_desc.bEndpointAddress;
-
-		/* copy descriptors, and track endpoint copies */
-		f->hs_descriptors = usb_copy_descriptors(hs_eth_function);
-		if (!f->hs_descriptors)
-			goto fail;
-	}
+	hs_subset_in_desc.bEndpointAddress = fs_subset_in_desc.bEndpointAddress;
+	hs_subset_out_desc.bEndpointAddress =
+		fs_subset_out_desc.bEndpointAddress;
 
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		ss_subset_in_desc.bEndpointAddress =
-				fs_subset_in_desc.bEndpointAddress;
-		ss_subset_out_desc.bEndpointAddress =
-				fs_subset_out_desc.bEndpointAddress;
+	ss_subset_in_desc.bEndpointAddress = fs_subset_in_desc.bEndpointAddress;
+	ss_subset_out_desc.bEndpointAddress =
+		fs_subset_out_desc.bEndpointAddress;
 
-		/* copy descriptors, and track endpoint copies */
-		f->ss_descriptors = usb_copy_descriptors(ss_eth_function);
-		if (!f->ss_descriptors)
-			goto fail;
-	}
+	status = usb_assign_descriptors(f, fs_eth_function, hs_eth_function,
+			ss_eth_function);
+	if (status)
+		goto fail;
 
 	/* NOTE:  all that is done without knowing or caring about
 	 * the network link ... which is unavailable to this code
@@ -364,11 +348,7 @@ geth_bind(struct usb_configuration *c, struct usb_function *f)
 	return 0;
 
 fail:
-	if (f->descriptors)
-		usb_free_descriptors(f->descriptors);
-	if (f->hs_descriptors)
-		usb_free_descriptors(f->hs_descriptors);
-
+	usb_free_all_descriptors(f);
 	/* we might as well release our claims on endpoints */
 	if (geth->port.out_ep)
 		geth->port.out_ep->driver_data = NULL;
@@ -383,11 +363,7 @@ fail:
 static void
 geth_unbind(struct usb_configuration *c, struct usb_function *f)
 {
-	if (gadget_is_superspeed(c->cdev->gadget))
-		usb_free_descriptors(f->ss_descriptors);
-	if (gadget_is_dualspeed(c->cdev->gadget))
-		usb_free_descriptors(f->hs_descriptors);
-	usb_free_descriptors(f->descriptors);
+	usb_free_all_descriptors(f);
 	geth_string_defs[1].s = NULL;
 	kfree(func_to_geth(f));
 }
diff --git a/drivers/usb/gadget/f_uac1.c b/drivers/usb/gadget/f_uac1.c
index c8ed41ba1042..f570e667a640 100644
--- a/drivers/usb/gadget/f_uac1.c
+++ b/drivers/usb/gadget/f_uac1.c
@@ -630,7 +630,7 @@ f_audio_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_composite_dev *cdev = c->cdev;
 	struct f_audio		*audio = func_to_audio(f);
 	int			status;
-	struct usb_ep		*ep;
+	struct usb_ep		*ep = NULL;
 
 	f_audio_build_desc(audio);
 
@@ -659,21 +659,14 @@ f_audio_bind(struct usb_configuration *c, struct usb_function *f)
 	status = -ENOMEM;
 
 	/* copy descriptors, and track endpoint copies */
-	f->descriptors = usb_copy_descriptors(f_audio_desc);
-
-	/*
-	 * support all relevant hardware speeds... we expect that when
-	 * hardware is dual speed, all bulk-capable endpoints work at
-	 * both speeds
-	 */
-	if (gadget_is_dualspeed(c->cdev->gadget)) {
-		f->hs_descriptors = usb_copy_descriptors(f_audio_desc);
-	}
-
+	status = usb_assign_descriptors(f, f_audio_desc, f_audio_desc, NULL);
+	if (status)
+		goto fail;
 	return 0;
 
 fail:
-
+	if (ep)
+		ep->driver_data = NULL;
 	return status;
 }
 
@@ -682,8 +675,7 @@ f_audio_unbind(struct usb_configuration *c, struct usb_function *f)
 {
 	struct f_audio		*audio = func_to_audio(f);
 
-	usb_free_descriptors(f->descriptors);
-	usb_free_descriptors(f->hs_descriptors);
+	usb_free_all_descriptors(f);
 	kfree(audio);
 }
 
diff --git a/drivers/usb/gadget/f_uac2.c b/drivers/usb/gadget/f_uac2.c
index f02b8ece5287..730a260e1841 100644
--- a/drivers/usb/gadget/f_uac2.c
+++ b/drivers/usb/gadget/f_uac2.c
@@ -998,9 +998,9 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 	hs_epin_desc.bEndpointAddress = fs_epin_desc.bEndpointAddress;
 	hs_epin_desc.wMaxPacketSize = fs_epin_desc.wMaxPacketSize;
 
-	fn->descriptors = usb_copy_descriptors(fs_audio_desc);
-	if (gadget_is_dualspeed(gadget))
-		fn->hs_descriptors = usb_copy_descriptors(hs_audio_desc);
+	ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL);
+	if (ret)
+		goto err;
 
 	prm = &agdev->uac2.c_prm;
 	prm->max_psize = hs_epout_desc.wMaxPacketSize;
@@ -1029,8 +1029,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 err:
 	kfree(agdev->uac2.p_prm.rbuf);
 	kfree(agdev->uac2.c_prm.rbuf);
-	usb_free_descriptors(fn->hs_descriptors);
-	usb_free_descriptors(fn->descriptors);
+	usb_free_all_descriptors(fn);
 	if (agdev->in_ep)
 		agdev->in_ep->driver_data = NULL;
 	if (agdev->out_ep)
@@ -1042,8 +1041,6 @@ static void
 afunc_unbind(struct usb_configuration *cfg, struct usb_function *fn)
 {
 	struct audio_dev *agdev = func_to_agdev(fn);
-	struct usb_composite_dev *cdev = cfg->cdev;
-	struct usb_gadget *gadget = cdev->gadget;
 	struct uac2_rtd_params *prm;
 
 	alsa_uac2_exit(agdev);
@@ -1053,10 +1050,7 @@ afunc_unbind(struct usb_configuration *cfg, struct usb_function *fn)
 
 	prm = &agdev->uac2.c_prm;
 	kfree(prm->rbuf);
-
-	if (gadget_is_dualspeed(gadget))
-		usb_free_descriptors(fn->hs_descriptors);
-	usb_free_descriptors(fn->descriptors);
+	usb_free_all_descriptors(fn);
 
 	if (agdev->in_ep)
 		agdev->in_ep->driver_data = NULL;
diff --git a/drivers/usb/gadget/f_uvc.c b/drivers/usb/gadget/f_uvc.c
index 10f13c1213c7..28ff2546a5b3 100644
--- a/drivers/usb/gadget/f_uvc.c
+++ b/drivers/usb/gadget/f_uvc.c
@@ -583,9 +583,7 @@ uvc_function_unbind(struct usb_configuration *c, struct usb_function *f)
 	usb_ep_free_request(cdev->gadget->ep0, uvc->control_req);
 	kfree(uvc->control_buf);
 
-	kfree(f->descriptors);
-	kfree(f->hs_descriptors);
-	kfree(f->ss_descriptors);
+	usb_free_all_descriptors(f);
 
 	kfree(uvc);
 }
@@ -651,49 +649,40 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	/* sanity check the streaming endpoint module parameters */
 	if (streaming_maxpacket > 1024)
 		streaming_maxpacket = 1024;
+	/*
+	 * Fill in the HS descriptors from the module parameters for the Video
+	 * Streaming endpoint.
+	 * NOTE: We assume that the user knows what they are doing and won't
+	 * give parameters that their UDC doesn't support.
+	 */
+	uvc_hs_streaming_ep.wMaxPacketSize = streaming_maxpacket;
+	uvc_hs_streaming_ep.wMaxPacketSize |= streaming_mult << 11;
+	uvc_hs_streaming_ep.bInterval = streaming_interval;
+	uvc_hs_streaming_ep.bEndpointAddress =
+		uvc_fs_streaming_ep.bEndpointAddress;
 
-	/* Copy descriptors for FS. */
-	f->descriptors = uvc_copy_descriptors(uvc, USB_SPEED_FULL);
-
-	/* support high speed hardware */
-	if (gadget_is_dualspeed(cdev->gadget)) {
-		/*
-		 * Fill in the HS descriptors from the module parameters for the
-		 * Video Streaming endpoint.
-		 * NOTE: We assume that the user knows what they are doing and
-		 * won't give parameters that their UDC doesn't support.
-		 */
-		uvc_hs_streaming_ep.wMaxPacketSize = streaming_maxpacket;
-		uvc_hs_streaming_ep.wMaxPacketSize |= streaming_mult << 11;
-		uvc_hs_streaming_ep.bInterval = streaming_interval;
-		uvc_hs_streaming_ep.bEndpointAddress =
-				uvc_fs_streaming_ep.bEndpointAddress;
-
-		/* Copy descriptors. */
-		f->hs_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_HIGH);
-	}
+	/*
+	 * Fill in the SS descriptors from the module parameters for the Video
+	 * Streaming endpoint.
+	 * NOTE: We assume that the user knows what they are doing and won't
+	 * give parameters that their UDC doesn't support.
+	 */
+	uvc_ss_streaming_ep.wMaxPacketSize = streaming_maxpacket;
+	uvc_ss_streaming_ep.bInterval = streaming_interval;
+	uvc_ss_streaming_comp.bmAttributes = streaming_mult;
+	uvc_ss_streaming_comp.bMaxBurst = streaming_maxburst;
+	uvc_ss_streaming_comp.wBytesPerInterval =
+		streaming_maxpacket * (streaming_mult + 1) *
+		(streaming_maxburst + 1);
+	uvc_ss_streaming_ep.bEndpointAddress =
+		uvc_fs_streaming_ep.bEndpointAddress;
 
-	/* support super speed hardware */
-	if (gadget_is_superspeed(c->cdev->gadget)) {
-		/*
-		 * Fill in the SS descriptors from the module parameters for the
-		 * Video Streaming endpoint.
-		 * NOTE: We assume that the user knows what they are doing and
-		 * won't give parameters that their UDC doesn't support.
-		 */
-		uvc_ss_streaming_ep.wMaxPacketSize = streaming_maxpacket;
-		uvc_ss_streaming_ep.bInterval = streaming_interval;
-		uvc_ss_streaming_comp.bmAttributes = streaming_mult;
-		uvc_ss_streaming_comp.bMaxBurst = streaming_maxburst;
-		uvc_ss_streaming_comp.wBytesPerInterval =
-			streaming_maxpacket * (streaming_mult + 1) *
-			(streaming_maxburst + 1);
-		uvc_ss_streaming_ep.bEndpointAddress =
-				uvc_fs_streaming_ep.bEndpointAddress;
-
-		/* Copy descriptors. */
+	/* Copy descriptors */
+	f->fs_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_FULL);
+	if (gadget_is_dualspeed(cdev->gadget))
+		f->hs_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_HIGH);
+	if (gadget_is_superspeed(c->cdev->gadget))
 		f->ss_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_SUPER);
-	}
 
 	/* Preallocate control endpoint request. */
 	uvc->control_req = usb_ep_alloc_request(cdev->gadget->ep0, GFP_KERNEL);
@@ -741,9 +730,7 @@ error:
 		kfree(uvc->control_buf);
 	}
 
-	kfree(f->descriptors);
-	kfree(f->hs_descriptors);
-	kfree(f->ss_descriptors);
+	usb_free_all_descriptors(f);
 	return ret;
 }
 
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index e156e3f26727..35bcc83d1e04 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -983,8 +983,10 @@ static int __init printer_func_bind(struct usb_configuration *c,
 {
 	struct printer_dev *dev = container_of(f, struct printer_dev, function);
 	struct usb_composite_dev *cdev = c->cdev;
-	struct usb_ep		*in_ep, *out_ep;
+	struct usb_ep *in_ep;
+	struct usb_ep *out_ep = NULL;
 	int id;
+	int ret;
 
 	id = usb_interface_id(c, f);
 	if (id < 0)
@@ -1010,6 +1012,11 @@ autoconf_fail:
 	hs_ep_in_desc.bEndpointAddress = fs_ep_in_desc.bEndpointAddress;
 	hs_ep_out_desc.bEndpointAddress = fs_ep_out_desc.bEndpointAddress;
 
+	ret = usb_assign_descriptors(f, fs_printer_function,
+			hs_printer_function, NULL);
+	if (ret)
+		return ret;
+
 	dev->in_ep = in_ep;
 	dev->out_ep = out_ep;
 	return 0;
@@ -1018,6 +1025,7 @@ autoconf_fail:
 static void printer_func_unbind(struct usb_configuration *c,
 		struct usb_function *f)
 {
+	usb_free_all_descriptors(f);
 }
 
 static int printer_func_set_alt(struct usb_function *f,
@@ -1110,8 +1118,6 @@ static int __init printer_bind_config(struct usb_configuration *c)
 	dev = &usb_printer_gadget;
 
 	dev->function.name = shortname;
-	dev->function.descriptors = fs_printer_function;
-	dev->function.hs_descriptors = hs_printer_function;
 	dev->function.bind = printer_func_bind;
 	dev->function.setup = printer_func_setup;
 	dev->function.unbind = printer_func_unbind;
diff --git a/drivers/usb/gadget/tcm_usb_gadget.c b/drivers/usb/gadget/tcm_usb_gadget.c
index 49596096b435..27a2337f9868 100644
--- a/drivers/usb/gadget/tcm_usb_gadget.c
+++ b/drivers/usb/gadget/tcm_usb_gadget.c
@@ -2240,6 +2240,7 @@ static int usbg_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_gadget	*gadget = c->cdev->gadget;
 	struct usb_ep		*ep;
 	int			iface;
+	int			ret;
 
 	iface = usb_interface_id(c, f);
 	if (iface < 0)
@@ -2290,6 +2291,11 @@ static int usbg_bind(struct usb_configuration *c, struct usb_function *f)
 		uasp_ss_status_desc.bEndpointAddress;
 	uasp_fs_cmd_desc.bEndpointAddress = uasp_ss_cmd_desc.bEndpointAddress;
 
+	ret = usb_assign_descriptors(f, uasp_fs_function_desc,
+			uasp_hs_function_desc, uasp_ss_function_desc);
+	if (ret)
+		goto ep_fail;
+
 	return 0;
 ep_fail:
 	pr_err("Can't claim all required eps\n");
@@ -2305,6 +2311,7 @@ static void usbg_unbind(struct usb_configuration *c, struct usb_function *f)
 {
 	struct f_uas *fu = to_f_uas(f);
 
+	usb_free_all_descriptors(f);
 	kfree(fu);
 }
 
@@ -2385,9 +2392,6 @@ static int usbg_cfg_bind(struct usb_configuration *c)
 	if (!fu)
 		return -ENOMEM;
 	fu->function.name = "Target Function";
-	fu->function.descriptors = uasp_fs_function_desc;
-	fu->function.hs_descriptors = uasp_hs_function_desc;
-	fu->function.ss_descriptors = uasp_ss_function_desc;
 	fu->function.bind = usbg_bind;
 	fu->function.unbind = usbg_unbind;
 	fu->function.set_alt = usbg_set_alt;
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 8634a127bdd3..b09c37e04a91 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -119,7 +119,7 @@ struct usb_configuration;
 struct usb_function {
 	const char			*name;
 	struct usb_gadget_strings	**strings;
-	struct usb_descriptor_header	**descriptors;
+	struct usb_descriptor_header	**fs_descriptors;
 	struct usb_descriptor_header	**hs_descriptors;
 	struct usb_descriptor_header	**ss_descriptors;
 
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 5b6e50888248..0af6569b8cc6 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -939,6 +939,13 @@ static inline void usb_free_descriptors(struct usb_descriptor_header **v)
 	kfree(v);
 }
 
+struct usb_function;
+int usb_assign_descriptors(struct usb_function *f,
+		struct usb_descriptor_header **fs,
+		struct usb_descriptor_header **hs,
+		struct usb_descriptor_header **ss);
+void usb_free_all_descriptors(struct usb_function *f);
+
 /*-------------------------------------------------------------------------*/
 
 /* utility to simplify map/unmap of usb_requests to/from DMA */
-- 
cgit v1.2.3-55-g7522


From e5a55a898720096f43bc24938f8875c0a1b34cd7 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Wed, 24 Oct 2012 08:12:57 +0000
Subject: net: create generic bridge ops

The PF_BRIDGE:RTM_{GET|SET}LINK nlmsg family and type are
currently embedded in the ./net/bridge module. This prohibits
them from being used by other bridging devices. One example
of this being hardware that has embedded bridging components.

In order to use these nlmsg types more generically this patch
adds two net_device_ops hooks. One to set link bridge attributes
and another to dump the current bride attributes.

	ndo_bridge_setlink()
	ndo_bridge_getlink()

CC: Lennert Buytenhek <buytenh@wantstofly.org>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++
 net/bridge/br_device.c    |  2 ++
 net/bridge/br_netlink.c   | 73 ++++++++----------------------------------
 net/bridge/br_private.h   |  3 ++
 net/core/rtnetlink.c      | 80 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 108 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8eda0276f03..7bf867c97043 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -887,6 +887,10 @@ struct netdev_fcoe_hbainfo {
  *		       struct net_device *dev, int idx)
  *	Used to add FDB entries to dump requests. Implementers should add
  *	entries to skb and update idx with the number of entries.
+ *
+ * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
+ * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
+ *			     struct net_device *dev)
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -998,6 +1002,12 @@ struct net_device_ops {
 						struct netlink_callback *cb,
 						struct net_device *dev,
 						int idx);
+
+	int			(*ndo_bridge_setlink)(struct net_device *dev,
+						      struct nlmsghdr *nlh);
+	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
+						      u32 pid, u32 seq,
+						      struct net_device *dev);
 };
 
 /*
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 070e8a68cfc6..63b5b088e80f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -313,6 +313,8 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_fdb_add		 = br_fdb_add,
 	.ndo_fdb_del		 = br_fdb_delete,
 	.ndo_fdb_dump		 = br_fdb_dump,
+	.ndo_bridge_getlink	 = br_getlink,
+	.ndo_bridge_setlink	 = br_setlink,
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 093f527276a3..743511bb7319 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -111,54 +111,33 @@ errout:
 /*
  * Dump information about all ports, in response to GETLINK
  */
-static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+	       struct net_device *dev)
 {
-	struct net *net = sock_net(skb->sk);
-	struct net_device *dev;
-	int idx;
-
-	idx = 0;
-	rcu_read_lock();
-	for_each_netdev_rcu(net, dev) {
-		struct net_bridge_port *port = br_port_get_rcu(dev);
-
-		/* not a bridge port */
-		if (!port || idx < cb->args[0])
-			goto skip;
-
-		if (br_fill_ifinfo(skb, port,
-				   NETLINK_CB(cb->skb).portid,
-				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
-				   NLM_F_MULTI) < 0)
-			break;
-skip:
-		++idx;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
+	int err = 0;
+	struct net_bridge_port *port = br_port_get_rcu(dev);
+
+	/* not a bridge port */
+	if (!port)
+		goto out;
 
-	return skb->len;
+	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI);
+out:
+	return err;
 }
 
 /*
  * Change state of port (ie from forwarding to blocking etc)
  * Used by spanning tree in user space.
  */
-static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
-	struct net *net = sock_net(skb->sk);
 	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
-	struct net_device *dev;
 	struct net_bridge_port *p;
 	u8 new_state;
 
-	if (nlmsg_len(nlh) < sizeof(*ifm))
-		return -EINVAL;
-
 	ifm = nlmsg_data(nlh);
-	if (ifm->ifi_family != AF_BRIDGE)
-		return -EPFNOSUPPORT;
 
 	protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
 	if (!protinfo || nla_len(protinfo) < sizeof(u8))
@@ -168,10 +147,6 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	if (new_state > BR_STATE_BLOCKING)
 		return -EINVAL;
 
-	dev = __dev_get_by_index(net, ifm->ifi_index);
-	if (!dev)
-		return -ENODEV;
-
 	p = br_port_get_rtnl(dev);
 	if (!p)
 		return -EINVAL;
@@ -218,29 +193,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
 
 int __init br_netlink_init(void)
 {
-	int err;
-
-	err = rtnl_link_register(&br_link_ops);
-	if (err < 0)
-		goto err1;
-
-	err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL,
-			      br_dump_ifinfo, NULL);
-	if (err)
-		goto err2;
-	err = __rtnl_register(PF_BRIDGE, RTM_SETLINK,
-			      br_rtm_setlink, NULL, NULL);
-	if (err)
-		goto err3;
-
-	return 0;
-
-err3:
-	rtnl_unregister_all(PF_BRIDGE);
-err2:
-	rtnl_link_unregister(&br_link_ops);
-err1:
-	return err;
+	return rtnl_link_register(&br_link_ops);
 }
 
 void __exit br_netlink_fini(void)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9b278c4ebee1..fdcd5f626ca6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -553,6 +553,9 @@ extern struct rtnl_link_ops br_link_ops;
 extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
+extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
+extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+		      struct net_device *dev);
 
 #ifdef CONFIG_SYSFS
 /* br_sysfs_if.c */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 64fe3cca2a4e..a068666b322f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2252,6 +2252,83 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	int idx = 0;
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	u32 seq = cb->nlh->nlmsg_seq;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		struct net_device *master = dev->master;
+
+		if (idx < cb->args[0])
+			continue;
+
+		if (master && master->netdev_ops->ndo_bridge_getlink) {
+			const struct net_device_ops *bops = master->netdev_ops;
+			int err = bops->ndo_bridge_getlink(skb, portid,
+							   seq, dev);
+
+			if (err < 0)
+				break;
+			else
+				idx++;
+		}
+
+		if (ops->ndo_bridge_getlink) {
+			int err = ops->ndo_bridge_getlink(skb, portid,
+							  seq, dev);
+
+			if (err < 0)
+				break;
+			else
+				idx++;
+		}
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	int err = -EINVAL;
+
+	if (nlmsg_len(nlh) < sizeof(*ifm))
+		return -EINVAL;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_family != AF_BRIDGE)
+		return -EPFNOSUPPORT;
+
+	dev = __dev_get_by_index(net, ifm->ifi_index);
+	if (!dev) {
+		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		return -ENODEV;
+	}
+
+	if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) {
+		err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
+		if (err)
+			goto out;
+	}
+
+	if (dev->netdev_ops->ndo_bridge_setlink)
+		err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
+
+out:
+	return err;
+}
+
 /* Protected by RTNL sempahore.  */
 static struct rtattr **rta_buf;
 static int rtattr_max;
@@ -2433,5 +2510,8 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+
+	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
 
-- 
cgit v1.2.3-55-g7522


From 815cccbf10b27115fb3e5827bef26768616e5e27 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Wed, 24 Oct 2012 08:13:09 +0000
Subject: ixgbe: add setlink, getlink support to ixgbe and ixgbevf

This adds support for the net device ops to manage the embedded
hardware bridge on ixgbe devices. With this patch the bridge
mode can be toggled between VEB and VEPA to support stacking
macvlan devices or using the embedded switch without any SW
component in 802.1Qbg/br environments.

Additionally, this adds source address pruning to the ixgbevf
driver to prune any frames sent back from a reflective relay on
the switch. This is required because the existing hardware does
not support this. Without it frames get pushed into the stack
with its own src mac which is invalid per 802.1Qbg VEPA
definition.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     | 59 +++++++++++++++++++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c    |  3 ++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 10 ++++
 include/linux/rtnetlink.h                         |  3 ++
 net/core/rtnetlink.c                              | 50 +++++++++++++++++++
 5 files changed, 122 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 88d636a7459c..9a88e01216bb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -44,6 +44,7 @@
 #include <linux/ethtool.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
+#include <linux/if_bridge.h>
 #include <linux/prefetch.h>
 #include <scsi/fc/fc_fcoe.h>
 
@@ -3224,7 +3225,6 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1);
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), (~0) << vf_shift);
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1);
-	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
 
 	/* Map PF MAC address in RAR Entry 0 to first pool following VFs */
 	hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0));
@@ -3247,8 +3247,6 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 
 	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
 
-	/* enable Tx loopback for VF/PF communication */
-	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
 
 	/* Enable MAC Anti-Spoofing */
 	hw->mac.ops.set_mac_anti_spoofing(hw, (adapter->num_vfs != 0),
@@ -7025,6 +7023,59 @@ static int ixgbe_ndo_fdb_dump(struct sk_buff *skb,
 	return idx;
 }
 
+static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
+				    struct nlmsghdr *nlh)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct nlattr *attr, *br_spec;
+	int rem;
+
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return -EOPNOTSUPP;
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		__u16 mode;
+		u32 reg = 0;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+
+		mode = nla_get_u16(attr);
+		if (mode == BRIDGE_MODE_VEPA)
+			reg = 0;
+		else if (mode == BRIDGE_MODE_VEB)
+			reg = IXGBE_PFDTXGSWC_VT_LBEN;
+		else
+			return -EINVAL;
+
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC, reg);
+
+		e_info(drv, "enabling bridge mode: %s\n",
+			mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+	}
+
+	return 0;
+}
+
+static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				    struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	u16 mode;
+
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return 0;
+
+	if (IXGBE_READ_REG(&adapter->hw, IXGBE_PFDTXGSWC) & 1)
+		mode = BRIDGE_MODE_VEB;
+	else
+		mode = BRIDGE_MODE_VEPA;
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode);
+}
+
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
@@ -7064,6 +7115,8 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_fdb_add		= ixgbe_ndo_fdb_add,
 	.ndo_fdb_del		= ixgbe_ndo_fdb_del,
 	.ndo_fdb_dump		= ixgbe_ndo_fdb_dump,
+	.ndo_bridge_setlink	= ixgbe_ndo_bridge_setlink,
+	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 96876b7442b1..7e3ac28ffba8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -117,6 +117,9 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 		}
 	}
 
+	/* Initialize default switching mode VEB */
+	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
+
 	/* If call to enable VFs succeeded then allocate memory
 	 * for per VF control structures.
 	 */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 07d7eaba6f1b..ac6a76deb01d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -478,6 +478,16 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		}
 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 
+		/* Workaround hardware that can't do proper VEPA multicast
+		 * source pruning.
+		 */
+		if ((skb->pkt_type & (PACKET_BROADCAST | PACKET_MULTICAST)) &&
+		    !(compare_ether_addr(adapter->netdev->dev_addr,
+					eth_hdr(skb)->h_source))) {
+			dev_kfree_skb_irq(skb);
+			goto next_desc;
+		}
+
 		ixgbevf_receive_skb(q_vector, skb, staterr, rx_desc);
 
 next_desc:
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 7002bbfd5d4a..489dd7bb28ec 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -69,4 +69,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb,
 			     struct netlink_callback *cb,
 			     struct net_device *dev,
 			     int idx);
+
+extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				   struct net_device *dev, u16 mode);
 #endif	/* __LINUX_RTNETLINK_H */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8d2af0f77d36..51dc58ff0091 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2252,6 +2252,56 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+			    struct net_device *dev, u16 mode)
+{
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	struct nlattr *br_afspec;
+	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_BRIDGE;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = 0;
+
+
+	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+	    nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
+	    (dev->master &&
+	     nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
+	    (dev->addr_len &&
+	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
+	    (dev->ifindex != dev->iflink &&
+	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+		goto nla_put_failure;
+
+	br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
+	if (!br_afspec)
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) ||
+	    nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
+		nla_nest_cancel(skb, br_afspec);
+		goto nla_put_failure;
+	}
+	nla_nest_end(skb, br_afspec);
+
+	return nlmsg_end(skb, nlh);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
+
 static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-- 
cgit v1.2.3-55-g7522


From ecd5cf5dc5b07bc57aedf4c92453d6c343e3d840 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 26 Oct 2012 11:52:08 +0000
Subject: net: compute skb->rxhash if nic hash may be 3-tuple

Network device drivers can communicate a Toeplitz hash in skb->rxhash,
but devices differ in their hashing capabilities. All compute a 5-tuple
hash for TCP over IPv4, but for other connection-oriented protocols,
they may compute only a 3-tuple. This breaks RPS load balancing, e.g.,
for TCP over IPv6 flows. Additionally, for GRE and other tunnels,
the kernel computes a 5-tuple hash over the inner packet if possible,
but devices do not.

This patch recomputes the rxhash in software in all cases where it
cannot be certain that a 5-tuple was computed. Device drivers can avoid
recomputation by setting the skb->l4_rxhash flag.

Recomputing adds cycles to each packet when RPS is enabled or the
packet arrives over a tunnel. A comparison of 200x TCP_STREAM between
two servers running unmodified netnext with rxhash computation
in hardware vs software (using ethtool -K eth0 rxhash [on|off]) shows
how much time is spent in __skb_get_rxhash in this worst case:

     0.03%          swapper  [kernel.kallsyms]     [k] __skb_get_rxhash
     0.03%          swapper  [kernel.kallsyms]     [k] __skb_get_rxhash
     0.05%          swapper  [kernel.kallsyms]     [k] __skb_get_rxhash

With 200x TCP_RR it increases to

     0.10%          netperf  [kernel.kallsyms]     [k] __skb_get_rxhash
     0.10%          netperf  [kernel.kallsyms]     [k] __skb_get_rxhash
     0.10%          netperf  [kernel.kallsyms]     [k] __skb_get_rxhash

I considered having the patch explicitly skips recomputation when it knows
that it will not improve the hash (TCP over IPv4), but that conditional
complicates code without saving many cycles in practice, because it has
to take place after flow dissector.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6a2c34e6d962..a2a0bdb95a8f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -643,7 +643,7 @@ extern unsigned int   skb_find_text(struct sk_buff *skb, unsigned int from,
 extern void __skb_get_rxhash(struct sk_buff *skb);
 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
 {
-	if (!skb->rxhash)
+	if (!skb->l4_rxhash)
 		__skb_get_rxhash(skb);
 
 	return skb->rxhash;
-- 
cgit v1.2.3-55-g7522


From f3335031b9452baebfe49b8b5e55d3fe0c4677d1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Sat, 27 Oct 2012 02:26:17 +0000
Subject: net: filter: add vlan tag access

BPF filters lack ability to access skb->vlan_tci

This patch adds two new ancillary accessors :

SKF_AD_VLAN_TAG         (44) mapped to vlan_tx_tag_get(skb)

SKF_AD_VLAN_TAG_PRESENT (48) mapped to vlan_tx_tag_present(skb)

This allows libpcap/tcpdump to use a kernel filter instead of
having to fallback to accept all packets, then filter them in
user space.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Ani Sinha <ani@aristanetworks.com>
Suggested-by: Daniel Borkmann <danborkmann@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h      | 2 ++
 include/uapi/linux/filter.h | 4 +++-
 net/core/filter.c           | 9 +++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 24d251f3bab0..c9f0005c35e2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -123,6 +123,8 @@ enum {
 	BPF_S_ANC_CPU,
 	BPF_S_ANC_ALU_XOR_X,
 	BPF_S_ANC_SECCOMP_LD_W,
+	BPF_S_ANC_VLAN_TAG,
+	BPF_S_ANC_VLAN_TAG_PRESENT,
 };
 
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 3d7922433aba..9cfde6941099 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -127,7 +127,9 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_RXHASH	32
 #define SKF_AD_CPU	36
 #define SKF_AD_ALU_XOR_X	40
-#define SKF_AD_MAX	44
+#define SKF_AD_VLAN_TAG	44
+#define SKF_AD_VLAN_TAG_PRESENT 48
+#define SKF_AD_MAX	52
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d92ebb7fbcf..5a114d41bf11 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,6 +39,7 @@
 #include <linux/reciprocal_div.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
+#include <linux/if_vlan.h>
 
 /* No hurry in this branch
  *
@@ -341,6 +342,12 @@ load_b:
 		case BPF_S_ANC_CPU:
 			A = raw_smp_processor_id();
 			continue;
+		case BPF_S_ANC_VLAN_TAG:
+			A = vlan_tx_tag_get(skb);
+			continue;
+		case BPF_S_ANC_VLAN_TAG_PRESENT:
+			A = !!vlan_tx_tag_present(skb);
+			continue;
 		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
@@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(RXHASH);
 			ANCILLARY(CPU);
 			ANCILLARY(ALU_XOR_X);
+			ANCILLARY(VLAN_TAG);
+			ANCILLARY(VLAN_TAG_PRESENT);
 			}
 		}
 		ftest->code = code;
-- 
cgit v1.2.3-55-g7522


From c73cee717e7d5da0698acb720ad1219646fe4f46 Mon Sep 17 00:00:00 2001
From: Alan Stern
Date: Wed, 31 Oct 2012 13:21:06 -0400
Subject: USB: EHCI: remove ehci_port_power() routine

This patch (as1623) removes the ehci_port_power() routine and all the
places that call it.  There's no reason for ehci-hcd to change the
port power settings; the hub driver takes care of all that stuff.

There is one exception: When the controller is resumed from
hibernation or following a loss of power, the ports that are supposed
to be handed over to a companion controller must be powered on first.
Otherwise the handover won't work.  This process is not visible to the
hub driver, so it has to be handled in ehci-hcd.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-cns3xxx/cns3420vb.c     |  1 -
 arch/mips/ath79/dev-usb.c             |  2 --
 arch/mips/loongson1/common/platform.c |  1 -
 drivers/usb/chipidea/host.c           | 18 +-----------------
 drivers/usb/host/ehci-atmel.c         |  9 +--------
 drivers/usb/host/ehci-fsl.c           |  1 -
 drivers/usb/host/ehci-grlib.c         | 18 +-----------------
 drivers/usb/host/ehci-hcd.c           | 21 ---------------------
 drivers/usb/host/ehci-hub.c           | 13 +++++++++++++
 drivers/usb/host/ehci-msm.c           |  1 -
 drivers/usb/host/ehci-mxc.c           |  8 +-------
 drivers/usb/host/ehci-octeon.c        |  3 ---
 drivers/usb/host/ehci-omap.c          |  3 ---
 drivers/usb/host/ehci-orion.c         | 16 +---------------
 drivers/usb/host/ehci-pci.c           |  1 -
 drivers/usb/host/ehci-platform.c      |  5 -----
 drivers/usb/host/ehci-pmcmsp.c        |  1 -
 drivers/usb/host/ehci-sh.c            |  9 +--------
 drivers/usb/host/ehci-spear.c         |  9 +--------
 drivers/usb/host/ehci-tegra.c         |  8 +-------
 include/linux/usb/ehci_pdriver.h      |  2 --
 21 files changed, 21 insertions(+), 129 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-cns3xxx/cns3420vb.c b/arch/arm/mach-cns3xxx/cns3420vb.c
index 8a00cee82228..ae305397003c 100644
--- a/arch/arm/mach-cns3xxx/cns3420vb.c
+++ b/arch/arm/mach-cns3xxx/cns3420vb.c
@@ -162,7 +162,6 @@ static void csn3xxx_usb_power_off(struct platform_device *pdev)
 }
 
 static struct usb_ehci_pdata cns3xxx_usb_ehci_pdata = {
-	.port_power_off	= 1,
 	.power_on	= csn3xxx_usb_power_on,
 	.power_off	= csn3xxx_usb_power_off,
 };
diff --git a/arch/mips/ath79/dev-usb.c b/arch/mips/ath79/dev-usb.c
index 072bb9be2304..bd2bc108e1b5 100644
--- a/arch/mips/ath79/dev-usb.c
+++ b/arch/mips/ath79/dev-usb.c
@@ -50,13 +50,11 @@ static u64 ath79_ehci_dmamask = DMA_BIT_MASK(32);
 
 static struct usb_ehci_pdata ath79_ehci_pdata_v1 = {
 	.has_synopsys_hc_bug	= 1,
-	.port_power_off		= 1,
 };
 
 static struct usb_ehci_pdata ath79_ehci_pdata_v2 = {
 	.caps_offset		= 0x100,
 	.has_tt			= 1,
-	.port_power_off		= 1,
 };
 
 static struct platform_device ath79_ehci_device = {
diff --git a/arch/mips/loongson1/common/platform.c b/arch/mips/loongson1/common/platform.c
index 2874bf224418..0412ad61e290 100644
--- a/arch/mips/loongson1/common/platform.c
+++ b/arch/mips/loongson1/common/platform.c
@@ -109,7 +109,6 @@ static struct resource ls1x_ehci_resources[] = {
 };
 
 static struct usb_ehci_pdata ls1x_ehci_pdata = {
-	.port_power_off	= 1,
 };
 
 struct platform_device ls1x_ehci_device = {
diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index ebff9f4f56ec..ebc041ff9cd5 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -31,22 +31,6 @@
 #include "bits.h"
 #include "host.h"
 
-static int ci_ehci_setup(struct usb_hcd *hcd)
-{
-	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-	int ret;
-
-	hcd->has_tt = 1;
-
-	ret = ehci_setup(hcd);
-	if (ret)
-		return ret;
-
-	ehci_port_power(ehci, 0);
-
-	return ret;
-}
-
 static const struct hc_driver ci_ehci_hc_driver = {
 	.description	= "ehci_hcd",
 	.product_desc	= "ChipIdea HDRC EHCI",
@@ -61,7 +45,7 @@ static const struct hc_driver ci_ehci_hc_driver = {
 	/*
 	 * basic lifecycle operations
 	 */
-	.reset		= ci_ehci_setup,
+	.reset		= ehci_setup,
 	.start		= ehci_run,
 	.stop		= ehci_stop,
 	.shutdown	= ehci_shutdown,
diff --git a/drivers/usb/host/ehci-atmel.c b/drivers/usb/host/ehci-atmel.c
index 411bb74152eb..d23321ec0e46 100644
--- a/drivers/usb/host/ehci-atmel.c
+++ b/drivers/usb/host/ehci-atmel.c
@@ -53,18 +53,11 @@ static void atmel_stop_ehci(struct platform_device *pdev)
 static int ehci_atmel_setup(struct usb_hcd *hcd)
 {
 	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-	int retval;
 
 	/* registers start at offset 0x0 */
 	ehci->caps = hcd->regs;
 
-	retval = ehci_setup(hcd);
-	if (retval)
-		return retval;
-
-	ehci_port_power(ehci, 0);
-
-	return retval;
+	return ehci_setup(hcd);
 }
 
 static const struct hc_driver ehci_atmel_hc_driver = {
diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index 0d2f35ca93f1..fd9b5424b860 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -349,7 +349,6 @@ static int ehci_fsl_reinit(struct ehci_hcd *ehci)
 {
 	if (ehci_fsl_usb_setup(ehci))
 		return -EINVAL;
-	ehci_port_power(ehci, 0);
 
 	return 0;
 }
diff --git a/drivers/usb/host/ehci-grlib.c b/drivers/usb/host/ehci-grlib.c
index 3180cb3624d9..da4269550fba 100644
--- a/drivers/usb/host/ehci-grlib.c
+++ b/drivers/usb/host/ehci-grlib.c
@@ -34,22 +34,6 @@
 
 #define GRUSBHC_HCIVERSION 0x0100 /* Known value of cap. reg. HCIVERSION */
 
-/* called during probe() after chip reset completes */
-static int ehci_grlib_setup(struct usb_hcd *hcd)
-{
-	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);
-	int		retval;
-
-	retval = ehci_setup(hcd);
-	if (retval)
-		return retval;
-
-	ehci_port_power(ehci, 1);
-
-	return retval;
-}
-
-
 static const struct hc_driver ehci_grlib_hc_driver = {
 	.description		= hcd_name,
 	.product_desc		= "GRLIB GRUSBHC EHCI",
@@ -64,7 +48,7 @@ static const struct hc_driver ehci_grlib_hc_driver = {
 	/*
 	 * basic lifecycle operations
 	 */
-	.reset			= ehci_grlib_setup,
+	.reset			= ehci_setup,
 	.start			= ehci_run,
 	.stop			= ehci_stop,
 	.shutdown		= ehci_shutdown,
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 68dd1c99b1f5..ab4a769a4104 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -371,24 +371,6 @@ static void ehci_shutdown(struct usb_hcd *hcd)
 	hrtimer_cancel(&ehci->hrtimer);
 }
 
-static void ehci_port_power (struct ehci_hcd *ehci, int is_on)
-{
-	unsigned port;
-
-	if (!HCS_PPC (ehci->hcs_params))
-		return;
-
-	ehci_dbg (ehci, "...power%s ports...\n", is_on ? "up" : "down");
-	for (port = HCS_N_PORTS (ehci->hcs_params); port > 0; )
-		(void) ehci_hub_control(ehci_to_hcd(ehci),
-				is_on ? SetPortFeature : ClearPortFeature,
-				USB_PORT_FEAT_POWER,
-				port--, NULL, 0);
-	/* Flush those writes */
-	ehci_readl(ehci, &ehci->regs->command);
-	msleep(20);
-}
-
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -1184,9 +1166,6 @@ static int __maybe_unused ehci_resume(struct usb_hcd *hcd, bool hibernated)
 	ehci->rh_state = EHCI_RH_SUSPENDED;
 	spin_unlock_irq(&ehci->lock);
 
-	/* here we "know" root ports should always stay powered */
-	ehci_port_power(ehci, 1);
-
 	return 1;
 }
 
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index a2c56cdd2c3a..a59c61fea09f 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -56,6 +56,19 @@ static void ehci_handover_companion_ports(struct ehci_hcd *ehci)
 	if (!ehci->owned_ports)
 		return;
 
+	/* Make sure the ports are powered */
+	port = HCS_N_PORTS(ehci->hcs_params);
+	while (port--) {
+		if (test_bit(port, &ehci->owned_ports)) {
+			reg = &ehci->regs->port_status[port];
+			status = ehci_readl(ehci, reg) & ~PORT_RWC_BITS;
+			if (!(status & PORT_POWER)) {
+				status |= PORT_POWER;
+				ehci_writel(ehci, status, reg);
+			}
+		}
+	}
+
 	/* Give the connections some time to appear */
 	msleep(20);
 
diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c
index 4af4dc5b618c..7fa1ba4de789 100644
--- a/drivers/usb/host/ehci-msm.c
+++ b/drivers/usb/host/ehci-msm.c
@@ -53,7 +53,6 @@ static int ehci_msm_reset(struct usb_hcd *hcd)
 	/* Disable streaming mode and select host mode */
 	writel(0x13, USB_USBMODE);
 
-	ehci_port_power(ehci, 1);
 	return 0;
 }
 
diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c
index 4a08fc0b27c9..a37224a4a49b 100644
--- a/drivers/usb/host/ehci-mxc.c
+++ b/drivers/usb/host/ehci-mxc.c
@@ -40,16 +40,10 @@ struct ehci_mxc_priv {
 static int ehci_mxc_setup(struct usb_hcd *hcd)
 {
 	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-	int retval;
 
 	hcd->has_tt = 1;
 
-	retval = ehci_setup(hcd);
-	if (retval)
-		return retval;
-
-	ehci_port_power(ehci, 0);
-	return 0;
+	return ehci_setup(hcd);
 }
 
 static const struct hc_driver ehci_mxc_hc_driver = {
diff --git a/drivers/usb/host/ehci-octeon.c b/drivers/usb/host/ehci-octeon.c
index ba26957abf46..a89750fff4ff 100644
--- a/drivers/usb/host/ehci-octeon.c
+++ b/drivers/usb/host/ehci-octeon.c
@@ -159,9 +159,6 @@ static int ehci_octeon_drv_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, hcd);
 
-	/* root ports should always stay powered */
-	ehci_port_power(ehci, 1);
-
 	return 0;
 err3:
 	ehci_octeon_stop();
diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c
index d7fe287d0678..44e7d0f638e8 100644
--- a/drivers/usb/host/ehci-omap.c
+++ b/drivers/usb/host/ehci-omap.c
@@ -146,9 +146,6 @@ static int omap_ehci_init(struct usb_hcd *hcd)
 			gpio_set_value_cansleep(pdata->reset_gpio_port[1], 1);
 	}
 
-	/* root ports should always stay powered */
-	ehci_port_power(ehci, 1);
-
 	return rc;
 }
 
diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c
index 9c2717d66730..96da679becef 100644
--- a/drivers/usb/host/ehci-orion.c
+++ b/drivers/usb/host/ehci-orion.c
@@ -101,20 +101,6 @@ static void orion_usb_phy_v1_setup(struct usb_hcd *hcd)
 	wrl(USB_MODE, 0x13);
 }
 
-static int ehci_orion_setup(struct usb_hcd *hcd)
-{
-	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-	int retval;
-
-	retval = ehci_setup(hcd);
-	if (retval)
-		return retval;
-
-	ehci_port_power(ehci, 0);
-
-	return retval;
-}
-
 static const struct hc_driver ehci_orion_hc_driver = {
 	.description = hcd_name,
 	.product_desc = "Marvell Orion EHCI",
@@ -129,7 +115,7 @@ static const struct hc_driver ehci_orion_hc_driver = {
 	/*
 	 * basic lifecycle operations
 	 */
-	.reset = ehci_orion_setup,
+	.reset = ehci_setup,
 	.start = ehci_run,
 	.stop = ehci_stop,
 	.shutdown = ehci_shutdown,
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index e17330ae0aee..c92dcaee0d4d 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -297,7 +297,6 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 		ehci_warn(ehci, "selective suspend/wakeup unavailable\n");
 #endif
 
-	ehci_port_power(ehci, 1);
 	retval = ehci_pci_reinit(ehci, pdev);
 done:
 	return retval;
diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index 272728c48c9e..6e6c23bdb484 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -40,11 +40,6 @@ static int ehci_platform_reset(struct usb_hcd *hcd)
 
 	if (pdata->no_io_watchdog)
 		ehci->need_io_watchdog = 0;
-	if (pdata->port_power_on)
-		ehci_port_power(ehci, 1);
-	if (pdata->port_power_off)
-		ehci_port_power(ehci, 0);
-
 	return 0;
 }
 
diff --git a/drivers/usb/host/ehci-pmcmsp.c b/drivers/usb/host/ehci-pmcmsp.c
index 087aee2a904f..363890ee41d2 100644
--- a/drivers/usb/host/ehci-pmcmsp.c
+++ b/drivers/usb/host/ehci-pmcmsp.c
@@ -90,7 +90,6 @@ static int ehci_msp_setup(struct usb_hcd *hcd)
 		return retval;
 
 	usb_hcd_tdi_set_mode(ehci);
-	ehci_port_power(ehci, 0);
 
 	return retval;
 }
diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c
index 6081e1ed3ac9..0c90a24fa989 100644
--- a/drivers/usb/host/ehci-sh.c
+++ b/drivers/usb/host/ehci-sh.c
@@ -21,17 +21,10 @@ struct ehci_sh_priv {
 static int ehci_sh_reset(struct usb_hcd *hcd)
 {
 	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);
-	int ret;
 
 	ehci->caps = hcd->regs;
 
-	ret = ehci_setup(hcd);
-	if (unlikely(ret))
-		return ret;
-
-	ehci_port_power(ehci, 0);
-
-	return ret;
+	return ehci_setup(hcd);
 }
 
 static const struct hc_driver ehci_sh_hc_driver = {
diff --git a/drivers/usb/host/ehci-spear.c b/drivers/usb/host/ehci-spear.c
index c718a065e154..719ca48a471a 100644
--- a/drivers/usb/host/ehci-spear.c
+++ b/drivers/usb/host/ehci-spear.c
@@ -37,18 +37,11 @@ static void spear_stop_ehci(struct spear_ehci *ehci)
 static int ehci_spear_setup(struct usb_hcd *hcd)
 {
 	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-	int retval = 0;
 
 	/* registers start at offset 0x0 */
 	ehci->caps = hcd->regs;
 
-	retval = ehci_setup(hcd);
-	if (retval)
-		return retval;
-
-	ehci_port_power(ehci, 0);
-
-	return retval;
+	return ehci_setup(hcd);
 }
 
 static const struct hc_driver ehci_spear_hc_driver = {
diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 2de089001ae9..94ee3212094e 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -280,7 +280,6 @@ static void tegra_ehci_shutdown(struct usb_hcd *hcd)
 static int tegra_ehci_setup(struct usb_hcd *hcd)
 {
 	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-	int retval;
 
 	/* EHCI registers start at offset 0x100 */
 	ehci->caps = hcd->regs + 0x100;
@@ -288,12 +287,7 @@ static int tegra_ehci_setup(struct usb_hcd *hcd)
 	/* switch to host mode */
 	hcd->has_tt = 1;
 
-	retval = ehci_setup(hcd);
-	if (retval)
-		return retval;
-
-	ehci_port_power(ehci, 1);
-	return retval;
+	return ehci_setup(hcd);
 }
 
 struct dma_aligned_buffer {
diff --git a/include/linux/usb/ehci_pdriver.h b/include/linux/usb/ehci_pdriver.h
index 67ac74bde6d0..99238b096f7e 100644
--- a/include/linux/usb/ehci_pdriver.h
+++ b/include/linux/usb/ehci_pdriver.h
@@ -41,8 +41,6 @@ struct usb_ehci_pdata {
 	unsigned	has_synopsys_hc_bug:1;
 	unsigned	big_endian_desc:1;
 	unsigned	big_endian_mmio:1;
-	unsigned	port_power_on:1;
-	unsigned	port_power_off:1;
 	unsigned	no_io_watchdog:1;
 
 	/* Turn on all power and clocks */
-- 
cgit v1.2.3-55-g7522


From 17b72feb2be14e6d37023267dc0e199e8e0e3fdc Mon Sep 17 00:00:00 2001
From: Bjørn Mork
Date: Wed, 31 Oct 2012 06:08:39 +0100
Subject: USB: add USB_DEVICE_INTERFACE_CLASS macro

Matching on device and interface class with with unspecified
subclass and protocol is sometimes useful.  This is slightly
different from USB_DEVICE_AND_INTERFACE_INFO which requires
the full interface class/subclass/protocol triplet.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index f51f9981de1e..689b14b26c8d 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -807,6 +807,22 @@ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size)
 	.bcdDevice_lo = (lo), \
 	.bcdDevice_hi = (hi)
 
+/**
+ * USB_DEVICE_INTERFACE_CLASS - describe a usb device with a specific interface class
+ * @vend: the 16 bit USB Vendor ID
+ * @prod: the 16 bit USB Product ID
+ * @cl: bInterfaceClass value
+ *
+ * This macro is used to create a struct usb_device_id that matches a
+ * specific interface class of devices.
+ */
+#define USB_DEVICE_INTERFACE_CLASS(vend, prod, cl) \
+	.match_flags = USB_DEVICE_ID_MATCH_DEVICE | \
+		       USB_DEVICE_ID_MATCH_INT_CLASS, \
+	.idVendor = (vend), \
+	.idProduct = (prod), \
+	.bInterfaceClass = (cl)
+
 /**
  * USB_DEVICE_INTERFACE_PROTOCOL - describe a usb device with a specific interface protocol
  * @vend: the 16 bit USB Vendor ID
-- 
cgit v1.2.3-55-g7522


From 884bfe89a462fcc85c8abd96171519cf2fe70929 Mon Sep 17 00:00:00 2001
From: Slava Pestov
Date: Fri, 15 Jul 2011 14:23:58 -0700
Subject: ring-buffer: Add a 'dropped events' counter

The existing 'overrun' counter is incremented when the ring
buffer wraps around, with overflow on (the default). We wanted
a way to count requests lost from the buffer filling up with
overflow off, too. I decided to add a new counter instead
of retro-fitting the existing one because it seems like a
different statistic to count conceptually, and also because
of how the code was structured.

Link: http://lkml.kernel.org/r/1310765038-26399-1-git-send-email-slavapestov@google.com

Signed-off-by: Slava Pestov <slavapestov@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 41 +++++++++++++++++++++++++++++++++++------
 kernel/trace/trace.c        |  3 +++
 3 files changed, 39 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 6c8835f74f79..2007375cfe77 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -166,6 +166,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b979426d16c6..0ebeb1d76ddf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -460,9 +460,10 @@ struct ring_buffer_per_cpu {
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
 	local_t				entries_bytes;
-	local_t				commit_overrun;
-	local_t				overrun;
 	local_t				entries;
+	local_t				overrun;
+	local_t				commit_overrun;
+	local_t				dropped_events;
 	local_t				committing;
 	local_t				commits;
 	unsigned long			read;
@@ -2155,8 +2156,10 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 			 * If we are not in overwrite mode,
 			 * this is easy, just stop here.
 			 */
-			if (!(buffer->flags & RB_FL_OVERWRITE))
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				local_inc(&cpu_buffer->dropped_events);
 				goto out_reset;
+			}
 
 			ret = rb_handle_head_page(cpu_buffer,
 						  tail_page,
@@ -2995,7 +2998,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
 
 /**
- * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
+ * buffer wrapping around (only if RB_FL_OVERWRITE is on).
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the number of overruns from
  */
@@ -3015,7 +3019,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
- * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
+ * commits failing due to the buffer wrapping around while there are uncommitted
+ * events, such as during an interrupt storm.
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the number of overruns from
  */
@@ -3035,6 +3041,28 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
 
+/**
+ * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
+ * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = local_read(&cpu_buffer->dropped_events);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
+
 /**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
@@ -3864,9 +3892,10 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
-	local_set(&cpu_buffer->commit_overrun, 0);
 	local_set(&cpu_buffer->entries_bytes, 0);
 	local_set(&cpu_buffer->overrun, 0);
+	local_set(&cpu_buffer->commit_overrun, 0);
+	local_set(&cpu_buffer->dropped_events, 0);
 	local_set(&cpu_buffer->entries, 0);
 	local_set(&cpu_buffer->committing, 0);
 	local_set(&cpu_buffer->commits, 0);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f6928edacd6d..36c213fbfce7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4385,6 +4385,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	usec_rem = do_div(t, USEC_PER_SEC);
 	trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
 
+	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "dropped events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
-- 
cgit v1.2.3-55-g7522


From 60efc15ae96c7aace8060411b0d5add20e1ab21e Mon Sep 17 00:00:00 2001
From: Michal Hocko
Date: Thu, 25 Oct 2012 15:41:51 +0200
Subject: linux/kernel.h: Remove duplicate trace_printk declaration

!CONFIG_TRACING both declares and defines (empty) trace_printk.
The first one is not redundant so it can be removed.

Link: http://lkml.kernel.org/r/1351172511-18125-1-git-send-email-mhocko@suse.cz

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a123b13b70fd..7785d5df6d8b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -527,9 +527,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
 
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
-static inline __printf(1, 2)
-int trace_printk(const char *fmt, ...);
-
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
@@ -539,8 +536,8 @@ static inline void tracing_on(void) { }
 static inline void tracing_off(void) { }
 static inline int tracing_is_on(void) { return 0; }
 
-static inline int
-trace_printk(const char *fmt, ...)
+static inline __printf(1, 2)
+int trace_printk(const char *fmt, ...)
 {
 	return 0;
 }
-- 
cgit v1.2.3-55-g7522


From 293a7a0a165c4f8327bbcf396cee9ec672727c98 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 16 Oct 2012 15:07:49 -0700
Subject: genirq: Provide means to retrigger parent

Attempts to retrigger nested threaded IRQs currently fail because they
have no primary handler. In order to support retrigger of nested
IRQs, the parent IRQ needs to be retriggered.

To fix, when an IRQ needs to be resent, if the interrupt has a parent
IRQ and runs in the context of the parent IRQ, then resend the parent.

Also, handle_nested_irq() needs to clear the replay flag like the
other handlers, otherwise check_irq_resend() will set it and it will
never be cleared.  Without clearing, it results in the first resend
working fine, but check_irq_resend() returning early on subsequent
resends because the replay flag is still set.

Problem discovered on ARM/OMAP platforms where a nested IRQ that's
also a wakeup IRQ happens late in suspend and needed to be retriggered
during the resume process.

[khilman@ti.com: changelog edits, clear IRQS_REPLAY in handle_nested_irq()]

Reported-by: Kevin Hilman <khilman@ti.com>
Tested-by: Kevin Hilman <khilman@ti.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1350425269-11489-1-git-send-email-khilman@deeprootsystems.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     |  9 +++++++++
 include/linux/irqdesc.h |  3 +++
 kernel/irq/chip.c       |  1 +
 kernel/irq/manage.c     | 16 ++++++++++++++++
 kernel/irq/resend.c     |  8 ++++++++
 5 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 216b0ba109d7..526f10a637c1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -392,6 +392,15 @@ static inline void irq_move_masked_irq(struct irq_data *data) { }
 
 extern int no_irq_affinity;
 
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+int irq_set_parent(int irq, int parent_irq);
+#else
+static inline int irq_set_parent(int irq, int parent_irq)
+{
+	return 0;
+}
+#endif
+
 /*
  * Built-in IRQ handlers for various IRQ types,
  * callable via desc->handle_irq()
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 0ba014c55056..623325e2ff97 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -11,6 +11,8 @@
 struct irq_affinity_notify;
 struct proc_dir_entry;
 struct module;
+struct irq_desc;
+
 /**
  * struct irq_desc - interrupt descriptor
  * @irq_data:		per irq and chip data passed down to chip functions
@@ -65,6 +67,7 @@ struct irq_desc {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*dir;
 #endif
+	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 57d86d07221e..3aca9f29d30e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -272,6 +272,7 @@ void handle_nested_irq(unsigned int irq)
 
 	raw_spin_lock_irq(&desc->lock);
 
+	desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
 	kstat_incr_irqs_this_cpu(irq, desc);
 
 	action = desc->action;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4c69326aa773..d06a396c7ce3 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -616,6 +616,22 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 	return ret;
 }
 
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+int irq_set_parent(int irq, int parent_irq)
+{
+	unsigned long flags;
+	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
+
+	if (!desc)
+		return -EINVAL;
+
+	desc->parent_irq = parent_irq;
+
+	irq_put_desc_unlock(desc, flags);
+	return 0;
+}
+#endif
+
 /*
  * Default primary interrupt handler for threaded interrupts. Is
  * assigned as primary handler when request_threaded_irq is called
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 6454db7b6a4d..9065107f083e 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -74,6 +74,14 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
 		if (!desc->irq_data.chip->irq_retrigger ||
 		    !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
 #ifdef CONFIG_HARDIRQS_SW_RESEND
+			/*
+			 * If the interrupt has a parent irq and runs
+			 * in the thread context of the parent irq,
+			 * retrigger the parent.
+			 */
+			if (desc->parent_irq &&
+			    irq_settings_is_nested_thread(desc))
+				irq = desc->parent_irq;
 			/* Set it pending and activate the softirq: */
 			set_bit(irq, irqs_resend);
 			tasklet_schedule(&resend_tasklet);
-- 
cgit v1.2.3-55-g7522


From 86b00e0da6be7bbc16412f126c5b548ac5d91d50 Mon Sep 17 00:00:00 2001
From: Alex Elder
Date: Thu, 25 Oct 2012 23:34:42 -0500
Subject: rbd: get parent spec for version 2 images

Add support for getting the the information identifying the parent
image for rbd images that have them.  The child image holds a
reference to its parent image specification structure.  Create a new
entry "parent" in /sys/bus/rbd/image/N/ to report the identifying
information for the parent image, if any.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 Documentation/ABI/testing/sysfs-bus-rbd |   4 +
 drivers/block/rbd.c                     | 131 ++++++++++++++++++++++++++++++++
 include/linux/ceph/rados.h              |   2 +
 3 files changed, 137 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index 1cf2adf46b11..cd9213ccf3dc 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -70,6 +70,10 @@ snap_*
 
 	A directory per each snapshot
 
+parent
+
+	Information identifying the pool, image, and snapshot id for
+	the parent image in a layered rbd image (format 2 only).
 
 Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
 -------------------------------------------------------------
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 28052ff679ca..bce1fcfb5185 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -217,6 +217,9 @@ struct rbd_device {
 	struct ceph_osd_event   *watch_event;
 	struct ceph_osd_request *watch_request;
 
+	struct rbd_spec		*parent_spec;
+	u64			parent_overlap;
+
 	/* protects updating the header */
 	struct rw_semaphore     header_rwsem;
 
@@ -2009,6 +2012,49 @@ static ssize_t rbd_snap_show(struct device *dev,
 	return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
 }
 
+/*
+ * For an rbd v2 image, shows the pool id, image id, and snapshot id
+ * for the parent image.  If there is no parent, simply shows
+ * "(no parent image)".
+ */
+static ssize_t rbd_parent_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+	struct rbd_spec *spec = rbd_dev->parent_spec;
+	int count;
+	char *bufp = buf;
+
+	if (!spec)
+		return sprintf(buf, "(no parent image)\n");
+
+	count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
+			(unsigned long long) spec->pool_id, spec->pool_name);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
+			spec->image_name ? spec->image_name : "(unknown)");
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
+			(unsigned long long) spec->snap_id, spec->snap_name);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	return (ssize_t) (bufp - buf);
+}
+
 static ssize_t rbd_image_refresh(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf,
@@ -2032,6 +2078,7 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
 static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
 static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
 static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
 
 static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,
@@ -2043,6 +2090,7 @@ static struct attribute *rbd_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_image_id.attr,
 	&dev_attr_current_snap.attr,
+	&dev_attr_parent.attr,
 	&dev_attr_refresh.attr,
 	NULL
 };
@@ -2192,6 +2240,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 
 static void rbd_dev_destroy(struct rbd_device *rbd_dev)
 {
+	rbd_spec_put(rbd_dev->parent_spec);
 	kfree(rbd_dev->header_name);
 	rbd_put_client(rbd_dev->rbd_client);
 	rbd_spec_put(rbd_dev->spec);
@@ -2400,6 +2449,71 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
 						&rbd_dev->header.features);
 }
 
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+{
+	struct rbd_spec *parent_spec;
+	size_t size;
+	void *reply_buf = NULL;
+	__le64 snapid;
+	void *p;
+	void *end;
+	char *image_id;
+	u64 overlap;
+	size_t len = 0;
+	int ret;
+
+	parent_spec = rbd_spec_alloc();
+	if (!parent_spec)
+		return -ENOMEM;
+
+	size = sizeof (__le64) +				/* pool_id */
+		sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +	/* image_id */
+		sizeof (__le64) +				/* snap_id */
+		sizeof (__le64);				/* overlap */
+	reply_buf = kmalloc(size, GFP_KERNEL);
+	if (!reply_buf) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	snapid = cpu_to_le64(CEPH_NOSNAP);
+	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+				"rbd", "get_parent",
+				(char *) &snapid, sizeof (snapid),
+				(char *) reply_buf, size,
+				CEPH_OSD_FLAG_READ, NULL);
+	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+	if (ret < 0)
+		goto out_err;
+
+	ret = -ERANGE;
+	p = reply_buf;
+	end = (char *) reply_buf + size;
+	ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err);
+	if (parent_spec->pool_id == CEPH_NOPOOL)
+		goto out;	/* No parent?  No problem. */
+
+	image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+	if (IS_ERR(image_id)) {
+		ret = PTR_ERR(image_id);
+		goto out_err;
+	}
+	parent_spec->image_id = image_id;
+	ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
+	ceph_decode_64_safe(&p, end, overlap, out_err);
+
+	rbd_dev->parent_overlap = overlap;
+	rbd_dev->parent_spec = parent_spec;
+	parent_spec = NULL;	/* rbd_dev now owns this */
+out:
+	ret = 0;
+out_err:
+	kfree(reply_buf);
+	rbd_spec_put(parent_spec);
+
+	return ret;
+}
+
 static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
 {
 	size_t size;
@@ -3154,6 +3268,12 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
 	ret = rbd_read_header(rbd_dev, &rbd_dev->header);
 	if (ret < 0)
 		goto out_err;
+
+	/* Version 1 images have no parent (no layering) */
+
+	rbd_dev->parent_spec = NULL;
+	rbd_dev->parent_overlap = 0;
+
 	rbd_dev->image_format = 1;
 
 	dout("discovered version 1 image, header name is %s\n",
@@ -3205,6 +3325,14 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
 	if (ret < 0)
 		goto out_err;
 
+	/* If the image supports layering, get the parent info */
+
+	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+		ret = rbd_dev_v2_parent_info(rbd_dev);
+		if (ret < 0)
+			goto out_err;
+	}
+
 	/* crypto and compression type aren't (yet) supported for v2 images */
 
 	rbd_dev->header.crypt_type = 0;
@@ -3224,6 +3352,9 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
 
 	return 0;
 out_err:
+	rbd_dev->parent_overlap = 0;
+	rbd_spec_put(rbd_dev->parent_spec);
+	rbd_dev->parent_spec = NULL;
 	kfree(rbd_dev->header_name);
 	rbd_dev->header_name = NULL;
 	kfree(rbd_dev->header.object_prefix);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 0a99099801a4..15077db662ed 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -87,6 +87,8 @@ struct ceph_pg {
  *
  *  lpgp_num -- as above.
  */
+#define CEPH_NOPOOL  ((__u64) (-1))  /* pool id not defined */
+
 #define CEPH_PG_TYPE_REP     1
 #define CEPH_PG_TYPE_RAID4   2
 #define CEPH_PG_POOL_VERSION 2
-- 
cgit v1.2.3-55-g7522


From 72afc71ffca0f444ee0e1ef8c7e34ab209bb48b3 Mon Sep 17 00:00:00 2001
From: Alex Elder
Date: Tue, 30 Oct 2012 19:40:33 -0500
Subject: libceph: define ceph_pg_pool_name_by_id()

Define and export function ceph_pg_pool_name_by_id() to supply
the name of a pg pool whose id is given.  This will be used by
the next patch.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osdmap.h |  1 +
 net/ceph/osdmap.c           | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e88a620b9f8a..5ea57ba69320 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
+extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
 
 #endif
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f552aa48fd9e..de73214b5d26 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
+{
+	struct ceph_pg_pool_info *pi;
+
+	if (id == CEPH_NOPOOL)
+		return NULL;
+
+	if (WARN_ON_ONCE(id > (u64) INT_MAX))
+		return NULL;
+
+	pi = __lookup_pg_pool(&map->pg_pools, (int) id);
+
+	return pi ? pi->name : NULL;
+}
+EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
+
 int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
 {
 	struct rb_node *rbp;
-- 
cgit v1.2.3-55-g7522


From a8fc92778080c845eaadc369a0ecf5699a03bef0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov
Date: Thu, 1 Nov 2012 02:01:48 +0000
Subject: sk-filter: Add ability to get socket filter program (v2)

The SO_ATTACH_FILTER option is set only. I propose to add the get
ability by using SO_ATTACH_FILTER in getsockopt. To be less
irritating to eyes the SO_GET_FILTER alias to it is declared. This
ability is required by checkpoint-restore project to be able to
save full state of a socket.

There are two issues with getting filter back.

First, kernel modifies the sock_filter->code on filter load, thus in
order to return the filter element back to user we have to decode it
into user-visible constants. Fortunately the modification in question
is interconvertible.

Second, the BPF_S_ALU_DIV_K code modifies the command argument k to
speed up the run-time division by doing kernel_k = reciprocal(user_k).
Bad news is that different user_k may result in same kernel_k, so we
can't get the original user_k back. Good news is that we don't have
to do it. What we need to is calculate a user2_k so, that

  reciprocal(user2_k) == reciprocal(user_k) == kernel_k

i.e. if it's re-loaded back the compiled again value will be exactly
the same as it was. That said, the user2_k can be calculated like this

  user2_k = reciprocal(kernel_k)

with an exception, that if kernel_k == 0, then user2_k == 1.

The optlen argument is treated like this -- when zero, kernel returns
the amount of sock_fprog elements in filter, otherwise it should be
large enough for the sock_fprog array.

changes since v1:
* Declared SO_GET_FILTER in all arch headers
* Added decode of vlan-tag codes

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/asm/socket.h        |   1 +
 arch/avr32/include/uapi/asm/socket.h   |   1 +
 arch/cris/include/asm/socket.h         |   1 +
 arch/frv/include/uapi/asm/socket.h     |   1 +
 arch/h8300/include/asm/socket.h        |   1 +
 arch/ia64/include/uapi/asm/socket.h    |   1 +
 arch/m32r/include/asm/socket.h         |   1 +
 arch/m68k/include/asm/socket.h         |   1 +
 arch/mips/include/uapi/asm/socket.h    |   1 +
 arch/mn10300/include/uapi/asm/socket.h |   1 +
 arch/parisc/include/asm/socket.h       |   1 +
 arch/powerpc/include/uapi/asm/socket.h |   1 +
 arch/s390/include/uapi/asm/socket.h    |   1 +
 arch/sparc/include/uapi/asm/socket.h   |   1 +
 arch/xtensa/include/asm/socket.h       |   1 +
 include/linux/filter.h                 |   1 +
 include/uapi/asm-generic/socket.h      |   1 +
 net/core/filter.c                      | 130 +++++++++++++++++++++++++++++++++
 net/core/sock.c                        |   6 ++
 19 files changed, 153 insertions(+)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 7d2f75be932e..0087d053b77f 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -47,6 +47,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index a473f8c6a9aa..486df68abeec 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index ae52825021af..b681b043f6c8 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -42,6 +42,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP           29
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index a5b1d7dbb205..871f89b7fbda 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP		29
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index ec4554e7b04b..90a2e573c7e6 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP		29
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 41fc28a4a18a..23d6759bb57b 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -49,6 +49,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER	26
 #define SO_DETACH_FILTER	27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h
index a15f40b52783..5e7088a26726 100644
--- a/arch/m32r/include/asm/socket.h
+++ b/arch/m32r/include/asm/socket.h
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index d1be684edf97..285da3b6ad92 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP		29
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index c5ed59549cb8..17307ab90474 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -63,6 +63,7 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP		29
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 820463a484b8..af5366bbfe62 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index 1b52c2c31a7a..d9ff4731253b 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -48,6 +48,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        0x401a
 #define SO_DETACH_FILTER        0x401b
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_ACCEPTCONN		0x401c
 
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 3d5179bb122f..eb0b1864d400 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -47,6 +47,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER	26
 #define SO_DETACH_FILTER	27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 69718cd6d635..436d07c23be8 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -46,6 +46,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index bea1568ae4af..c83a937ead00 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -41,6 +41,7 @@
 
 #define SO_ATTACH_FILTER	0x001a
 #define SO_DETACH_FILTER        0x001b
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		0x001c
 #define SO_TIMESTAMP		0x001d
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index e36c68184920..38079be1cf1e 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -52,6 +52,7 @@
 
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c9f0005c35e2..c45eabc135e1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -45,6 +45,7 @@ extern void sk_unattached_filter_destroy(struct sk_filter *fp);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
+extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len);
 
 #ifdef CONFIG_BPF_JIT
 extern void bpf_jit_compile(struct sk_filter *fp);
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index b1bea03274d5..2d32d073a6f9 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -43,6 +43,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER	26
 #define SO_DETACH_FILTER	27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
 #define SO_TIMESTAMP		29
diff --git a/net/core/filter.c b/net/core/filter.c
index 5a114d41bf11..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -760,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
+
+static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+{
+	static const u16 decodes[] = {
+		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
+		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
+		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
+		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
+		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
+		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
+		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
+		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
+		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
+		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
+		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
+		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
+		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
+		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
+		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
+		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
+		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
+		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
+		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
+		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
+		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
+		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
+		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
+		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
+		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
+		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
+		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
+		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
+		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
+		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
+		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
+		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
+		[BPF_S_RET_K]		= BPF_RET|BPF_K,
+		[BPF_S_RET_A]		= BPF_RET|BPF_A,
+		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
+		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
+		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
+		[BPF_S_ST]		= BPF_ST,
+		[BPF_S_STX]		= BPF_STX,
+		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
+		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
+		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
+		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
+		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
+		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
+		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
+		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
+		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
+	};
+	u16 code;
+
+	code = filt->code;
+
+	to->code = decodes[code];
+	to->jt = filt->jt;
+	to->jf = filt->jf;
+
+	if (code == BPF_S_ALU_DIV_K) {
+		/*
+		 * When loaded this rule user gave us X, which was
+		 * translated into R = r(X). Now we calculate the
+		 * RR = r(R) and report it back. If next time this
+		 * value is loaded and RRR = r(RR) is calculated
+		 * then the R == RRR will be true.
+		 *
+		 * One exception. X == 1 translates into R == 0 and
+		 * we can't calculate RR out of it with r().
+		 */
+
+		if (filt->k == 0)
+			to->k = 1;
+		else
+			to->k = reciprocal_value(filt->k);
+
+		BUG_ON(reciprocal_value(to->k) != filt->k);
+	} else
+		to->k = filt->k;
+}
+
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+{
+	struct sk_filter *filter;
+	int i, ret;
+
+	lock_sock(sk);
+	filter = rcu_dereference_protected(sk->sk_filter,
+			sock_owned_by_user(sk));
+	ret = 0;
+	if (!filter)
+		goto out;
+	ret = filter->len;
+	if (!len)
+		goto out;
+	ret = -EINVAL;
+	if (len < filter->len)
+		goto out;
+
+	ret = -EFAULT;
+	for (i = 0; i < filter->len; i++) {
+		struct sock_filter fb;
+
+		sk_decode_filter(&filter->insns[i], &fb);
+		if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
+			goto out;
+	}
+
+	ret = filter->len;
+out:
+	release_sock(sk);
+	return ret;
+}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0a023b8daa55..06286006a2cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1077,6 +1077,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	case SO_BINDTODEVICE:
 		v.val = sk->sk_bound_dev_if;
 		break;
+	case SO_GET_FILTER:
+		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+		if (len < 0)
+			return len;
+
+		goto lenout;
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3-55-g7522


From 65f8f9a1c1db831e5159e3e3e50912d1f214cd0c Mon Sep 17 00:00:00 2001
From: Richard Cochran
Date: Wed, 31 Oct 2012 06:27:25 +0000
Subject: time: remove the timecompare code.

This patch removes the timecompare code from the kernel. The top five
reasons to do this are:

1. There are no more users of this code.
2. The original idea was a bit weak.
3. The original author has disappeared.
4. The code was not general purpose but tuned to a particular hardware,
5. There are better ways to accomplish clock synchronization.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: John Stultz <john.stultz@linaro.org>
Tested-by: Bob Liu <lliubbo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/timecompare.h | 125 ----------------------------
 kernel/time/Makefile        |   2 +-
 kernel/time/timecompare.c   | 193 --------------------------------------------
 3 files changed, 1 insertion(+), 319 deletions(-)
 delete mode 100644 include/linux/timecompare.h
 delete mode 100644 kernel/time/timecompare.c

(limited to 'include/linux')

diff --git a/include/linux/timecompare.h b/include/linux/timecompare.h
deleted file mode 100644
index 546e2234e4b3..000000000000
--- a/include/linux/timecompare.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Utility code which helps transforming between two different time
- * bases, called "source" and "target" time in this code.
- *
- * Source time has to be provided via the timecounter API while target
- * time is accessed via a function callback whose prototype
- * intentionally matches ktime_get() and ktime_get_real(). These
- * interfaces where chosen like this so that the code serves its
- * initial purpose without additional glue code.
- *
- * This purpose is synchronizing a hardware clock in a NIC with system
- * time, in order to implement the Precision Time Protocol (PTP,
- * IEEE1588) with more accurate hardware assisted time stamping.  In
- * that context only synchronization against system time (=
- * ktime_get_real()) is currently needed. But this utility code might
- * become useful in other situations, which is why it was written as
- * general purpose utility code.
- *
- * The source timecounter is assumed to return monotonically
- * increasing time (but this code does its best to compensate if that
- * is not the case) whereas target time may jump.
- *
- * The target time corresponding to a source time is determined by
- * reading target time, reading source time, reading target time
- * again, then assuming that average target time corresponds to source
- * time. In other words, the assumption is that reading the source
- * time is slow and involves equal time for sending the request and
- * receiving the reply, whereas reading target time is assumed to be
- * fast.
- *
- * Copyright (C) 2009 Intel Corporation.
- * Author: Patrick Ohly <patrick.ohly@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#ifndef _LINUX_TIMECOMPARE_H
-#define _LINUX_TIMECOMPARE_H
-
-#include <linux/clocksource.h>
-#include <linux/ktime.h>
-
-/**
- * struct timecompare - stores state and configuration for the two clocks
- *
- * Initialize to zero, then set source/target/num_samples.
- *
- * Transformation between source time and target time is done with:
- * target_time = source_time + offset +
- *               (source_time - last_update) * skew /
- *               TIMECOMPARE_SKEW_RESOLUTION
- *
- * @source:          used to get source time stamps via timecounter_read()
- * @target:          function returning target time (for example, ktime_get
- *                   for monotonic time, or ktime_get_real for wall clock)
- * @num_samples:     number of times that source time and target time are to
- *                   be compared when determining their offset
- * @offset:          (target time - source time) at the time of the last update
- * @skew:            average (target time - source time) / delta source time *
- *                   TIMECOMPARE_SKEW_RESOLUTION
- * @last_update:     last source time stamp when time offset was measured
- */
-struct timecompare {
-	struct timecounter *source;
-	ktime_t (*target)(void);
-	int num_samples;
-
-	s64 offset;
-	s64 skew;
-	u64 last_update;
-};
-
-/**
- * timecompare_transform - transform source time stamp into target time base
- * @sync:            context for time sync
- * @source_tstamp:   the result of timecounter_read() or
- *                   timecounter_cyc2time()
- */
-extern ktime_t timecompare_transform(struct timecompare *sync,
-				     u64 source_tstamp);
-
-/**
- * timecompare_offset - measure current (target time - source time) offset
- * @sync:            context for time sync
- * @offset:          average offset during sample period returned here
- * @source_tstamp:   average source time during sample period returned here
- *
- * Returns number of samples used. Might be zero (= no result) in the
- * unlikely case that target time was monotonically decreasing for all
- * samples (= broken).
- */
-extern int timecompare_offset(struct timecompare *sync,
-			      s64 *offset,
-			      u64 *source_tstamp);
-
-extern void __timecompare_update(struct timecompare *sync,
-				 u64 source_tstamp);
-
-/**
- * timecompare_update - update offset and skew by measuring current offset
- * @sync:            context for time sync
- * @source_tstamp:   the result of timecounter_read() or
- *                   timecounter_cyc2time(), pass zero to force update
- *
- * Updates are only done at most once per second.
- */
-static inline void timecompare_update(struct timecompare *sync,
-				      u64 source_tstamp)
-{
-	if (!source_tstamp ||
-	    (s64)(source_tstamp - sync->last_update) >= NSEC_PER_SEC)
-		__timecompare_update(sync, source_tstamp);
-}
-
-#endif /* _LINUX_TIMECOMPARE_H */
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index e2fd74b8e8c2..ff7d9d2ab504 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,4 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o posix-clock.o alarmtimer.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
deleted file mode 100644
index a9ae369925ce..000000000000
--- a/kernel/time/timecompare.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2009 Intel Corporation.
- * Author: Patrick Ohly <patrick.ohly@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/timecompare.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/math64.h>
-#include <linux/kernel.h>
-
-/*
- * fixed point arithmetic scale factor for skew
- *
- * Usually one would measure skew in ppb (parts per billion, 1e9), but
- * using a factor of 2 simplifies the math.
- */
-#define TIMECOMPARE_SKEW_RESOLUTION (((s64)1)<<30)
-
-ktime_t timecompare_transform(struct timecompare *sync,
-			      u64 source_tstamp)
-{
-	u64 nsec;
-
-	nsec = source_tstamp + sync->offset;
-	nsec += (s64)(source_tstamp - sync->last_update) * sync->skew /
-		TIMECOMPARE_SKEW_RESOLUTION;
-
-	return ns_to_ktime(nsec);
-}
-EXPORT_SYMBOL_GPL(timecompare_transform);
-
-int timecompare_offset(struct timecompare *sync,
-		       s64 *offset,
-		       u64 *source_tstamp)
-{
-	u64 start_source = 0, end_source = 0;
-	struct {
-		s64 offset;
-		s64 duration_target;
-	} buffer[10], sample, *samples;
-	int counter = 0, i;
-	int used;
-	int index;
-	int num_samples = sync->num_samples;
-
-	if (num_samples > ARRAY_SIZE(buffer)) {
-		samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
-		if (!samples) {
-			samples = buffer;
-			num_samples = ARRAY_SIZE(buffer);
-		}
-	} else {
-		samples = buffer;
-	}
-
-	/* run until we have enough valid samples, but do not try forever */
-	i = 0;
-	counter = 0;
-	while (1) {
-		u64 ts;
-		ktime_t start, end;
-
-		start = sync->target();
-		ts = timecounter_read(sync->source);
-		end = sync->target();
-
-		if (!i)
-			start_source = ts;
-
-		/* ignore negative durations */
-		sample.duration_target = ktime_to_ns(ktime_sub(end, start));
-		if (sample.duration_target >= 0) {
-			/*
-			 * assume symetric delay to and from source:
-			 * average target time corresponds to measured
-			 * source time
-			 */
-			sample.offset =
-				(ktime_to_ns(end) + ktime_to_ns(start)) / 2 -
-				ts;
-
-			/* simple insertion sort based on duration */
-			index = counter - 1;
-			while (index >= 0) {
-				if (samples[index].duration_target <
-				    sample.duration_target)
-					break;
-				samples[index + 1] = samples[index];
-				index--;
-			}
-			samples[index + 1] = sample;
-			counter++;
-		}
-
-		i++;
-		if (counter >= num_samples || i >= 100000) {
-			end_source = ts;
-			break;
-		}
-	}
-
-	*source_tstamp = (end_source + start_source) / 2;
-
-	/* remove outliers by only using 75% of the samples */
-	used = counter * 3 / 4;
-	if (!used)
-		used = counter;
-	if (used) {
-		/* calculate average */
-		s64 off = 0;
-		for (index = 0; index < used; index++)
-			off += samples[index].offset;
-		*offset = div_s64(off, used);
-	}
-
-	if (samples && samples != buffer)
-		kfree(samples);
-
-	return used;
-}
-EXPORT_SYMBOL_GPL(timecompare_offset);
-
-void __timecompare_update(struct timecompare *sync,
-			  u64 source_tstamp)
-{
-	s64 offset;
-	u64 average_time;
-
-	if (!timecompare_offset(sync, &offset, &average_time))
-		return;
-
-	if (!sync->last_update) {
-		sync->last_update = average_time;
-		sync->offset = offset;
-		sync->skew = 0;
-	} else {
-		s64 delta_nsec = average_time - sync->last_update;
-
-		/* avoid division by negative or small deltas */
-		if (delta_nsec >= 10000) {
-			s64 delta_offset_nsec = offset - sync->offset;
-			s64 skew; /* delta_offset_nsec *
-				     TIMECOMPARE_SKEW_RESOLUTION /
-				     delta_nsec */
-			u64 divisor;
-
-			/* div_s64() is limited to 32 bit divisor */
-			skew = delta_offset_nsec * TIMECOMPARE_SKEW_RESOLUTION;
-			divisor = delta_nsec;
-			while (unlikely(divisor >= ((s64)1) << 32)) {
-				/* divide both by 2; beware, right shift
-				   of negative value has undefined
-				   behavior and can only be used for
-				   the positive divisor */
-				skew = div_s64(skew, 2);
-				divisor >>= 1;
-			}
-			skew = div_s64(skew, divisor);
-
-			/*
-			 * Calculate new overall skew as 4/16 the
-			 * old value and 12/16 the new one. This is
-			 * a rather arbitrary tradeoff between
-			 * only using the latest measurement (0/16 and
-			 * 16/16) and even more weight on past measurements.
-			 */
-#define TIMECOMPARE_NEW_SKEW_PER_16 12
-			sync->skew =
-				div_s64((16 - TIMECOMPARE_NEW_SKEW_PER_16) *
-					sync->skew +
-					TIMECOMPARE_NEW_SKEW_PER_16 * skew,
-					16);
-			sync->last_update = average_time;
-			sync->offset = offset;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(__timecompare_update);
-- 
cgit v1.2.3-55-g7522


From 6b60393e08f9263c7b129d54eeb261e8f970175c Mon Sep 17 00:00:00 2001
From: Richard Cochran
Date: Mon, 29 Oct 2012 08:45:17 +0000
Subject: cpsw: add a DT field for the cpts offset

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt | 3 +++
 drivers/net/ethernet/ti/cpsw.c                 | 7 +++++++
 include/linux/platform_data/cpsw.h             | 1 +
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 3af47b78caea..dba014fc5e7b 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -16,6 +16,7 @@ Required properties:
 - ale_entries		: Specifies No of entries ALE can hold
 - host_port_reg_ofs	: Specifies host port register offset
 - hw_stats_reg_ofs	: Specifies hardware statistics register offset
+- cpts_reg_ofs		: Specifies the offset of the CPTS registers
 - bd_ram_ofs		: Specifies internal desciptor RAM offset
 - bd_ram_size		: Specifies internal descriptor RAM size
 - rx_descs		: Specifies number of Rx descriptors
@@ -52,6 +53,7 @@ Examples:
 		ale_entries = <1024>;
 		host_port_reg_ofs = <0x108>;
 		hw_stats_reg_ofs = <0x900>;
+		cpts_reg_ofs = <0xc00>;
 		bd_ram_ofs = <0x2000>;
 		bd_ram_size = <0x2000>;
 		no_bd_ram = <0>;
@@ -86,6 +88,7 @@ Examples:
 		ale_entries = <1024>;
 		host_port_reg_ofs = <0x108>;
 		hw_stats_reg_ofs = <0x900>;
+		cpts_reg_ofs = <0xc00>;
 		bd_ram_ofs = <0x2000>;
 		bd_ram_size = <0x2000>;
 		no_bd_ram = <0>;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 588f5c340490..f1af5e08cabb 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -912,6 +912,13 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->hw_stats_reg_ofs = prop;
 
+	if (of_property_read_u32(node, "cpts_reg_ofs", &prop)) {
+		pr_err("Missing cpts_reg_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->cpts_reg_ofs = prop;
+
 	if (of_property_read_u32(node, "bd_ram_ofs", &prop)) {
 		pr_err("Missing bd_ram_ofs property in the DT.\n");
 		ret = -EINVAL;
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index c4e23d029498..a052b1dca957 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -41,6 +41,7 @@ struct cpsw_platform_data {
 	u32     host_port_num; /* The port number for the host port */
 
 	u32	hw_stats_reg_ofs;  /* cpsw hardware statistics counters */
+	u32	cpts_reg_ofs;      /* cpts registers */
 
 	u32	bd_ram_ofs;   /* embedded buffer descriptor RAM offset*/
 	u32	bd_ram_size;  /*buffer descriptor ram size */
-- 
cgit v1.2.3-55-g7522


From 78ca0b287314ad6b7b06161b3ff9b13e8a8bcce0 Mon Sep 17 00:00:00 2001
From: Richard Cochran
Date: Mon, 29 Oct 2012 08:45:18 +0000
Subject: cpsw: add a DT field for the active time stamping port

Because time stamping on both external ports of the switch simultaneously
is positively useless from the application's point of view, this patch
provides a DT configuration method to choose the active port.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt | 3 +++
 drivers/net/ethernet/ti/cpsw.c                 | 7 +++++++
 include/linux/platform_data/cpsw.h             | 1 +
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index dba014fc5e7b..9f61f2b51681 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -23,6 +23,7 @@ Required properties:
 - mac_control		: Specifies Default MAC control register content
 			  for the specific platform
 - slaves		: Specifies number for slaves
+- cpts_active_slave	: Specifies the slave to use for time stamping
 - slave_reg_ofs		: Specifies slave register offset
 - sliver_reg_ofs	: Specifies slave sliver register offset
 - phy_id		: Specifies slave phy id
@@ -60,6 +61,7 @@ Examples:
 		rx_descs = <64>;
 		mac_control = <0x20>;
 		slaves = <2>;
+		cpts_active_slave = <0>;
 		cpsw_emac0: slave@0 {
 			slave_reg_ofs = <0x200>;
 			sliver_reg_ofs = <0xd80>;
@@ -95,6 +97,7 @@ Examples:
 		rx_descs = <64>;
 		mac_control = <0x20>;
 		slaves = <2>;
+		cpts_active_slave = <0>;
 		cpsw_emac0: slave@0 {
 			slave_reg_ofs = <0x200>;
 			sliver_reg_ofs = <0xd80>;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index f1af5e08cabb..f16579123c1b 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -847,6 +847,13 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->slaves = prop;
 
+	if (of_property_read_u32(node, "cpts_active_slave", &prop)) {
+		pr_err("Missing cpts_active_slave property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->cpts_active_slave = prop;
+
 	data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) *
 				   data->slaves, GFP_KERNEL);
 	if (!data->slave_data) {
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index a052b1dca957..15a077eb0689 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -33,6 +33,7 @@ struct cpsw_platform_data {
 
 	u32	slaves;		/* number of slave cpgmac ports */
 	struct cpsw_slave_data	*slave_data;
+	u32	cpts_active_slave; /* time stamping slave */
 
 	u32	ale_reg_ofs;	/* address lookup engine reg offset */
 	u32	ale_entries;	/* ale table size */
-- 
cgit v1.2.3-55-g7522


From 00ab94eeaf6c1ad38ad7368c5148fed31403c8a2 Mon Sep 17 00:00:00 2001
From: Richard Cochran
Date: Mon, 29 Oct 2012 08:45:19 +0000
Subject: cpts: specify the input clock frequency via DT

This patch adds a way to configure the CPTS input clock scaling factors
via the device tree.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt |  6 ++++++
 drivers/net/ethernet/ti/cpsw.c                 | 14 ++++++++++++++
 include/linux/platform_data/cpsw.h             |  2 ++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 9f61f2b51681..221460714c56 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -24,6 +24,8 @@ Required properties:
 			  for the specific platform
 - slaves		: Specifies number for slaves
 - cpts_active_slave	: Specifies the slave to use for time stamping
+- cpts_clock_mult	: Numerator to convert input clock ticks into nanoseconds
+- cpts_clock_shift	: Denominator to convert input clock ticks into nanoseconds
 - slave_reg_ofs		: Specifies slave register offset
 - sliver_reg_ofs	: Specifies slave sliver register offset
 - phy_id		: Specifies slave phy id
@@ -62,6 +64,8 @@ Examples:
 		mac_control = <0x20>;
 		slaves = <2>;
 		cpts_active_slave = <0>;
+		cpts_clock_mult = <0x80000000>;
+		cpts_clock_shift = <29>;
 		cpsw_emac0: slave@0 {
 			slave_reg_ofs = <0x200>;
 			sliver_reg_ofs = <0xd80>;
@@ -98,6 +102,8 @@ Examples:
 		mac_control = <0x20>;
 		slaves = <2>;
 		cpts_active_slave = <0>;
+		cpts_clock_mult = <0x80000000>;
+		cpts_clock_shift = <29>;
 		cpsw_emac0: slave@0 {
 			slave_reg_ofs = <0x200>;
 			sliver_reg_ofs = <0xd80>;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index f16579123c1b..c04627cd60dd 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -854,6 +854,20 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->cpts_active_slave = prop;
 
+	if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
+		pr_err("Missing cpts_clock_mult property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->cpts_clock_mult = prop;
+
+	if (of_property_read_u32(node, "cpts_clock_shift", &prop)) {
+		pr_err("Missing cpts_clock_shift property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->cpts_clock_shift = prop;
+
 	data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) *
 				   data->slaves, GFP_KERNEL);
 	if (!data->slave_data) {
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index 15a077eb0689..b5c16c3df458 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -34,6 +34,8 @@ struct cpsw_platform_data {
 	u32	slaves;		/* number of slave cpgmac ports */
 	struct cpsw_slave_data	*slave_data;
 	u32	cpts_active_slave; /* time stamping slave */
+	u32	cpts_clock_mult;  /* convert input clock ticks to nanoseconds */
+	u32	cpts_clock_shift; /* convert input clock ticks to nanoseconds */
 
 	u32	ale_reg_ofs;	/* address lookup engine reg offset */
 	u32	ale_entries;	/* ale table size */
-- 
cgit v1.2.3-55-g7522


From 50ecf2c3afead23a05227ab004e4212eca08c207 Mon Sep 17 00:00:00 2001
From: Yoshihiro YUNOMAE
Date: Thu, 11 Oct 2012 16:27:54 -0700
Subject: ring-buffer: Change unsigned long type of
 ring_buffer_oldest_event_ts() to u64

ring_buffer_oldest_event_ts() should return a value of u64 type, because
ring_buffer_per_cpu->buffer_page->buffer_data_page->time_stamp is u64 type.

Link: http://lkml.kernel.org/r/1349998076-15495-5-git-send-email-dhsharp@google.com

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Vaibhav Nagarnaik <vnagarnaik@google.com>
Signed-off-by: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 kernel/trace/ring_buffer.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 2007375cfe77..519777e3fa01 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -159,7 +159,7 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
-unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu);
+u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 23a384b92512..3c7834c24e54 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2932,12 +2932,12 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to read from.
  */
-unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 {
 	unsigned long flags;
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct buffer_page *bpage;
-	unsigned long ret;
+	u64 ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
-- 
cgit v1.2.3-55-g7522


From 0d5c6e1c19bab82fad4837108c2902f557d62a04 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 1 Nov 2012 20:54:21 -0400
Subject: tracing: Use irq_work for wake ups and remove *_nowake_*() functions

Have the ring buffer commit function use the irq_work infrastructure to
wake up any waiters waiting on the ring buffer for new data. The irq_work
was created for such a purpose, where doing the actual wake up at the
time of adding data is too dangerous, as an event or function trace may
be in the midst of the work queue locks and cause deadlocks. The irq_work
will either delay the action to the next timer interrupt, or trigger an IPI
to itself forcing an interrupt to do the work (in a safe location).

With irq_work, all ring buffer commits can safely do wakeups, removing
the need for the ring buffer commit "nowake" variants, which were used
by events and function tracing. All commits can now safely use the
normal commit, and the "nowake" variants can be removed.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h      |  14 ++---
 include/trace/ftrace.h            |   3 +-
 kernel/trace/Kconfig              |   1 +
 kernel/trace/trace.c              | 121 +++++++++++++++++++++-----------------
 kernel/trace/trace.h              |   5 --
 kernel/trace/trace_events.c       |   2 +-
 kernel/trace/trace_kprobe.c       |   8 +--
 kernel/trace/trace_sched_switch.c |   2 +-
 kernel/trace/trace_selftest.c     |   1 +
 9 files changed, 84 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 642928cf57b4..b80c8ddfbbdc 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -127,13 +127,13 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-				       struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					    struct ring_buffer_event *event,
-					    unsigned long flags, int pc,
-					    struct pt_regs *regs);
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event,
+				unsigned long flags, int pc);
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs);
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event);
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a763888a36f9..698f2a890322 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -545,8 +545,7 @@ ftrace_raw_event_##call(void *__data, proto)				\
 	{ assign; }							\
 									\
 	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
-		trace_nowake_buffer_unlock_commit(buffer,		\
-						  event, irq_flags, pc); \
+		trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \
 }
 /*
  * The ftrace_test_probe is compiled out, it is only here as a build time check
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4cea4f41c1d9..5d89335a485f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -119,6 +119,7 @@ config TRACING
 	select BINARY_PRINTF
 	select EVENT_TRACING
 	select TRACE_CLOCK
+	select IRQ_WORK
 
 config GENERIC_TRACER
 	bool
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d5cbc0d3f209..37d1c703e3ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -84,6 +85,14 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
+/*
+ * When a reader is waiting for data, then this variable is
+ * set to true.
+ */
+static bool trace_wakeup_needed;
+
+static struct irq_work trace_work_wakeup;
+
 /*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
@@ -329,12 +338,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 static int trace_stop_count;
 static DEFINE_RAW_SPINLOCK(tracing_start_lock);
 
-static void wakeup_work_handler(struct work_struct *work)
+/**
+ * trace_wake_up - wake up tasks waiting for trace input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * trace_wait queue. These is used with trace_poll for tasks polling the
+ * trace.
+ */
+static void trace_wake_up(struct irq_work *work)
 {
-	wake_up(&trace_wait);
-}
+	wake_up_all(&trace_wait);
 
-static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
+}
 
 /**
  * tracing_on - enable tracing buffers
@@ -389,22 +404,6 @@ int tracing_is_on(void)
 }
 EXPORT_SYMBOL_GPL(tracing_is_on);
 
-/**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-void trace_wake_up(void)
-{
-	const unsigned long delay = msecs_to_jiffies(2);
-
-	if (trace_flags & TRACE_ITER_BLOCK)
-		return;
-	schedule_delayed_work(&wakeup_work, delay);
-}
-
 static int __init set_buf_size(char *str)
 {
 	unsigned long buf_size;
@@ -753,6 +752,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
+static void default_wait_pipe(struct trace_iterator *iter)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	trace_wakeup_needed = true;
+
+	if (trace_empty(iter))
+		schedule();
+
+	finish_wait(&trace_wait, &wait);
+}
+
 /**
  * register_tracer - register a tracer with the ftrace system.
  * @type - the plugin for the tracer
@@ -1156,30 +1189,32 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
+	if (trace_wakeup_needed) {
+		trace_wakeup_needed = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&trace_work_wakeup);
+	}
 	ring_buffer_unlock_commit(buffer, event);
 }
 
 static inline void
 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
 			     struct ring_buffer_event *event,
-			     unsigned long flags, int pc,
-			     int wake)
+			     unsigned long flags, int pc)
 {
 	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack(buffer, flags, 6, pc);
 	ftrace_trace_userstack(buffer, flags, pc);
-
-	if (wake)
-		trace_wake_up();
 }
 
 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
 
 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
@@ -1196,29 +1231,21 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-				       struct ring_buffer_event *event,
-				       unsigned long flags, int pc)
-{
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
-}
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
-
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					    struct ring_buffer_event *event,
-					    unsigned long flags, int pc,
-					    struct pt_regs *regs)
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs)
 {
 	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
 
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event)
@@ -3354,19 +3381,6 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
 	}
 }
 
-
-void default_wait_pipe(struct trace_iterator *iter)
-{
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-	if (trace_empty(iter))
-		schedule();
-
-	finish_wait(&trace_wait, &wait);
-}
-
 /*
  * This is a make-shift waitqueue.
  * A tracer might use this callback on some rare cases:
@@ -5107,6 +5121,7 @@ __init static int tracer_alloc_buffers(void)
 #endif
 
 	trace_init_cmdlines();
+	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
 	current_trace = &nop_trace;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3e8a176f64e6..55010ed175f0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -327,7 +327,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu)
 
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
-void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
 void tracing_reset_current(int cpu);
@@ -349,9 +348,6 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
 			  unsigned long len,
 			  unsigned long flags,
 			  int pc);
-void trace_buffer_unlock_commit(struct ring_buffer *buffer,
-				struct ring_buffer_event *event,
-				unsigned long flags, int pc);
 
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
@@ -370,7 +366,6 @@ void trace_init_global_iter(struct trace_iterator *iter);
 
 void tracing_iter_reset(struct trace_iterator *iter, int cpu);
 
-void default_wait_pipe(struct trace_iterator *iter);
 void poll_wait_pipe(struct trace_iterator *iter);
 
 void ftrace(struct trace_array *tr,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index cb2df3b70f7f..880073d0b946 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1760,7 +1760,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 
-	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
+	trace_buffer_unlock_commit(buffer, event, flags, pc);
 
  out:
 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5a3c533ef060..1865d5f76538 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -751,8 +751,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_nowake_buffer_unlock_commit_regs(buffer, event,
-						       irq_flags, pc, regs);
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
 }
 
 /* Kretprobe handler */
@@ -784,8 +784,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_nowake_buffer_unlock_commit_regs(buffer, event,
-						       irq_flags, pc, regs);
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
 }
 
 /* Event entry printers */
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b0a136ac382a..3374c792ccd8 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -102,7 +102,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_cpu			= task_cpu(wakee);
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
+		trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 
 static void
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 091b815f7b0a..47623169a815 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1094,6 +1094,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 	tracing_stop();
 	/* check both trace buffers */
 	ret = trace_test_buffer(tr, NULL);
+	printk("ret = %d\n", ret);
 	if (!ret)
 		ret = trace_test_buffer(&max_tr, &count);
 
-- 
cgit v1.2.3-55-g7522


From 873e698067cfa21420576632e1c3387c0f90ce4a Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Wed, 31 Oct 2012 09:54:56 -0700
Subject: ARM: OMAP: Remove NEED_MACH_GPIO_H

Omap no longer needs this option, mach/gpio.h is
empty.

Also remove mach/irqs.h from gpio-omap.h and
include it directly from the related omap1
gpio init files.

Otherwise omap2+ build fails for MULTI_PLATFORM.

Cc: Peter Ujfalusi <peter.ujfalusi@ti.com>
Cc: Jarkko Nikula <jarkko.nikula@bitmer.com>
Cc: Liam Girdwood <lrg@ti.com>
Cc: alsa-devel@alsa-project.org
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/Kconfig                        | 1 -
 arch/arm/mach-omap1/gpio15xx.c          | 2 ++
 arch/arm/mach-omap1/gpio16xx.c          | 2 ++
 arch/arm/mach-omap1/gpio7xx.c           | 2 ++
 arch/arm/mach-omap1/include/mach/gpio.h | 3 ---
 arch/arm/mach-omap2/include/mach/gpio.h | 3 ---
 include/linux/platform_data/gpio-omap.h | 1 -
 sound/soc/omap/am3517evm.c              | 2 --
 sound/soc/omap/n810.c                   | 1 -
 sound/soc/omap/osk5912.c                | 1 -
 sound/soc/omap/sdp3430.c                | 2 --
 sound/soc/omap/zoom2.c                  | 3 ---
 12 files changed, 6 insertions(+), 17 deletions(-)
 delete mode 100644 arch/arm/mach-omap1/include/mach/gpio.h
 delete mode 100644 arch/arm/mach-omap2/include/mach/gpio.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 73067efd4845..6207cf734b2b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -937,7 +937,6 @@ config ARCH_OMAP
 	select CLKSRC_MMIO
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
-	select NEED_MACH_GPIO_H
 	help
 	  Support for TI's OMAP platform (OMAP1/2/3/4).
 
diff --git a/arch/arm/mach-omap1/gpio15xx.c b/arch/arm/mach-omap1/gpio15xx.c
index 98e6f39224a4..02b3eb2e201c 100644
--- a/arch/arm/mach-omap1/gpio15xx.c
+++ b/arch/arm/mach-omap1/gpio15xx.c
@@ -19,6 +19,8 @@
 #include <linux/gpio.h>
 #include <linux/platform_data/gpio-omap.h>
 
+#include <mach/irqs.h>
+
 #define OMAP1_MPUIO_VBASE		OMAP1_MPUIO_BASE
 #define OMAP1510_GPIO_BASE		0xFFFCE000
 
diff --git a/arch/arm/mach-omap1/gpio16xx.c b/arch/arm/mach-omap1/gpio16xx.c
index 33f419236b17..b9952a258d82 100644
--- a/arch/arm/mach-omap1/gpio16xx.c
+++ b/arch/arm/mach-omap1/gpio16xx.c
@@ -19,6 +19,8 @@
 #include <linux/gpio.h>
 #include <linux/platform_data/gpio-omap.h>
 
+#include <mach/irqs.h>
+
 #define OMAP1610_GPIO1_BASE		0xfffbe400
 #define OMAP1610_GPIO2_BASE		0xfffbec00
 #define OMAP1610_GPIO3_BASE		0xfffbb400
diff --git a/arch/arm/mach-omap1/gpio7xx.c b/arch/arm/mach-omap1/gpio7xx.c
index 958ce9acee95..f5819b2b7cbe 100644
--- a/arch/arm/mach-omap1/gpio7xx.c
+++ b/arch/arm/mach-omap1/gpio7xx.c
@@ -19,6 +19,8 @@
 #include <linux/gpio.h>
 #include <linux/platform_data/gpio-omap.h>
 
+#include <mach/irqs.h>
+
 #define OMAP7XX_GPIO1_BASE		0xfffbc000
 #define OMAP7XX_GPIO2_BASE		0xfffbc800
 #define OMAP7XX_GPIO3_BASE		0xfffbd000
diff --git a/arch/arm/mach-omap1/include/mach/gpio.h b/arch/arm/mach-omap1/include/mach/gpio.h
deleted file mode 100644
index ebf86c0f4f46..000000000000
--- a/arch/arm/mach-omap1/include/mach/gpio.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/*
- * arch/arm/mach-omap1/include/mach/gpio.h
- */
diff --git a/arch/arm/mach-omap2/include/mach/gpio.h b/arch/arm/mach-omap2/include/mach/gpio.h
deleted file mode 100644
index 5621cc59c9f4..000000000000
--- a/arch/arm/mach-omap2/include/mach/gpio.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/*
- * arch/arm/mach-omap2/include/mach/gpio.h
- */
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index e8741c2678d5..5d50b25a73d7 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -26,7 +26,6 @@
 
 #include <linux/io.h>
 #include <linux/platform_device.h>
-#include <mach/irqs.h>
 
 #define OMAP1_MPUIO_BASE			0xfffb5000
 
diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c
index fad350682ca2..c1900b2a6f28 100644
--- a/sound/soc/omap/am3517evm.c
+++ b/sound/soc/omap/am3517evm.c
@@ -25,8 +25,6 @@
 #include <sound/soc.h>
 
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
-#include <mach/gpio.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 
 #include "omap-mcbsp.h"
diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index 521bfc3d2b2b..230b8c144848 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -29,7 +29,6 @@
 #include <sound/soc.h>
 
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
 #include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c
index 3960e8df9c76..06ef8d67ed1c 100644
--- a/sound/soc/omap/osk5912.c
+++ b/sound/soc/omap/osk5912.c
@@ -28,7 +28,6 @@
 #include <sound/soc.h>
 
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
 #include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c
index 597cae769cea..b462a2c9385f 100644
--- a/sound/soc/omap/sdp3430.c
+++ b/sound/soc/omap/sdp3430.c
@@ -31,8 +31,6 @@
 #include <sound/jack.h>
 
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
-#include <mach/gpio.h>
 #include <linux/platform_data/gpio-omap.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 
diff --git a/sound/soc/omap/zoom2.c b/sound/soc/omap/zoom2.c
index 677b567935f8..27c501c301b7 100644
--- a/sound/soc/omap/zoom2.c
+++ b/sound/soc/omap/zoom2.c
@@ -26,9 +26,6 @@
 #include <sound/soc.h>
 
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
-#include <mach/gpio.h>
-#include <mach/board-zoom.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 
 /* Register descriptions for twl4030 codec part */
-- 
cgit v1.2.3-55-g7522


From e19d6763cc300fcb706bd291b24ac06be71e1ce6 Mon Sep 17 00:00:00 2001
From: Michael S. Tsirkin
Date: Thu, 1 Nov 2012 09:16:22 +0000
Subject: skb: report completion status for zero copy skbs

Even if skb is marked for zero copy, net core might still decide
to copy it later which is somewhat slower than a copy in user context:
besides copying the data we need to pin/unpin the pages.

Add a parameter reporting such cases through zero copy callback:
if this happens a lot, device can take this into account
and switch to copying in user context.

This patch updates all users but ignores the passed value for now:
it will be used by follow-up patches.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c  | 2 +-
 drivers/vhost/vhost.h  | 2 +-
 include/linux/skbuff.h | 4 +++-
 net/core/skbuff.c      | 4 ++--
 4 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 99ac2cb08b43..73d08db2e677 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1600,7 +1600,7 @@ void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
 	kfree(ubufs);
 }
 
-void vhost_zerocopy_callback(struct ubuf_info *ubuf)
+void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool status)
 {
 	struct vhost_ubuf_ref *ubufs = ubuf->ctx;
 	struct vhost_virtqueue *vq = ubufs->vq;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 1125af3d27d1..2de4ce2fec40 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -191,7 +191,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
 		    unsigned int log_num, u64 len);
-void vhost_zerocopy_callback(struct ubuf_info *);
+void vhost_zerocopy_callback(struct ubuf_info *, bool);
 int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq);
 
 #define vq_err(vq, fmt, ...) do {                                  \
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a2a0bdb95a8f..e5eae5bc1d4d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -235,11 +235,13 @@ enum {
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
  * lower device, the skb last reference should be 0 when calling this.
+ * The zerocopy_success argument is true if zero copy transmit occurred,
+ * false on data copy or out of memory error caused by data copy attempt.
  * The ctx field is used to track device context.
  * The desc field is used to track userspace buffer index.
  */
 struct ubuf_info {
-	void (*callback)(struct ubuf_info *);
+	void (*callback)(struct ubuf_info *, bool zerocopy_success);
 	void *ctx;
 	unsigned long desc;
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6e04b1fa11f2..4abdf71a23f8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb)
 
 			uarg = skb_shinfo(skb)->destructor_arg;
 			if (uarg->callback)
-				uarg->callback(uarg);
+				uarg->callback(uarg, true);
 		}
 
 		if (skb_has_frag_list(skb))
@@ -797,7 +797,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
 
-	uarg->callback(uarg);
+	uarg->callback(uarg, false);
 
 	/* skb frags point to kernel buffers */
 	for (i = num_frags - 1; i >= 0; i--) {
-- 
cgit v1.2.3-55-g7522


From 25121173f7b1e4ac3fc692df6e7b8c52ec36abba Mon Sep 17 00:00:00 2001
From: Michael S. Tsirkin
Date: Thu, 1 Nov 2012 09:16:28 +0000
Subject: skb: api to report errors for zero copy skbs

Orphaning frags for zero copy skbs needs to allocate data in atomic
context so is has a chance to fail. If it does we currently discard
the skb which is safe, but we don't report anything to the caller,
so it can not recover by e.g. disabling zero copy.

Add an API to free skb reporting such errors: this is used
by tun in case orphaning frags fails.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e5eae5bc1d4d..f2af494330ab 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -568,6 +568,7 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 }
 
 extern void kfree_skb(struct sk_buff *skb);
+extern void skb_tx_error(struct sk_buff *skb);
 extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct kmem_cache *skbuff_head_cache;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4abdf71a23f8..d9addea10309 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -634,6 +634,26 @@ void kfree_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(kfree_skb);
 
+/**
+ *	skb_tx_error - report an sk_buff xmit error
+ *	@skb: buffer that triggered an error
+ *
+ *	Report xmit error if a device callback is tracking this skb.
+ *	skb must be freed afterwards.
+ */
+void skb_tx_error(struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		struct ubuf_info *uarg;
+
+		uarg = skb_shinfo(skb)->destructor_arg;
+		if (uarg->callback)
+			uarg->callback(uarg, false);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+	}
+}
+EXPORT_SYMBOL(skb_tx_error);
+
 /**
  *	consume_skb - free an skbuff
  *	@skb: buffer to free
-- 
cgit v1.2.3-55-g7522


From 46acc460c07b5c74287560a00b6cbc6111136ab6 Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Thu, 1 Nov 2012 09:11:11 +0000
Subject: eth: Make is_link_local() consistent with other address tests

Function name should include '_ether_addr'.
Return type should be bool.
Parameter name should be 'addr' not 'dest' (also matching kernel-doc).

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     | 2 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +-
 drivers/net/ethernet/intel/ixgbevf/vf.c           | 2 +-
 include/linux/etherdevice.h                       | 6 +++---
 net/bridge/br_input.c                             | 2 +-
 net/bridge/br_sysfs_br.c                          | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b54bc40f00b0..690535a0322f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6953,7 +6953,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}
 
-	if (is_unicast_ether_addr(addr) || is_link_local(addr)) {
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
 		u32 rar_uc_entries = IXGBE_MAX_PF_MACVLANS;
 
 		if (netdev_uc_count(dev) < rar_uc_entries)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index c281ee9c9e2a..f3d3947ae962 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2979,7 +2979,7 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	unsigned short f;
 #endif
 	u8 *dst_mac = skb_header_pointer(skb, 0, 0, NULL);
-	if (!dst_mac || is_link_local(dst_mac)) {
+	if (!dst_mac || is_link_local_ether_addr(dst_mac)) {
 		dev_kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index c0fd1ef49400..0c94557b53df 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -331,7 +331,7 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == cnt)
 			break;
-		if (is_link_local(ha->addr))
+		if (is_link_local_ether_addr(ha->addr))
 			continue;
 
 		vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 45ded4be7b43..7cf2516ec74c 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -55,15 +55,15 @@ extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 static const u8 br_reserved_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
 /**
- * is_link_local -  Determine if given Eth addr is a link local mcast address.
+ * is_link_local_ether_addr - Determine if given Ethernet address is link-local
  * @addr: Pointer to a six-byte array containing the Ethernet address
  *
  * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per
  * IEEE 802.1Q 8.6.3 Frame filtering.
  */
-static inline int is_link_local(const unsigned char *dest)
+static inline bool is_link_local_ether_addr(const u8 *addr)
 {
-	__be16 *a = (__be16 *)dest;
+	__be16 *a = (__be16 *)addr;
 	static const __be16 *b = (const __be16 *)br_reserved_address;
 	static const __be16 m = cpu_to_be16(0xfff0);
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d047978bf025..4b34207419b1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -147,7 +147,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 
 	p = br_port_get_rcu(skb->dev);
 
-	if (unlikely(is_link_local(dest))) {
+	if (unlikely(is_link_local_ether_addr(dest))) {
 		/*
 		 * See IEEE 802.1D Table 7-10 Reserved addresses
 		 *
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 82385fd2f187..cffb76e2161c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -309,7 +309,7 @@ static ssize_t store_group_addr(struct device *d,
 		   &new_addr[3], &new_addr[4], &new_addr[5]) != 6)
 		return -EINVAL;
 
-	if (!is_link_local(new_addr))
+	if (!is_link_local_ether_addr(new_addr))
 		return -EINVAL;
 
 	if (new_addr[5] == 1 ||		/* 802.3x Pause address */
-- 
cgit v1.2.3-55-g7522


From 2bc80059fe19229e68a306ce12f5e61e80b92c5c Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Thu, 1 Nov 2012 09:12:02 +0000
Subject: eth: Rename and properly align br_reserved_address array

Since this array is no longer part of the bridge driver, it should
have an 'eth' prefix not 'br'.

We also assume that either it's 16-bit-aligned or the architecture has
efficient unaligned access.  Ensure the first of these is true by
explicitly aligning it.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 5 +++--
 net/bridge/br_device.c      | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 7cf2516ec74c..243eea1e33d8 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -52,7 +52,8 @@ extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
 /* Reserved Ethernet Addresses per IEEE 802.1Q */
-static const u8 br_reserved_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) =
+{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
 /**
  * is_link_local_ether_addr - Determine if given Ethernet address is link-local
@@ -64,7 +65,7 @@ static const u8 br_reserved_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00,
 static inline bool is_link_local_ether_addr(const u8 *addr)
 {
 	__be16 *a = (__be16 *)addr;
-	static const __be16 *b = (const __be16 *)br_reserved_address;
+	static const __be16 *b = (const __be16 *)eth_reserved_addr_base;
 	static const __be16 m = cpu_to_be16(0xfff0);
 
 	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 4245e991dd98..7c78e2640190 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -358,7 +358,7 @@ void br_dev_setup(struct net_device *dev)
 	br->bridge_id.prio[0] = 0x80;
 	br->bridge_id.prio[1] = 0x00;
 
-	memcpy(br->group_addr, br_reserved_address, ETH_ALEN);
+	memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN);
 
 	br->stp_enabled = BR_NO_STP;
 	br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
-- 
cgit v1.2.3-55-g7522


From 19f5ee2716373519fda2129e9333f4c3847aa742 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 28 Oct 2012 18:14:14 +0100
Subject: uprobes: Kill arch_uprobe_enable/disable_step() hooks

Kill arch_uprobe_enable/disable_step() hooks, they do nothing and
nobody needs them.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h |  2 --
 kernel/events/uprobes.c | 10 ----------
 2 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 24594571c5a3..2615c4d7788d 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -101,8 +101,6 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
 extern void uprobe_copy_process(struct task_struct *t);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
-extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch);
-extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch);
 extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index abbfd8440a6d..39c75cc51efc 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1430,14 +1430,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	return uprobe;
 }
 
-void __weak arch_uprobe_enable_step(struct arch_uprobe *arch)
-{
-}
-
-void __weak arch_uprobe_disable_step(struct arch_uprobe *arch)
-{
-}
-
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1491,7 +1483,6 @@ static void handle_swbp(struct pt_regs *regs)
 		goto out;
 
 	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
-		arch_uprobe_enable_step(&uprobe->arch);
 		utask->active_uprobe = uprobe;
 		utask->state = UTASK_SSTEP;
 		return;
@@ -1523,7 +1514,6 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	else
 		WARN_ON_ONCE(1);
 
-	arch_uprobe_disable_step(&uprobe->arch);
 	put_uprobe(uprobe);
 	utask->active_uprobe = NULL;
 	utask->state = UTASK_RUNNING;
-- 
cgit v1.2.3-55-g7522


From 398f382c0a5bd62f0f31871e844700e1b9fd1ad3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Sun, 28 Oct 2012 08:27:19 +0000
Subject: pktgen: clean up ktime_t helpers

Some years ago, the ktime_t helper functions ktime_now() and ktime_lt()
have been introduced. Instead of defining them inside pktgen.c, they
should either use ktime_t library functions or, if not available, they
should be defined in ktime.h, so that also others can benefit from them.
ktime_compare() is introduced with a similar notion as in timespec_compare().

Signed-off-by: Daniel Borkmann <daniel.borkmann@tik.ee.ethz.ch>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ktime.h | 19 +++++++++++++++++++
 net/core/pktgen.c     | 41 +++++++++++++----------------------------
 2 files changed, 32 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 06177ba10a16..e83512f63df5 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -282,6 +282,25 @@ static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
 	return cmp1.tv64 == cmp2.tv64;
 }
 
+/**
+ * ktime_compare - Compares two ktime_t variables for less, greater or equal
+ * @cmp1:	comparable1
+ * @cmp2:	comparable2
+ *
+ * Returns ...
+ *   cmp1  < cmp2: return <0
+ *   cmp1 == cmp2: return 0
+ *   cmp1  > cmp2: return >0
+ */
+static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
+{
+	if (cmp1.tv64 < cmp2.tv64)
+		return -1;
+	if (cmp1.tv64 > cmp2.tv64)
+		return 1;
+	return 0;
+}
+
 static inline s64 ktime_to_us(const ktime_t kt)
 {
 	struct timeval tv = ktime_to_timeval(kt);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1d1c216ffd9a..b29dacf900f9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -419,20 +419,6 @@ struct pktgen_thread {
 #define REMOVE 1
 #define FIND   0
 
-static inline ktime_t ktime_now(void)
-{
-	struct timespec ts;
-	ktime_get_ts(&ts);
-
-	return timespec_to_ktime(ts);
-}
-
-/* This works even if 32 bit because of careful byte order choice */
-static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
-{
-	return cmp1.tv64 < cmp2.tv64;
-}
-
 static const char version[] =
 	"Packet Generator for packet performance testing. "
 	"Version: " VERSION "\n";
@@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\n");
 
 	/* not really stopped, more like last-running-at */
-	stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+	stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at;
 	idle = pkt_dev->idle_acc;
 	do_div(idle, NSEC_PER_USEC);
 
@@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 		return;
 	}
 
-	start_time = ktime_now();
+	start_time = ktime_get();
 	if (remaining < 100000) {
 		/* for small delays (<100us), just loop until limit is reached */
 		do {
-			end_time = ktime_now();
-		} while (ktime_lt(end_time, spin_until));
+			end_time = ktime_get();
+		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
 		hrtimer_init_sleeper(&t, current);
@@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			hrtimer_cancel(&t.timer);
 		} while (t.task && pkt_dev->running && !signal_pending(current));
 		__set_current_state(TASK_RUNNING);
-		end_time = ktime_now();
+		end_time = ktime_get();
 	}
 
 	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
@@ -2912,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t)
 			pktgen_clear_counters(pkt_dev);
 			pkt_dev->running = 1;	/* Cranke yeself! */
 			pkt_dev->skb = NULL;
-			pkt_dev->started_at =
-				pkt_dev->next_tx = ktime_now();
+			pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
 
 			set_pkt_overhead(pkt_dev);
 
@@ -3072,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 
 	kfree_skb(pkt_dev->skb);
 	pkt_dev->skb = NULL;
-	pkt_dev->stopped_at = ktime_now();
+	pkt_dev->stopped_at = ktime_get();
 	pkt_dev->running = 0;
 
 	show_results(pkt_dev, nr_frags);
@@ -3091,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
 			continue;
 		if (best == NULL)
 			best = pkt_dev;
-		else if (ktime_lt(pkt_dev->next_tx, best->next_tx))
+		else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
 			best = pkt_dev;
 	}
 	if_unlock(t);
@@ -3176,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
 
 static void pktgen_resched(struct pktgen_dev *pkt_dev)
 {
-	ktime_t idle_start = ktime_now();
+	ktime_t idle_start = ktime_get();
 	schedule();
-	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
 }
 
 static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 {
-	ktime_t idle_start = ktime_now();
+	ktime_t idle_start = ktime_get();
 
 	while (atomic_read(&(pkt_dev->skb->users)) != 1) {
 		if (signal_pending(current))
@@ -3194,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 		else
 			cpu_relax();
 	}
-	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
 }
 
 static void pktgen_xmit(struct pktgen_dev *pkt_dev)
@@ -3216,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	 * "never transmit"
 	 */
 	if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
-		pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
+		pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX);
 		return;
 	}
 
-- 
cgit v1.2.3-55-g7522


From cdfcc572be0a8b423cecfb4ab5fd735fafe9c54a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 30 Oct 2012 14:31:38 -0600
Subject: PCI: Add pci_stop_and_remove_root_bus()

It supports both PCI root bus and PCI bus under PCI bridge.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>---
 drivers/pci/remove.c | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/pci.h  |  2 ++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 513972f3ed13..7c0fd9252e6f 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -111,3 +111,39 @@ void pci_stop_and_remove_bus_device(struct pci_dev *dev)
 	pci_remove_bus_device(dev);
 }
 EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
+
+void pci_stop_root_bus(struct pci_bus *bus)
+{
+	struct pci_dev *child, *tmp;
+	struct pci_host_bridge *host_bridge;
+
+	if (!pci_is_root_bus(bus))
+		return;
+
+	host_bridge = to_pci_host_bridge(bus->bridge);
+	list_for_each_entry_safe_reverse(child, tmp,
+					 &bus->devices, bus_list)
+		pci_stop_bus_device(child);
+
+	/* stop the host bridge */
+	device_del(&host_bridge->dev);
+}
+
+void pci_remove_root_bus(struct pci_bus *bus)
+{
+	struct pci_dev *child, *tmp;
+	struct pci_host_bridge *host_bridge;
+
+	if (!pci_is_root_bus(bus))
+		return;
+
+	host_bridge = to_pci_host_bridge(bus->bridge);
+	list_for_each_entry_safe(child, tmp,
+				 &bus->devices, bus_list)
+		pci_remove_bus_device(child);
+	pci_remove_bus(bus);
+	host_bridge->bus = NULL;
+
+	/* remove the host bridge */
+	put_device(&host_bridge->dev);
+}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f543eb3b1c0c..786094254d57 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -712,6 +712,8 @@ extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
 extern void pci_remove_bus(struct pci_bus *b);
 extern void pci_stop_and_remove_bus_device(struct pci_dev *dev);
+void pci_stop_root_bus(struct pci_bus *bus);
+void pci_remove_root_bus(struct pci_bus *bus);
 void pci_setup_cardbus(struct pci_bus *bus);
 extern void pci_sort_breadthfirst(void);
 #define dev_is_pci(d) ((d)->bus == &pci_bus_type)
-- 
cgit v1.2.3-55-g7522


From 3b98c0c2093d1f92e5b7394ae0b13d142e7ef880 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 7 Mar 2011 12:49:34 +0100
Subject: drbd: switch configuration interface from connector to genetlink

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c |    3 +-
 drivers/block/drbd/drbd_int.h    |   36 +-
 drivers/block/drbd/drbd_main.c   |   27 +-
 drivers/block/drbd/drbd_nl.c     | 1536 +++++++++++++++++++-------------------
 drivers/block/drbd/drbd_state.c  |    7 +-
 include/linux/drbd.h             |   35 +-
 include/linux/genl_magic_func.h  |    2 +-
 7 files changed, 806 insertions(+), 840 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 7cd78617669b..c1a90616776b 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -702,6 +702,7 @@ static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
 	struct drbd_conf *mdev = w->mdev;
+	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
 
 	if (!get_ldev(mdev)) {
 		if (__ratelimit(&drbd_ratelimit_state))
@@ -725,7 +726,7 @@ static int w_update_odbm(struct drbd_work *w, int unused)
 			break;
 		}
 	}
-	drbd_bcast_sync_progress(mdev);
+	drbd_bcast_event(mdev, &sib);
 
 	return 1;
 }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e68758344647..429fd8da6b71 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -44,6 +44,7 @@
 #include <net/tcp.h>
 #include <linux/lru_cache.h>
 #include <linux/prefetch.h>
+#include <linux/drbd_genl_api.h>
 #include <linux/drbd.h>
 #include "drbd_state.h"
 
@@ -65,7 +66,6 @@
 extern unsigned int minor_count;
 extern int disable_sendpage;
 extern int allow_oos;
-extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
 extern int enable_faults;
@@ -865,14 +865,6 @@ struct drbd_md {
 	 */
 };
 
-/* for sync_conf and other types... */
-#define NL_PACKET(name, number, fields) struct name { fields };
-#define NL_INTEGER(pn,pr,member) int member;
-#define NL_INT64(pn,pr,member) __u64 member;
-#define NL_BIT(pn,pr,member)   unsigned member:1;
-#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
-#include "linux/drbd_nl.h"
-
 struct drbd_backing_dev {
 	struct block_device *backing_bdev;
 	struct block_device *md_bdev;
@@ -1502,7 +1494,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 extern void drbd_free_mdev(struct drbd_conf *mdev);
 extern void drbd_delete_device(unsigned int minor);
 
-struct drbd_tconn *drbd_new_tconn(char *name);
+struct drbd_tconn *drbd_new_tconn(const char *name);
 extern void drbd_free_tconn(struct drbd_tconn *tconn);
 struct drbd_tconn *conn_by_name(const char *name);
 
@@ -1679,16 +1671,22 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
-
 /* drbd_nl.c */
-
-void drbd_nl_cleanup(void);
-int __init drbd_nl_init(void);
-void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
-void drbd_bcast_sync_progress(struct drbd_conf *mdev);
-void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *,
-		   const char *, const struct drbd_peer_request *);
-
+/* state info broadcast */
+struct sib_info {
+	enum drbd_state_info_bcast_reason sib_reason;
+	union {
+		struct {
+			char *helper_name;
+			unsigned helper_exit_code;
+		};
+		struct {
+			union drbd_state os;
+			union drbd_state ns;
+		};
+	};
+};
+void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib);
 
 /*
  * inline helper functions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9f6db5947c65..9697ab872098 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -86,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!");
 module_param(minor_count, uint, 0444);
 module_param(disable_sendpage, bool, 0644);
 module_param(allow_oos, bool, 0);
-module_param(cn_idx, uint, 0444);
 module_param(proc_details, int, 0644);
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
@@ -108,7 +107,6 @@ module_param(fault_devs, int, 0644);
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
-unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details;       /* Detail level in proc drbd*/
 
 /* Module parameter for setting the user mode helper program
@@ -2175,7 +2173,7 @@ static void drbd_cleanup(void)
 	if (drbd_proc)
 		remove_proc_entry("drbd", NULL);
 
-	drbd_nl_cleanup();
+	drbd_genl_unregister();
 
 	idr_for_each_entry(&minors, mdev, i)
 		drbd_delete_device(i);
@@ -2237,6 +2235,9 @@ struct drbd_tconn *conn_by_name(const char *name)
 {
 	struct drbd_tconn *tconn;
 
+	if (!name || !name[0])
+		return NULL;
+
 	write_lock_irq(&global_state_lock);
 	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
 		if (!strcmp(tconn->name, name))
@@ -2248,7 +2249,7 @@ found:
 	return tconn;
 }
 
-struct drbd_tconn *drbd_new_tconn(char *name)
+struct drbd_tconn *drbd_new_tconn(const char *name)
 {
 	struct drbd_tconn *tconn;
 
@@ -2333,6 +2334,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 
 	mdev->tconn = tconn;
 	mdev->minor = minor;
+	mdev->vnr = vnr;
 
 	drbd_init_set_defaults(mdev);
 
@@ -2461,10 +2463,6 @@ int __init drbd_init(void)
 #endif
 	}
 
-	err = drbd_nl_init();
-	if (err)
-		return err;
-
 	err = register_blkdev(DRBD_MAJOR, "drbd");
 	if (err) {
 		printk(KERN_ERR
@@ -2473,6 +2471,13 @@ int __init drbd_init(void)
 		return err;
 	}
 
+	err = drbd_genl_register();
+	if (err) {
+		printk(KERN_ERR "drbd: unable to register generic netlink family\n");
+		goto fail;
+	}
+
+
 	register_reboot_notifier(&drbd_notifier);
 
 	/*
@@ -2487,12 +2492,12 @@ int __init drbd_init(void)
 
 	err = drbd_create_mempools();
 	if (err)
-		goto Enomem;
+		goto fail;
 
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
-		goto Enomem;
+		goto fail;
 	}
 
 	rwlock_init(&global_state_lock);
@@ -2507,7 +2512,7 @@ int __init drbd_init(void)
 
 	return 0; /* Success! */
 
-Enomem:
+fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
 		/* currently always the case */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f2739fd188a0..f9be14248e33 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -29,110 +29,225 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/slab.h>
-#include <linux/connector.h>
 #include <linux/blkpg.h>
 #include <linux/cpumask.h>
 #include "drbd_int.h"
 #include "drbd_req.h"
 #include "drbd_wrappers.h"
 #include <asm/unaligned.h>
-#include <linux/drbd_tag_magic.h>
 #include <linux/drbd_limits.h>
-#include <linux/compiler.h>
 #include <linux/kthread.h>
 
-static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
-static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
-static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
-
-/* see get_sb_bdev and bd_claim */
+#include <net/genetlink.h>
+
+/* .doit */
+// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
+// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
+/* .dumpit */
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+
+#include <linux/drbd_genl_api.h>
+#include <linux/genl_magic_func.h>
+
+/* used blkdev_get_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
 
-/* Generate the tag_list to struct functions */
-#define NL_PACKET(name, number, fields) \
-static int name ## _from_tags( \
-	unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
-static int name ## _from_tags( \
-	unsigned short *tags, struct name *arg) \
-{ \
-	int tag; \
-	int dlen; \
-	\
-	while ((tag = get_unaligned(tags++)) != TT_END) {	\
-		dlen = get_unaligned(tags++);			\
-		switch (tag_number(tag)) { \
-		fields \
-		default: \
-			if (tag & T_MANDATORY) { \
-				printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \
-				return 0; \
-			} \
-		} \
-		tags = (unsigned short *)((char *)tags + dlen); \
-	} \
-	return 1; \
+/* Configuration is strictly serialized, because generic netlink message
+ * processing is strictly serialized by the genl_lock().
+ * Which means we can use one static global drbd_config_context struct.
+ */
+static struct drbd_config_context {
+	/* assigned from drbd_genlmsghdr */
+	unsigned int minor;
+	/* assigned from request attributes, if present */
+	unsigned int volume;
+#define VOLUME_UNSPECIFIED		(-1U)
+	/* pointer into the request skb,
+	 * limited lifetime! */
+	char *conn_name;
+
+	/* reply buffer */
+	struct sk_buff *reply_skb;
+	/* pointer into reply buffer */
+	struct drbd_genlmsghdr *reply_dh;
+	/* resolved from attributes, if possible */
+	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
+} adm_ctx;
+
+static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
+{
+	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
+	if (genlmsg_reply(skb, info))
+		printk(KERN_ERR "drbd: error sending genl reply\n");
 }
-#define NL_INTEGER(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
-		arg->member = get_unaligned((int *)(tags));	\
-		break;
-#define NL_INT64(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
-		arg->member = get_unaligned((u64 *)(tags));	\
-		break;
-#define NL_BIT(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
-		arg->member = *(char *)(tags) ? 1 : 0; \
-		break;
-#define NL_STRING(pn, pr, member, len) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
-		if (dlen > len) { \
-			printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \
-				#member, dlen, (unsigned int)len); \
-			return 0; \
-		} \
-		 arg->member ## _len = dlen; \
-		 memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
-		 break;
-#include "linux/drbd_nl.h"
-
-/* Generate the struct to tag_list functions */
-#define NL_PACKET(name, number, fields) \
-static unsigned short* \
-name ## _to_tags( \
-	struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
-static unsigned short* \
-name ## _to_tags( \
-	struct name *arg, unsigned short *tags) \
-{ \
-	fields \
-	return tags; \
+
+/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
+ * reason it could fail was no space in skb, and there are 4k available. */
+static int drbd_msg_put_info(const char *info)
+{
+	struct sk_buff *skb = adm_ctx.reply_skb;
+	struct nlattr *nla;
+	int err = -EMSGSIZE;
+
+	if (!info || !info[0])
+		return 0;
+
+	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
+	if (!nla)
+		return err;
+
+	err = nla_put_string(skb, T_info_text, info);
+	if (err) {
+		nla_nest_cancel(skb, nla);
+		return err;
+	} else
+		nla_nest_end(skb, nla);
+	return 0;
 }
 
-#define NL_INTEGER(pn, pr, member) \
-	put_unaligned(pn | pr | TT_INTEGER, tags++);	\
-	put_unaligned(sizeof(int), tags++);		\
-	put_unaligned(arg->member, (int *)tags);	\
-	tags = (unsigned short *)((char *)tags+sizeof(int));
-#define NL_INT64(pn, pr, member) \
-	put_unaligned(pn | pr | TT_INT64, tags++);	\
-	put_unaligned(sizeof(u64), tags++);		\
-	put_unaligned(arg->member, (u64 *)tags);	\
-	tags = (unsigned short *)((char *)tags+sizeof(u64));
-#define NL_BIT(pn, pr, member) \
-	put_unaligned(pn | pr | TT_BIT, tags++);	\
-	put_unaligned(sizeof(char), tags++);		\
-	*(char *)tags = arg->member; \
-	tags = (unsigned short *)((char *)tags+sizeof(char));
-#define NL_STRING(pn, pr, member, len) \
-	put_unaligned(pn | pr | TT_STRING, tags++);	\
-	put_unaligned(arg->member ## _len, tags++);	\
-	memcpy(tags, arg->member, arg->member ## _len); \
-	tags = (unsigned short *)((char *)tags + arg->member ## _len);
-#include "linux/drbd_nl.h"
-
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
-void drbd_nl_send_reply(struct cn_msg *, int);
+/* This would be a good candidate for a "pre_doit" hook,
+ * and per-family private info->pointers.
+ * But we need to stay compatible with older kernels.
+ * If it returns successfully, adm_ctx members are valid.
+ */
+#define DRBD_ADM_NEED_MINOR	1
+#define DRBD_ADM_NEED_CONN	2
+static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
+		unsigned flags)
+{
+	struct drbd_genlmsghdr *d_in = info->userhdr;
+	const u8 cmd = info->genlhdr->cmd;
+	int err;
+
+	memset(&adm_ctx, 0, sizeof(adm_ctx));
+
+	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
+	if (cmd != DRBD_ADM_GET_STATUS
+	&& security_netlink_recv(skb, CAP_SYS_ADMIN))
+	       return -EPERM;
+
+	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!adm_ctx.reply_skb)
+		goto fail;
+
+	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+					info, &drbd_genl_family, 0, cmd);
+	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
+	 * but anyways */
+	if (!adm_ctx.reply_dh)
+		goto fail;
+
+	adm_ctx.reply_dh->minor = d_in->minor;
+	adm_ctx.reply_dh->ret_code = NO_ERROR;
+
+	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
+		struct nlattr *nla;
+		/* parse and validate only */
+		err = drbd_cfg_context_from_attrs(NULL, info->attrs);
+		if (err)
+			goto fail;
+
+		/* It was present, and valid,
+		 * copy it over to the reply skb. */
+		err = nla_put_nohdr(adm_ctx.reply_skb,
+				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
+				info->attrs[DRBD_NLA_CFG_CONTEXT]);
+		if (err)
+			goto fail;
+
+		/* and assign stuff to the global adm_ctx */
+		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
+		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
+		nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
+		if (nla)
+			adm_ctx.conn_name = nla_data(nla);
+	} else
+		adm_ctx.volume = VOLUME_UNSPECIFIED;
+
+	adm_ctx.minor = d_in->minor;
+	adm_ctx.mdev = minor_to_mdev(d_in->minor);
+	adm_ctx.tconn = conn_by_name(adm_ctx.conn_name);
+
+	if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
+		drbd_msg_put_info("unknown minor");
+		return ERR_MINOR_INVALID;
+	}
+	if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) {
+		drbd_msg_put_info("unknown connection");
+		return ERR_INVALID_REQUEST;
+	}
+
+	/* some more paranoia, if the request was over-determined */
+	if (adm_ctx.mdev &&
+	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
+	    adm_ctx.volume != adm_ctx.mdev->vnr) {
+		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
+				adm_ctx.minor, adm_ctx.volume,
+				adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
+		drbd_msg_put_info("over-determined configuration context mismatch");
+		return ERR_INVALID_REQUEST;
+	}
+	if (adm_ctx.mdev && adm_ctx.tconn &&
+	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
+		pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
+				adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
+		drbd_msg_put_info("over-determined configuration context mismatch");
+		return ERR_INVALID_REQUEST;
+	}
+	return NO_ERROR;
+
+fail:
+	nlmsg_free(adm_ctx.reply_skb);
+	adm_ctx.reply_skb = NULL;
+	return -ENOMEM;
+}
+
+static int drbd_adm_finish(struct genl_info *info, int retcode)
+{
+	struct nlattr *nla;
+	const char *conn_name = NULL;
+
+	if (!adm_ctx.reply_skb)
+		return -ENOMEM;
+
+	adm_ctx.reply_dh->ret_code = retcode;
+
+	nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
+	if (nla) {
+		nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
+		if (nla)
+			conn_name = nla_data(nla);
+	}
+
+	drbd_adm_send_reply(adm_ctx.reply_skb, info);
+	return 0;
+}
 
 int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 {
@@ -142,9 +257,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 			NULL, /* Will be set to address family */
 			NULL, /* Will be set to address */
 			NULL };
-
 	char mb[12], af[20], ad[60], *afs;
 	char *argv[] = {usermode_helper, cmd, mb, NULL };
+	struct sib_info sib;
 	int ret;
 
 	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
@@ -177,8 +292,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	drbd_md_sync(mdev);
 
 	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
-
-	drbd_bcast_ev_helper(mdev, cmd);
+	sib.sib_reason = SIB_HELPER_PRE;
+	sib.helper_name = cmd;
+	drbd_bcast_event(mdev, &sib);
 	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
 	if (ret)
 		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
@@ -188,6 +304,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 		dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
 				usermode_helper, cmd, mb,
 				(ret >> 8) & 0xff, ret);
+	sib.sib_reason = SIB_HELPER_POST;
+	sib.helper_exit_code = ret;
+	drbd_bcast_event(mdev, &sib);
 
 	if (ret < 0) /* Ignore any ERRNOs we got. */
 		ret = 0;
@@ -362,7 +481,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		}
 
 		if (rv == SS_NOTHING_TO_DO)
-			goto fail;
+			goto out;
 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
 			nps = drbd_try_outdate_peer(mdev);
 
@@ -388,13 +507,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 			rv = _drbd_request_state(mdev, mask, val,
 						CS_VERBOSE + CS_WAIT_COMPLETE);
 			if (rv < SS_SUCCESS)
-				goto fail;
+				goto out;
 		}
 		break;
 	}
 
 	if (rv < SS_SUCCESS)
-		goto fail;
+		goto out;
 
 	if (forced)
 		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
@@ -438,33 +557,46 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	drbd_md_sync(mdev);
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
- fail:
+out:
 	mutex_unlock(mdev->state_mutex);
 	return rv;
 }
 
-static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+static const char *from_attrs_err_to_txt(int err)
 {
-	struct primary primary_args;
-
-	memset(&primary_args, 0, sizeof(struct primary));
-	if (!primary_from_tags(nlp->tag_list, &primary_args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
-
-	reply->ret_code =
-		drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
-
-	return 0;
+	return	err == -ENOMSG ? "required attribute missing" :
+		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
+		"invalid attribute value";
 }
 
-static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
+	struct set_role_parms parms;
+	int err;
+	enum drbd_ret_code retcode;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	memset(&parms, 0, sizeof(parms));
+	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
+		err = set_role_parms_from_attrs(&parms, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
+		retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
+	else
+		retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -541,6 +673,12 @@ char *ppsize(char *buf, unsigned long long size)
  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
  *  peer may not initiate a resize.
  */
+/* Note these are not to be confused with
+ * drbd_adm_suspend_io/drbd_adm_resume_io,
+ * which are (sub) state changes triggered by admin (drbdsetup),
+ * and can be long lived.
+ * This changes an mdev->flag, is triggered by drbd internals,
+ * and should be short-lived. */
 void drbd_suspend_io(struct drbd_conf *mdev)
 {
 	set_bit(SUSPEND_IO, &mdev->flags);
@@ -881,11 +1019,10 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
 		dev_info(DEV, "Suspended AL updates\n");
 }
 
-/* does always return 0;
- * interesting return code is in reply->ret_code */
-static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
+	int err;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	sector_t max_possible_sectors;
@@ -897,6 +1034,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	enum drbd_state_rv rv;
 	int cp_discovered = 0;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	mdev = adm_ctx.mdev;
 	conn_reconfig_start(mdev->tconn);
 
 	/* if you want to reconfigure, please tear down first */
@@ -910,7 +1054,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	 * to realize a "hot spare" feature (not that I'd recommend that) */
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 
-	/* allocation not in the IO path, cqueue thread context */
+	/* allocation not in the IO path, drbdsetup context */
 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
 	if (!nbc) {
 		retcode = ERR_NOMEM;
@@ -922,12 +1066,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	nbc->dc.fencing       = DRBD_FENCING_DEF;
 	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
 
-	if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) {
+	err = disk_conf_from_attrs(&nbc->dc, info->attrs);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+	if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
@@ -961,7 +1107,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	 */
 	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				  (nbc->dc.meta_dev_idx < 0) ?
+				  ((int)nbc->dc.meta_dev_idx < 0) ?
 				  (void *)mdev : (void *)drbd_m_holder);
 	if (IS_ERR(bdev)) {
 		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
@@ -997,7 +1143,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < 0) {
+	if ((int)nbc->dc.meta_dev_idx < 0) {
 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
 		/* at least one MB, otherwise it does not make sense */
 		min_md_device_sectors = (2<<10);
@@ -1028,7 +1174,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		dev_warn(DEV, "==> truncating very big lower level device "
 			"to currently maximum possible %llu sectors <==\n",
 			(unsigned long long) max_possible_sectors);
-		if (nbc->dc.meta_dev_idx >= 0)
+		if ((int)nbc->dc.meta_dev_idx >= 0)
 			dev_warn(DEV, "==>> using internal or flexible "
 				      "meta data may help <<==\n");
 	}
@@ -1242,8 +1388,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	put_ldev(mdev);
-	reply->ret_code = retcode;
 	conn_reconfig_done(mdev->tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 
  force_diskless_dec:
@@ -1251,6 +1397,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
  force_diskless:
 	drbd_force_state(mdev, NS(disk, D_FAILED));
 	drbd_md_sync(mdev);
+	conn_reconfig_done(mdev->tconn);
  fail:
 	if (nbc) {
 		if (nbc->backing_bdev)
@@ -1263,8 +1410,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	}
 	lc_destroy(resync_lru);
 
-	reply->ret_code = retcode;
-	conn_reconfig_done(mdev->tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1273,42 +1419,54 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
  * internal references as well.
  * Only then we have finally detached. */
-static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			  struct drbd_nl_cfg_reply *reply)
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
-	int ret;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
-	/* D_FAILED will transition to DISKLESS. */
-	ret = wait_event_interruptible(mdev->misc_wait,
-			mdev->state.disk != D_FAILED);
+	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+	wait_event(mdev->misc_wait,
+			mdev->state.disk != D_DISKLESS ||
+			!atomic_read(&mdev->local_cnt));
 	drbd_resume_io(mdev);
-	if ((int)retcode == (int)SS_IS_DISKLESS)
-		retcode = SS_NOTHING_TO_DO;
-	if (ret)
-		retcode = ERR_INTR;
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp,
-			    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 {
-	int i;
-	enum drbd_ret_code retcode;
+	char hmac_name[CRYPTO_MAX_ALG_NAME];
+	struct drbd_conf *mdev;
 	struct net_conf *new_conf = NULL;
 	struct crypto_hash *tfm = NULL;
 	struct crypto_hash *integrity_w_tfm = NULL;
 	struct crypto_hash *integrity_r_tfm = NULL;
-	struct drbd_conf *mdev;
-	char hmac_name[CRYPTO_MAX_ALG_NAME];
 	void *int_dig_out = NULL;
 	void *int_dig_in = NULL;
 	void *int_dig_vv = NULL;
 	struct drbd_tconn *oconn;
+	struct drbd_tconn *tconn;
 	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+	enum drbd_ret_code retcode;
+	int i;
+	int err;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	tconn = adm_ctx.tconn;
 	conn_reconfig_start(tconn);
 
 	if (tconn->cstate > C_STANDALONE) {
@@ -1343,8 +1501,10 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl
 	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
 	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
 
-	if (!net_conf_from_tags(nlp->tag_list, new_conf)) {
+	err = net_conf_from_attrs(new_conf, info->attrs);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -1495,8 +1655,8 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl
 		mdev->recv_cnt = 0;
 		kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	}
-	reply->ret_code = retcode;
 	conn_reconfig_done(tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 
 fail:
@@ -1508,24 +1668,37 @@ fail:
 	crypto_free_hash(integrity_r_tfm);
 	kfree(new_conf);
 
-	reply->ret_code = retcode;
 	conn_reconfig_done(tconn);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode;
-	struct disconnect dc;
+	struct disconnect_parms parms;
+	struct drbd_tconn *tconn;
+	enum drbd_ret_code retcode;
+	int err;
 
-	memset(&dc, 0, sizeof(struct disconnect));
-	if (!disconnect_from_tags(nlp->tag_list, &dc)) {
-		retcode = ERR_MANDATORY_TAG;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
 		goto fail;
+
+	tconn = adm_ctx.tconn;
+	memset(&parms, 0, sizeof(parms));
+	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
+		err = disconnect_parms_from_attrs(&parms, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto fail;
+		}
 	}
 
-	if (dc.force) {
+	if (parms.force_disconnect) {
 		spin_lock_irq(&tconn->req_lock);
 		if (tconn->cstate >= C_WF_CONNECTION)
 			_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
@@ -1567,7 +1740,7 @@ static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *
  done:
 	retcode = NO_ERROR;
  fail:
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1587,20 +1760,32 @@ void resync_after_online_grow(struct drbd_conf *mdev)
 		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
 }
 
-static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			  struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 {
-	struct resize rs;
-	int retcode = NO_ERROR;
+	struct resize_parms rs;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	enum dds_flags ddsf;
+	int err;
 
-	memset(&rs, 0, sizeof(struct resize));
-	if (!resize_from_tags(nlp->tag_list, &rs)) {
-		retcode = ERR_MANDATORY_TAG;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
 		goto fail;
+
+	memset(&rs, 0, sizeof(struct resize_parms));
+	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
+		err = resize_parms_from_attrs(&rs, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto fail;
+		}
 	}
 
+	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
 		goto fail;
@@ -1644,14 +1829,14 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	}
 
  fail:
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			       struct drbd_nl_cfg_reply *reply)
+int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	int err;
 	int ovr; /* online verify running */
 	int rsr; /* re-sync running */
@@ -1662,12 +1847,21 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	int *rs_plan_s = NULL;
 	int fifo_size;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto fail;
+	mdev = adm_ctx.mdev;
+
 	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
 		retcode = ERR_NOMEM;
+		drbd_msg_put_info("unable to allocate cpumask");
 		goto fail;
 	}
 
-	if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
+	if (((struct drbd_genlmsghdr*)info->userhdr)->flags
+			& DRBD_GENL_F_SET_DEFAULTS) {
 		memset(&sc, 0, sizeof(struct syncer_conf));
 		sc.rate       = DRBD_RATE_DEF;
 		sc.after      = DRBD_AFTER_DEF;
@@ -1681,8 +1875,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	} else
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 
-	if (!syncer_conf_from_tags(nlp->tag_list, &sc)) {
+	err = syncer_conf_from_attrs(&sc, info->attrs);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -1832,14 +2028,23 @@ fail:
 	free_cpumask_var(new_cpu_mask);
 	crypto_free_hash(csums_tfm);
 	crypto_free_hash(verify_tfm);
-	reply->ret_code = retcode;
+
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode;
+	struct drbd_conf *mdev;
+	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync. */
@@ -1862,7 +2067,8 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
 		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
 	}
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1875,56 +2081,58 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
 	return rv;
 }
 
-static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				   struct drbd_nl_cfg_reply *reply)
+static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
+		union drbd_state mask, union drbd_state val)
 {
-	int retcode;
-
-	/* If there is still bitmap IO pending, probably because of a previous
-	 * resync just being finished, wait for it before requesting a new resync. */
-	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+	enum drbd_ret_code retcode;
 
-	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
-
-	if (retcode < SS_SUCCESS) {
-		if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
-			/* The peer will get a resync upon connect anyways. Just make that
-			   into a full resync. */
-			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
-			if (retcode >= SS_SUCCESS) {
-				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
-					"set_n_write from invalidate_peer",
-					BM_LOCKED_SET_ALLOWED))
-					retcode = ERR_IO_MD_DISK;
-			}
-		} else
-			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
-	}
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	reply->ret_code = retcode;
+	retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
+}
 
-	if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
-		retcode = ERR_PAUSE_IS_SET;
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	reply->ret_code = retcode;
+	if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+		retcode = ERR_PAUSE_IS_SET;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			       struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
 	union drbd_state s;
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
-		s = mdev->state;
+	if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+		s = adm_ctx.mdev->state;
 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
@@ -1933,28 +2141,35 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 		}
 	}
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
-
-	return 0;
+	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
 }
 
-static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
+	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 	if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
 		drbd_uuid_new_current(mdev);
 		clear_bit(NEW_CUR_UUID, &mdev->flags);
 	}
 	drbd_suspend_io(mdev);
-	reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
-	if (reply->ret_code == SS_SUCCESS) {
+	retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
+	if (retcode == SS_SUCCESS) {
 		if (mdev->state.conn < C_CONNECTED)
 			tl_clear(mdev->tconn);
 		if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
@@ -1962,138 +2177,259 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	}
 	drbd_resume_io(mdev);
 
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
-	return 0;
+	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
 }
 
-static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
+		const struct sib_info *sib)
 {
-	unsigned short *tl;
-
-	tl = reply->tag_list;
-
-	if (get_ldev(mdev)) {
-		tl = disk_conf_to_tags(&mdev->ldev->dc, tl);
-		put_ldev(mdev);
+	struct state_info *si = NULL; /* for sizeof(si->member); */
+	struct nlattr *nla;
+	int got_ldev;
+	int got_net;
+	int err = 0;
+	int exclude_sensitive;
+
+	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
+	 * to.  So we better exclude_sensitive information.
+	 *
+	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
+	 * in the context of the requesting user process. Exclude sensitive
+	 * information, unless current has superuser.
+	 *
+	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
+	 * relies on the current implementation of netlink_dump(), which
+	 * executes the dump callback successively from netlink_recvmsg(),
+	 * always in the context of the receiving process */
+	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
+
+	got_ldev = get_ldev(mdev);
+	got_net = get_net_conf(mdev->tconn);
+
+	/* We need to add connection name and volume number information still.
+	 * Minor number is in drbd_genlmsghdr. */
+	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
+	if (!nla)
+		goto nla_put_failure;
+	NLA_PUT_U32(skb, T_ctx_volume, mdev->vnr);
+	NLA_PUT_STRING(skb, T_ctx_conn_name, mdev->tconn->name);
+	nla_nest_end(skb, nla);
+
+	if (got_ldev)
+		if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
+			goto nla_put_failure;
+	if (got_net)
+		if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
+			goto nla_put_failure;
+
+	if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive))
+			goto nla_put_failure;
+
+	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
+	if (!nla)
+		goto nla_put_failure;
+	NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
+	NLA_PUT_U32(skb, T_current_state, mdev->state.i);
+	NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
+	NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
+
+	if (got_ldev) {
+		NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
+		NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
+		NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
+		NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
+		if (C_SYNC_SOURCE <= mdev->state.conn &&
+		    C_PAUSED_SYNC_T >= mdev->state.conn) {
+			NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
+			NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
+		}
 	}
 
-	if (get_net_conf(mdev->tconn)) {
-		tl = net_conf_to_tags(mdev->tconn->net_conf, tl);
-		put_net_conf(mdev->tconn);
+	if (sib) {
+		switch(sib->sib_reason) {
+		case SIB_SYNC_PROGRESS:
+		case SIB_GET_STATUS_REPLY:
+			break;
+		case SIB_STATE_CHANGE:
+			NLA_PUT_U32(skb, T_prev_state, sib->os.i);
+			NLA_PUT_U32(skb, T_new_state, sib->ns.i);
+			break;
+		case SIB_HELPER_POST:
+			NLA_PUT_U32(skb,
+				T_helper_exit_code, sib->helper_exit_code);
+			/* fall through */
+		case SIB_HELPER_PRE:
+			NLA_PUT_STRING(skb, T_helper, sib->helper_name);
+			break;
+		}
 	}
-	tl = syncer_conf_to_tags(&mdev->sync_conf, tl);
-
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+	nla_nest_end(skb, nla);
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	if (0)
+nla_put_failure:
+		err = -EMSGSIZE;
+	if (got_ldev)
+		put_ldev(mdev);
+	if (got_net)
+		put_net_conf(mdev->tconn);
+	return err;
 }
 
-static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
 {
-	unsigned short *tl = reply->tag_list;
-	union drbd_state s = mdev->state;
-	unsigned long rs_left;
-	unsigned int res;
+	enum drbd_ret_code retcode;
+	int err;
 
-	tl = get_state_to_tags((struct get_state *)&s, tl);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	/* no local ref, no bitmap, no syncer progress. */
-	if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
-		if (get_ldev(mdev)) {
-			drbd_get_syncer_progress(mdev, &rs_left, &res);
-			tl = tl_add_int(tl, T_sync_progress, &res);
-			put_ldev(mdev);
-		}
+	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
+	if (err) {
+		nlmsg_free(adm_ctx.reply_skb);
+		return err;
 	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	return (int)((char *)tl - (char *)reply->tag_list);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	unsigned short *tl;
-
-	tl = reply->tag_list;
+	struct drbd_conf *mdev;
+	struct drbd_genlmsghdr *dh;
+	int minor = cb->args[0];
+
+	/* Open coded deferred single idr_for_each_entry iteration.
+	 * This may miss entries inserted after this dump started,
+	 * or entries deleted before they are reached.
+	 * But we need to make sure the mdev won't disappear while
+	 * we are looking at it. */
+
+	rcu_read_lock();
+	mdev = idr_get_next(&minors, &minor);
+	if (mdev) {
+		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, &drbd_genl_family,
+				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
+		if (!dh)
+			goto errout;
+
+		D_ASSERT(mdev->minor == minor);
+
+		dh->minor = minor;
+		dh->ret_code = NO_ERROR;
+
+		if (nla_put_status_info(skb, mdev, NULL)) {
+			genlmsg_cancel(skb, dh);
+			goto errout;
+		}
+		genlmsg_end(skb, dh);
+        }
 
-	if (get_ldev(mdev)) {
-		tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
-		tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
-		put_ldev(mdev);
-	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+errout:
+	rcu_read_unlock();
+	/* where to start idr_get_next with the next iteration */
+        cb->args[0] = minor+1;
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	/* No more minors found: empty skb. Which will terminate the dump. */
+        return skb->len;
 }
 
-/**
- * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
- * @mdev:	DRBD device.
- * @nlp:	Netlink/connector packet from drbdsetup
- * @reply:	Reply packet for drbdsetup
- */
-static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
 {
-	unsigned short *tl;
-	char rv;
-
-	tl = reply->tag_list;
+	enum drbd_ret_code retcode;
+	struct timeout_parms tp;
+	int err;
 
-	rv = mdev->state.pdsk == D_OUTDATED        ? UT_PEER_OUTDATED :
-	  test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+	tp.timeout_type =
+		adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
+		test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
+		UT_DEFAULT;
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
+	if (err) {
+		nlmsg_free(adm_ctx.reply_skb);
+		return err;
+	}
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
-	/* default to resume from last known position, if possible */
-	struct start_ov args =
-		{ .start_sector = mdev->ov_start_sector };
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 
-	if (!start_ov_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
+	mdev = adm_ctx.mdev;
+	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
+		/* resume from last known position, if possible */
+		struct start_ov_parms parms =
+			{ .ov_start_sector = mdev->ov_start_sector };
+		int err = start_ov_parms_from_attrs(&parms, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+		/* w_make_ov_request expects position to be aligned */
+		mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
+	}
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
-
-	/* w_make_ov_request expects position to be aligned */
-	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
-	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+	retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
 
-static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	int skip_initial_sync = 0;
 	int err;
+	struct new_c_uuid_parms args;
 
-	struct new_c_uuid args;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out_nolock;
 
-	memset(&args, 0, sizeof(struct new_c_uuid));
-	if (!new_c_uuid_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
+	mdev = adm_ctx.mdev;
+	memset(&args, 0, sizeof(args));
+	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
+		err = new_c_uuid_parms_from_attrs(&args, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out_nolock;
+		}
 	}
 
 	mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
@@ -2139,510 +2475,164 @@ out_dec:
 	put_ldev(mdev);
 out:
 	mutex_unlock(mdev->state_mutex);
-
-	reply->ret_code = retcode;
-	return 0;
-}
-
-static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
-{
-	struct new_connection args;
-
-	if (!new_connection_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
-
-	reply->ret_code = NO_ERROR;
-	if (!drbd_new_tconn(args.name))
-		reply->ret_code = ERR_NOMEM;
-
-	return 0;
-}
-
-static int drbd_nl_new_minor(struct drbd_tconn *tconn,
-		      struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
-{
-	struct new_minor args;
-
-	args.vol_nr = 0;
-	args.minor = 0;
-
-	if (!new_minor_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
-
-	reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr);
-
+out_nolock:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+static enum drbd_ret_code
+drbd_check_conn_name(const char *name)
 {
-	if (mdev->state.disk == D_DISKLESS &&
-	    mdev->state.conn == C_STANDALONE &&
-	    mdev->state.role == R_SECONDARY) {
-		drbd_delete_device(mdev_to_minor(mdev));
-		reply->ret_code = NO_ERROR;
-	} else {
-		reply->ret_code = ERR_MINOR_CONFIGURED;
+	if (!name || !name[0]) {
+		drbd_msg_put_info("connection name missing");
+		return ERR_MANDATORY_TAG;
 	}
-	return 0;
-}
-
-static int drbd_nl_del_conn(struct drbd_tconn *tconn,
-			    struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
-{
-	if (conn_lowest_minor(tconn) < 0) {
-		drbd_free_tconn(tconn);
-		reply->ret_code = NO_ERROR;
-	} else {
-		reply->ret_code = ERR_CONN_IN_USE;
+	/* if we want to use these in sysfs/configfs/debugfs some day,
+	 * we must not allow slashes */
+	if (strchr(name, '/')) {
+		drbd_msg_put_info("invalid connection name");
+		return ERR_INVALID_REQUEST;
 	}
-
-	return 0;
+	return NO_ERROR;
 }
 
-enum cn_handler_type {
-	CHT_MINOR,
-	CHT_CONN,
-	CHT_CTOR,
-	/* CHT_RES, later */
-};
-struct cn_handler_struct {
-	enum cn_handler_type type;
-	union {
-		int (*minor_based)(struct drbd_conf *,
-				   struct drbd_nl_cfg_req *,
-				   struct drbd_nl_cfg_reply *);
-		int (*conn_based)(struct drbd_tconn *,
-				  struct drbd_nl_cfg_req *,
-				  struct drbd_nl_cfg_reply *);
-		int (*constructor)(struct drbd_nl_cfg_req *,
-				   struct drbd_nl_cfg_reply *);
-	};
-	int reply_body_size;
-};
-
-static struct cn_handler_struct cnd_table[] = {
-	[ P_primary ]		= { CHT_MINOR, { &drbd_nl_primary },	0 },
-	[ P_secondary ]		= { CHT_MINOR, { &drbd_nl_secondary },	0 },
-	[ P_disk_conf ]		= { CHT_MINOR, { &drbd_nl_disk_conf },	0 },
-	[ P_detach ]		= { CHT_MINOR, { &drbd_nl_detach },	0 },
-	[ P_net_conf ]		= { CHT_CONN,  { .conn_based = &drbd_nl_net_conf },	0 },
-	[ P_disconnect ]	= { CHT_CONN,  { .conn_based = &drbd_nl_disconnect },	0 },
-	[ P_resize ]		= { CHT_MINOR, { &drbd_nl_resize },	0 },
-	[ P_syncer_conf ]	= { CHT_MINOR, { &drbd_nl_syncer_conf },0 },
-	[ P_invalidate ]	= { CHT_MINOR, { &drbd_nl_invalidate },	0 },
-	[ P_invalidate_peer ]	= { CHT_MINOR, { &drbd_nl_invalidate_peer },0 },
-	[ P_pause_sync ]	= { CHT_MINOR, { &drbd_nl_pause_sync },	0 },
-	[ P_resume_sync ]	= { CHT_MINOR, { &drbd_nl_resume_sync },0 },
-	[ P_suspend_io ]	= { CHT_MINOR, { &drbd_nl_suspend_io },	0 },
-	[ P_resume_io ]		= { CHT_MINOR, { &drbd_nl_resume_io },	0 },
-	[ P_outdate ]		= { CHT_MINOR, { &drbd_nl_outdate },	0 },
-	[ P_get_config ]	= { CHT_MINOR, { &drbd_nl_get_config },
-				    sizeof(struct syncer_conf_tag_len_struct) +
-				    sizeof(struct disk_conf_tag_len_struct) +
-				    sizeof(struct net_conf_tag_len_struct) },
-	[ P_get_state ]		= { CHT_MINOR, { &drbd_nl_get_state },
-				    sizeof(struct get_state_tag_len_struct) +
-				    sizeof(struct sync_progress_tag_len_struct)	},
-	[ P_get_uuids ]		= { CHT_MINOR, { &drbd_nl_get_uuids },
-				    sizeof(struct get_uuids_tag_len_struct) },
-	[ P_get_timeout_flag ]	= { CHT_MINOR, { &drbd_nl_get_timeout_flag },
-				    sizeof(struct get_timeout_flag_tag_len_struct)},
-	[ P_start_ov ]		= { CHT_MINOR, { &drbd_nl_start_ov },	0 },
-	[ P_new_c_uuid ]	= { CHT_MINOR, { &drbd_nl_new_c_uuid },	0 },
-	[ P_new_connection ]	= { CHT_CTOR,  { .constructor = &drbd_nl_new_conn }, 0 },
-	[ P_new_minor ]		= { CHT_CONN,  { .conn_based = &drbd_nl_new_minor }, 0 },
-	[ P_del_minor ]		= { CHT_MINOR, { &drbd_nl_del_minor },	0 },
-	[ P_del_connection ]    = { CHT_CONN,  { .conn_based = &drbd_nl_del_conn }, 0 },
-};
-
-static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
+int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
-	struct cn_handler_struct *cm;
-	struct cn_msg *cn_reply;
-	struct drbd_nl_cfg_reply *reply;
-	struct drbd_conf *mdev;
-	struct drbd_tconn *tconn;
-	int retcode, rr;
-	int reply_size = sizeof(struct cn_msg)
-		+ sizeof(struct drbd_nl_cfg_reply)
-		+ sizeof(short int);
-
-	if (!try_module_get(THIS_MODULE)) {
-		printk(KERN_ERR "drbd: try_module_get() failed!\n");
-		return;
-	}
-
-	if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) {
-		retcode = ERR_PERM;
-		goto fail;
-	}
+	enum drbd_ret_code retcode;
 
-	if (nlp->packet_type >= P_nl_after_last_packet ||
-	    nlp->packet_type == P_return_code_only) {
-		retcode = ERR_PACKET_NR;
-		goto fail;
-	}
+	retcode = drbd_adm_prepare(skb, info, 0);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	cm = cnd_table + nlp->packet_type;
+	retcode = drbd_check_conn_name(adm_ctx.conn_name);
+	if (retcode != NO_ERROR)
+		goto out;
 
-	/* This may happen if packet number is 0: */
-	if (cm->minor_based == NULL) {
-		retcode = ERR_PACKET_NR;
-		goto fail;
+	if (adm_ctx.tconn) {
+		retcode = ERR_INVALID_REQUEST;
+		drbd_msg_put_info("connection exists");
+		goto out;
 	}
 
-	reply_size += cm->reply_body_size;
-
-	/* allocation not in the IO path, cqueue thread context */
-	cn_reply = kzalloc(reply_size, GFP_KERNEL);
-	if (!cn_reply) {
+	if (!drbd_new_tconn(adm_ctx.conn_name))
 		retcode = ERR_NOMEM;
-		goto fail;
-	}
-	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
-
-	reply->packet_type =
-		cm->reply_body_size ? nlp->packet_type : P_return_code_only;
-	reply->minor = nlp->drbd_minor;
-	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
-	/* reply->tag_list; might be modified by cm->function. */
-
-	retcode = ERR_MINOR_INVALID;
-	rr = 0;
-	switch (cm->type) {
-	case CHT_MINOR:
-		mdev = minor_to_mdev(nlp->drbd_minor);
-		if (!mdev)
-			goto fail;
-		rr = cm->minor_based(mdev, nlp, reply);
-		break;
-	case CHT_CONN:
-		tconn = conn_by_name(nlp->obj_name);
-		if (!tconn) {
-			retcode = ERR_CONN_NOT_KNOWN;
-			goto fail;
-		}
-		rr = cm->conn_based(tconn, nlp, reply);
-		break;
-	case CHT_CTOR:
-		rr = cm->constructor(nlp, reply);
-		break;
-	/* case CHT_RES: */
-	}
-
-	cn_reply->id = req->id;
-	cn_reply->seq = req->seq;
-	cn_reply->ack = req->ack  + 1;
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
-	cn_reply->flags = 0;
-
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
-	if (rr && rr != -ESRCH)
-		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
-
-	kfree(cn_reply);
-	module_put(THIS_MODULE);
-	return;
- fail:
-	drbd_nl_send_reply(req, retcode);
-	module_put(THIS_MODULE);
-}
-
-static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
-
-static unsigned short *
-__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
-	unsigned short len, int nul_terminated)
-{
-	unsigned short l = tag_descriptions[tag_number(tag)].max_len;
-	len = (len < l) ? len :  l;
-	put_unaligned(tag, tl++);
-	put_unaligned(len, tl++);
-	memcpy(tl, data, len);
-	tl = (unsigned short*)((char*)tl + len);
-	if (nul_terminated)
-		*((char*)tl - 1) = 0;
-	return tl;
-}
-
-static unsigned short *
-tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
-{
-	return __tl_add_blob(tl, tag, data, len, 0);
-}
-
-static unsigned short *
-tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
-{
-	return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
-}
-
-static unsigned short *
-tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
-{
-	put_unaligned(tag, tl++);
-	switch(tag_type(tag)) {
-	case TT_INTEGER:
-		put_unaligned(sizeof(int), tl++);
-		put_unaligned(*(int *)val, (int *)tl);
-		tl = (unsigned short*)((char*)tl+sizeof(int));
-		break;
-	case TT_INT64:
-		put_unaligned(sizeof(u64), tl++);
-		put_unaligned(*(u64 *)val, (u64 *)tl);
-		tl = (unsigned short*)((char*)tl+sizeof(u64));
-		break;
-	default:
-		/* someone did something stupid. */
-		;
-	}
-	return tl;
-}
-
-void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct get_state_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-
-	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
-
-	tl = get_state_to_tags((struct get_state *)&state, tl);
-
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_get_state;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-}
-
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct call_helper_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-
-	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
-
-	tl = tl_add_str(tl, T_helper, helper_name);
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_call_helper;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs,
-		   const char *seen_hash, const char *calc_hash,
-			   const struct drbd_peer_request *peer_req)
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cn_msg *cn_reply;
-	struct drbd_nl_cfg_reply *reply;
-	unsigned short *tl;
-	struct page *page;
-	unsigned len;
+	struct drbd_genlmsghdr *dh = info->userhdr;
+	enum drbd_ret_code retcode;
 
-	if (!peer_req)
-		return;
-	if (!reason || !reason[0])
-		return;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	/* apparently we have to memcpy twice, first to prepare the data for the
-	 * struct cn_msg, then within cn_netlink_send from the cn_msg to the
-	 * netlink skb. */
-	/* receiver thread context, which is not in the writeout path (of this node),
-	 * but may be in the writeout path of the _other_ node.
-	 * GFP_NOIO to avoid potential "distributed deadlock". */
-	cn_reply = kzalloc(
-		sizeof(struct cn_msg)+
-		sizeof(struct drbd_nl_cfg_reply)+
-		sizeof(struct dump_ee_tag_len_struct)+
-		sizeof(short int),
-		GFP_NOIO);
-
-	if (!cn_reply) {
-		dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, "
-			     "sector %llu, size %u\n",
-			(unsigned long long)peer_req->i.sector,
-			peer_req->i.size);
-		return;
+	/* FIXME drop minor_count parameter, limit to MINORMASK */
+	if (dh->minor >= minor_count) {
+		drbd_msg_put_info("requested minor out of range");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
 	}
-
-	reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
-	tl = reply->tag_list;
-
-	tl = tl_add_str(tl, T_dump_ee_reason, reason);
-	tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
-	tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
-	tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector);
-	tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id);
-
-	/* dump the first 32k */
-	len = min_t(unsigned, peer_req->i.size, 32 << 10);
-	put_unaligned(T_ee_data, tl++);
-	put_unaligned(len, tl++);
-
-	page = peer_req->pages;
-	page_chain_for_each(page) {
-		void *d = kmap_atomic(page, KM_USER0);
-		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		memcpy(tl, d, l);
-		kunmap_atomic(d, KM_USER0);
-		tl = (unsigned short*)((char*)tl + l);
-		len -= l;
-		if (len == 0)
-			break;
+	/* FIXME we need a define here */
+	if (adm_ctx.volume >= 256) {
+		drbd_msg_put_info("requested volume id out of range");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
 	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; // not used here.
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char*)tl - (char*)reply->tag_list);
-	cn_reply->flags = 0;
 
-	reply->packet_type = P_dump_ee;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-	kfree(cn_reply);
+	retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-void drbd_bcast_sync_progress(struct drbd_conf *mdev)
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct sync_progress_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-	unsigned long rs_left;
-	unsigned int res;
-
-	/* no local ref, no bitmap, no syncer progress, no broadcast. */
-	if (!get_ldev(mdev))
-		return;
-	drbd_get_syncer_progress(mdev, &rs_left, &res);
-	put_ldev(mdev);
-
-	tl = tl_add_int(tl, T_sync_progress, &res);
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 
-	reply->packet_type = P_sync_progress;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	mdev = adm_ctx.mdev;
+	if (mdev->state.disk == D_DISKLESS &&
+	    mdev->state.conn == C_STANDALONE &&
+	    mdev->state.role == R_SECONDARY) {
+		drbd_delete_device(mdev_to_minor(mdev));
+		retcode = NO_ERROR;
+	} else
+		retcode = ERR_MINOR_CONFIGURED;
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-int __init drbd_nl_init(void)
+int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
 {
-	static struct cb_id cn_id_drbd;
-	int err, try=10;
-
-	cn_id_drbd.val = CN_VAL_DRBD;
-	do {
-		cn_id_drbd.idx = cn_idx;
-		err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
-		if (!err)
-			break;
-		cn_idx = (cn_idx + CN_IDX_STEP);
-	} while (try--);
+	enum drbd_ret_code retcode;
 
-	if (err) {
-		printk(KERN_ERR "drbd: cn_drbd failed to register\n");
-		return err;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
+		drbd_free_tconn(adm_ctx.tconn);
+		retcode = NO_ERROR;
+	} else {
+		retcode = ERR_CONN_IN_USE;
 	}
 
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-void drbd_nl_cleanup(void)
+void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
 {
-	static struct cb_id cn_id_drbd;
-
-	cn_id_drbd.idx = cn_idx;
-	cn_id_drbd.val = CN_VAL_DRBD;
+	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
+	struct sk_buff *msg;
+	struct drbd_genlmsghdr *d_out;
+	unsigned seq;
+	int err = -ENOMEM;
+
+	seq = atomic_inc_return(&drbd_genl_seq);
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+	if (!msg)
+		goto failed;
+
+	err = -EMSGSIZE;
+	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
+	if (!d_out) /* cannot happen, but anyways. */
+		goto nla_put_failure;
+	d_out->minor = mdev_to_minor(mdev);
+	d_out->ret_code = 0;
+
+	if (nla_put_status_info(msg, mdev, sib))
+		goto nla_put_failure;
+	genlmsg_end(msg, d_out);
+	err = drbd_genl_multicast_events(msg, 0);
+	/* msg has been consumed or freed in netlink_broadcast() */
+	if (err && err != -ESRCH)
+		goto failed;
 
-	cn_del_callback(&cn_id_drbd);
-}
+	return;
 
-void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
-{
-	char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	int rr;
-
-	memset(buffer, 0, sizeof(buffer));
-	cn_reply->id = req->id;
-
-	cn_reply->seq = req->seq;
-	cn_reply->ack = req->ack  + 1;
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_return_code_only;
-	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
-	reply->ret_code = ret_code;
-
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-	if (rr && rr != -ESRCH)
-		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+nla_put_failure:
+	nlmsg_free(msg);
+failed:
+	dev_err(DEV, "Error %d while broadcasting event. "
+			"Event seq:%u sib_reason:%u\n",
+			err, seq, sib->sib_reason);
 }
-
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index ffee90d6d374..a280bc238acd 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -970,6 +970,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	enum drbd_fencing_p fp;
 	enum drbd_req_event what = NOTHING;
 	union drbd_state nsm = (union drbd_state){ .i = -1 };
+	struct sib_info sib;
+
+	sib.sib_reason = SIB_STATE_CHANGE;
+	sib.os = os;
+	sib.ns = ns;
 
 	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -984,7 +989,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	}
 
 	/* Inform userspace about the change... */
-	drbd_bcast_state(mdev, ns);
+	drbd_bcast_event(mdev, &sib);
 
 	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
 	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index e192167e6145..d28fdd8fcd49 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -51,7 +51,6 @@
 
 #endif
 
-
 extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.11"
 #define API_VERSION 88
@@ -159,6 +158,7 @@ enum drbd_ret_code {
 	ERR_CONN_IN_USE         = 159,
 	ERR_MINOR_CONFIGURED    = 160,
 	ERR_MINOR_EXISTS	= 161,
+	ERR_INVALID_REQUEST	= 162,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -349,37 +349,4 @@ enum drbd_timeout_flag {
 #define DRBD_MD_INDEX_FLEX_EXT -2
 #define DRBD_MD_INDEX_FLEX_INT -3
 
-/* Start of the new netlink/connector stuff */
-
-enum drbd_ncr_flags {
-	DRBD_NL_CREATE_DEVICE = 0x01,
-	DRBD_NL_SET_DEFAULTS =  0x02,
-};
-#define DRBD_NL_OBJ_NAME_LEN 32
-
-
-/* For searching a vacant cn_idx value */
-#define CN_IDX_STEP			6977
-
-struct drbd_nl_cfg_req {
-	int packet_type;
-	union {
-		struct {
-			unsigned int drbd_minor;
-			enum drbd_ncr_flags flags;
-		};
-		struct {
-			char obj_name[DRBD_NL_OBJ_NAME_LEN];
-		};
-	};
-	unsigned short tag_list[];
-};
-
-struct drbd_nl_cfg_reply {
-	int packet_type;
-	unsigned int minor;
-	int ret_code; /* enum ret_code or set_st_err_t */
-	unsigned short tag_list[]; /* only used with get_* calls */
-};
-
 #endif
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 8a86f659d363..c8c67239f616 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -95,7 +95,7 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
 #endif
 #endif
 
-#if 1
+#ifdef GENL_MAGIC_DEBUG
 static void dprint_field(const char *dir, int nla_type,
 		const char *name, void *valp)
 {
-- 
cgit v1.2.3-55-g7522


From 73d901b74f1070c8a664349b564ba6f8bc8ab283 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 7 Mar 2011 10:38:56 +0100
Subject: drbd: remove now unused connector related files

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_nl.h        | 172 -----------------------------------------
 include/linux/drbd_tag_magic.h |  84 --------------------
 2 files changed, 256 deletions(-)
 delete mode 100644 include/linux/drbd_nl.h
 delete mode 100644 include/linux/drbd_tag_magic.h

(limited to 'include/linux')

diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
deleted file mode 100644
index 1216c7a432c5..000000000000
--- a/include/linux/drbd_nl.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-   PAKET( name,
-	  TYPE ( pn, pr, member )
-	  ...
-   )
-
-   You may never reissue one of the pn arguments
-*/
-
-#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
-#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
-#endif
-
-NL_PACKET(primary, 1,
-       NL_BIT(		1,	T_MAY_IGNORE,	primary_force)
-)
-
-NL_PACKET(secondary, 2, )
-
-NL_PACKET(disk_conf, 3,
-	NL_INT64(	2,	T_MAY_IGNORE,	disk_size)
-	NL_STRING(	3,	T_MANDATORY,	backing_dev,	128)
-	NL_STRING(	4,	T_MANDATORY,	meta_dev,	128)
-	NL_INTEGER(	5,	T_MANDATORY,	meta_dev_idx)
-	NL_INTEGER(	6,	T_MAY_IGNORE,	on_io_error)
-	NL_INTEGER(	7,	T_MAY_IGNORE,	fencing)
-	NL_BIT(		37,	T_MAY_IGNORE,	use_bmbv)
-	NL_BIT(		53,	T_MAY_IGNORE,	no_disk_flush)
-	NL_BIT(		54,	T_MAY_IGNORE,	no_md_flush)
-	  /*  55 max_bio_size was available in 8.2.6rc2 */
-	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs)
-	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier)
-	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain)
-)
-
-NL_PACKET(detach, 4, )
-
-NL_PACKET(net_conf, 5,
-	NL_STRING(	8,	T_MANDATORY,	my_addr,	128)
-	NL_STRING(	9,	T_MANDATORY,	peer_addr,	128)
-	NL_STRING(	10,	T_MAY_IGNORE,	shared_secret,	SHARED_SECRET_MAX)
-	NL_STRING(	11,	T_MAY_IGNORE,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	NL_STRING(	44,	T_MAY_IGNORE,	integrity_alg,	SHARED_SECRET_MAX)
-	NL_INTEGER(	14,	T_MAY_IGNORE,	timeout)
-	NL_INTEGER(	15,	T_MANDATORY,	wire_protocol)
-	NL_INTEGER(	16,	T_MAY_IGNORE,	try_connect_int)
-	NL_INTEGER(	17,	T_MAY_IGNORE,	ping_int)
-	NL_INTEGER(	18,	T_MAY_IGNORE,	max_epoch_size)
-	NL_INTEGER(	19,	T_MAY_IGNORE,	max_buffers)
-	NL_INTEGER(	20,	T_MAY_IGNORE,	unplug_watermark)
-	NL_INTEGER(	21,	T_MAY_IGNORE,	sndbuf_size)
-	NL_INTEGER(	22,	T_MAY_IGNORE,	ko_count)
-	NL_INTEGER(	24,	T_MAY_IGNORE,	after_sb_0p)
-	NL_INTEGER(	25,	T_MAY_IGNORE,	after_sb_1p)
-	NL_INTEGER(	26,	T_MAY_IGNORE,	after_sb_2p)
-	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict)
-	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo)
-	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size)
-	NL_INTEGER(	81,	T_MAY_IGNORE,	on_congestion)
-	NL_INTEGER(	82,	T_MAY_IGNORE,	cong_fill)
-	NL_INTEGER(	83,	T_MAY_IGNORE,	cong_extents)
-	  /* 59 addr_family was available in GIT, never released */
-	NL_BIT(		60,	T_MANDATORY,	mind_af)
-	NL_BIT(		27,	T_MAY_IGNORE,	want_lose)
-	NL_BIT(		28,	T_MAY_IGNORE,	two_primaries)
-	NL_BIT(		41,	T_MAY_IGNORE,	always_asbp)
-	NL_BIT(		61,	T_MAY_IGNORE,	no_cork)
-	NL_BIT(		62,	T_MANDATORY,	auto_sndbuf_size)
-	NL_BIT(		70,	T_MANDATORY,	dry_run)
-)
-
-NL_PACKET(disconnect, 6,
-	NL_BIT(		84,	T_MAY_IGNORE,	force)
-)
-
-NL_PACKET(resize, 7,
-	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
-	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
-	NL_BIT(			69,	T_MANDATORY,	no_resync)
-)
-
-NL_PACKET(syncer_conf, 8,
-	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
-	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
-	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
-/*	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
- *	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval)
- *	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th)
- *	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th)
- * feature will be reimplemented differently with 8.3.9 */
-	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
-	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
-	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
-	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
-	NL_INTEGER(	75,	T_MAY_IGNORE,	on_no_data)
-	NL_INTEGER(	76,	T_MAY_IGNORE,	c_plan_ahead)
-	NL_INTEGER(     77,	T_MAY_IGNORE,	c_delay_target)
-	NL_INTEGER(     78,	T_MAY_IGNORE,	c_fill_target)
-	NL_INTEGER(     79,	T_MAY_IGNORE,	c_max_rate)
-	NL_INTEGER(     80,	T_MAY_IGNORE,	c_min_rate)
-)
-
-NL_PACKET(invalidate, 9, )
-NL_PACKET(invalidate_peer, 10, )
-NL_PACKET(pause_sync, 11, )
-NL_PACKET(resume_sync, 12, )
-NL_PACKET(suspend_io, 13, )
-NL_PACKET(resume_io, 14, )
-NL_PACKET(outdate, 15, )
-NL_PACKET(get_config, 16, )
-NL_PACKET(get_state, 17,
-	NL_INTEGER(	33,	T_MAY_IGNORE,	state_i)
-)
-
-NL_PACKET(get_uuids, 18,
-	NL_STRING(	34,	T_MAY_IGNORE,	uuids,	(UI_SIZE*sizeof(__u64)))
-	NL_INTEGER(	35,	T_MAY_IGNORE,	uuids_flags)
-)
-
-NL_PACKET(get_timeout_flag, 19,
-	NL_BIT(		36,	T_MAY_IGNORE,	use_degraded)
-)
-
-NL_PACKET(call_helper, 20,
-	NL_STRING(	38,	T_MAY_IGNORE,	helper,		32)
-)
-
-/* Tag nr 42 already allocated in drbd-8.1 development. */
-
-NL_PACKET(sync_progress, 23,
-	NL_INTEGER(	43,	T_MAY_IGNORE,	sync_progress)
-)
-
-NL_PACKET(dump_ee, 24,
-	NL_STRING(	45,	T_MAY_IGNORE,	dump_ee_reason, 32)
-	NL_STRING(	46,	T_MAY_IGNORE,	seen_digest, SHARED_SECRET_MAX)
-	NL_STRING(	47,	T_MAY_IGNORE,	calc_digest, SHARED_SECRET_MAX)
-	NL_INT64(	48,	T_MAY_IGNORE,	ee_sector)
-	NL_INT64(	49,	T_MAY_IGNORE,	ee_block_id)
-	NL_STRING(	50,	T_MAY_IGNORE,	ee_data,	32 << 10)
-)
-
-NL_PACKET(start_ov, 25,
-	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
-)
-
-NL_PACKET(new_c_uuid, 26,
-       NL_BIT(		63,	T_MANDATORY,	clear_bm)
-)
-
-#ifdef NL_RESPONSE
-NL_RESPONSE(return_code_only, 27)
-#endif
-
-NL_PACKET(new_connection, 28, /* CHT_CTOR */
-	NL_STRING(	85,	T_MANDATORY,	name, DRBD_NL_OBJ_NAME_LEN)
-)
-
-NL_PACKET(new_minor, 29, /* CHT_CONN */
-	NL_INTEGER(	86,	T_MANDATORY,	minor)
-	NL_INTEGER(	87,	T_MANDATORY,	vol_nr)
-)
-
-NL_PACKET(del_minor, 30, ) /* CHT_MINOR */
-NL_PACKET(del_connection, 31, ) /* CHT_CONN */
-
-#undef NL_PACKET
-#undef NL_INTEGER
-#undef NL_INT64
-#undef NL_BIT
-#undef NL_STRING
-#undef NL_RESPONSE
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
deleted file mode 100644
index 069543190516..000000000000
--- a/include/linux/drbd_tag_magic.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef DRBD_TAG_MAGIC_H
-#define DRBD_TAG_MAGIC_H
-
-#define TT_END     0
-#define TT_REMOVED 0xE000
-
-/* declare packet_type enums */
-enum packet_types {
-#define NL_PACKET(name, number, fields) P_ ## name = number,
-#define NL_RESPONSE(name, number) P_ ## name = number,
-#define NL_INTEGER(pn, pr, member)
-#define NL_INT64(pn, pr, member)
-#define NL_BIT(pn, pr, member)
-#define NL_STRING(pn, pr, member, len)
-#include "drbd_nl.h"
-	P_nl_after_last_packet,
-};
-
-/* These struct are used to deduce the size of the tag lists: */
-#define NL_PACKET(name, number, fields)	\
-	struct name ## _tag_len_struct { fields };
-#define NL_INTEGER(pn, pr, member)		\
-	int member; int tag_and_len ## member;
-#define NL_INT64(pn, pr, member)		\
-	__u64 member; int tag_and_len ## member;
-#define NL_BIT(pn, pr, member)		\
-	unsigned char member:1; int tag_and_len ## member;
-#define NL_STRING(pn, pr, member, len)	\
-	unsigned char member[len]; int member ## _len; \
-	int tag_and_len ## member;
-#include "linux/drbd_nl.h"
-
-/* declare tag-list-sizes */
-static const int tag_list_sizes[] = {
-#define NL_PACKET(name, number, fields) 2 fields ,
-#define NL_INTEGER(pn, pr, member)      + 4 + 4
-#define NL_INT64(pn, pr, member)        + 4 + 8
-#define NL_BIT(pn, pr, member)          + 4 + 1
-#define NL_STRING(pn, pr, member, len)  + 4 + (len)
-#include "drbd_nl.h"
-};
-
-/* The two highest bits are used for the tag type */
-#define TT_MASK      0xC000
-#define TT_INTEGER   0x0000
-#define TT_INT64     0x4000
-#define TT_BIT       0x8000
-#define TT_STRING    0xC000
-/* The next bit indicates if processing of the tag is mandatory */
-#define T_MANDATORY  0x2000
-#define T_MAY_IGNORE 0x0000
-#define TN_MASK      0x1fff
-/* The remaining 13 bits are used to enumerate the tags */
-
-#define tag_type(T)   ((T) & TT_MASK)
-#define tag_number(T) ((T) & TN_MASK)
-
-/* declare tag enums */
-#define NL_PACKET(name, number, fields) fields
-enum drbd_tags {
-#define NL_INTEGER(pn, pr, member)     T_ ## member = pn | TT_INTEGER | pr ,
-#define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
-#define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
-#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
-#include "drbd_nl.h"
-};
-
-struct tag {
-	const char *name;
-	int type_n_flags;
-	int max_len;
-};
-
-/* declare tag names */
-#define NL_PACKET(name, number, fields) fields
-static const struct tag tag_descriptions[] = {
-#define NL_INTEGER(pn, pr, member)     [ pn ] = { #member, TT_INTEGER | pr, sizeof(int)   },
-#define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
-#define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
-#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
-#include "drbd_nl.h"
-};
-
-#endif
-- 
cgit v1.2.3-55-g7522


From 85f75dd7630436b0aa46a6393099c0f23121f5f0 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Tue, 15 Mar 2011 16:26:37 +0100
Subject: drbd: introduce in-kernel "down" command

This greatly simplifies deconfiguration of whole resources.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_main.c |   2 -
 drivers/block/drbd/drbd_nl.c   | 203 ++++++++++++++++++++++++++++++-----------
 include/linux/drbd_genl.h      |   2 +
 3 files changed, 154 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 113c7b465384..40b7b93def75 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2303,9 +2303,7 @@ fail:
 
 void drbd_free_tconn(struct drbd_tconn *tconn)
 {
-	mutex_lock(&drbd_cfg_mutex);
 	list_del(&tconn->all_tconn);
-	mutex_unlock(&drbd_cfg_mutex);
 	idr_destroy(&tconn->volumes);
 
 	free_cpumask_var(tconn->cpu_mask);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f965dfe4b5ff..d952e877f8d5 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -49,6 +49,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
@@ -1416,6 +1417,18 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static int adm_detach(struct drbd_conf *mdev)
+{
+	enum drbd_ret_code retcode;
+	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
+	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+	wait_event(mdev->misc_wait,
+			mdev->state.disk != D_DISKLESS ||
+			!atomic_read(&mdev->local_cnt));
+	drbd_resume_io(mdev);
+	return retcode;
+}
+
 /* Detaching the disk is a process in multiple stages.  First we need to lock
  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
@@ -1423,7 +1436,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
  * Only then we have finally detached. */
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
@@ -1432,13 +1444,7 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mdev = adm_ctx.mdev;
-	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-	wait_event(mdev->misc_wait,
-			mdev->state.disk != D_DISKLESS ||
-			!atomic_read(&mdev->local_cnt));
-	drbd_resume_io(mdev);
+	retcode = adm_detach(adm_ctx.mdev);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -1680,10 +1686,49 @@ out:
 	return 0;
 }
 
+static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
+{
+	enum drbd_state_rv rv;
+	if (force) {
+		spin_lock_irq(&tconn->req_lock);
+		if (tconn->cstate >= C_WF_CONNECTION)
+			_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+		spin_unlock_irq(&tconn->req_lock);
+		return SS_SUCCESS;
+	}
+
+	rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
+
+	switch (rv) {
+	case SS_NOTHING_TO_DO:
+	case SS_ALREADY_STANDALONE:
+		return SS_SUCCESS;
+	case SS_PRIMARY_NOP:
+		/* Our state checking code wants to see the peer outdated. */
+		rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
+							pdsk, D_OUTDATED), CS_VERBOSE);
+		break;
+	case SS_CW_FAILED_BY_PEER:
+		/* The peer probably wants to see us outdated. */
+		rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
+							disk, D_OUTDATED), 0);
+		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
+			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+			rv = SS_SUCCESS;
+		}
+		break;
+	default:;
+		/* no special handling necessary */
+	}
+
+	return rv;
+}
+
 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
 	struct disconnect_parms parms;
 	struct drbd_tconn *tconn;
+	enum drbd_state_rv rv;
 	enum drbd_ret_code retcode;
 	int err;
 
@@ -1704,35 +1749,8 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	if (parms.force_disconnect) {
-		spin_lock_irq(&tconn->req_lock);
-		if (tconn->cstate >= C_WF_CONNECTION)
-			_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
-		spin_unlock_irq(&tconn->req_lock);
-		goto done;
-	}
-
-	retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
-
-	if (retcode == SS_NOTHING_TO_DO)
-		goto done;
-	else if (retcode == SS_ALREADY_STANDALONE)
-		goto done;
-	else if (retcode == SS_PRIMARY_NOP) {
-		/* Our state checking code wants to see the peer outdated. */
-		retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
-							pdsk, D_OUTDATED), CS_VERBOSE);
-	} else if (retcode == SS_CW_FAILED_BY_PEER) {
-		/* The peer probably wants to see us outdated. */
-		retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
-							disk, D_OUTDATED), 0);
-		if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
-			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
-			retcode = SS_SUCCESS;
-		}
-	}
-
-	if (retcode < SS_SUCCESS)
+	rv = conn_try_disconnect(tconn, parms.force_disconnect);
+	if (rv < SS_SUCCESS)
 		goto fail;
 
 	if (wait_event_interruptible(tconn->ping_wait,
@@ -1743,7 +1761,6 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
- done:
 	retcode = NO_ERROR;
  fail:
 	drbd_adm_finish(info, retcode);
@@ -2644,9 +2661,21 @@ out:
 	return 0;
 }
 
+static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
+{
+	if (mdev->state.disk == D_DISKLESS &&
+	    /* no need to be mdev->state.conn == C_STANDALONE &&
+	     * we may want to delete a minor from a live replication group.
+	     */
+	    mdev->state.role == R_SECONDARY) {
+		drbd_delete_device(mdev_to_minor(mdev));
+		return NO_ERROR;
+	} else
+		return ERR_MINOR_CONFIGURED;
+}
+
 int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
@@ -2655,19 +2684,89 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mdev = adm_ctx.mdev;
-	if (mdev->state.disk == D_DISKLESS &&
-	    /* no need to be mdev->state.conn == C_STANDALONE &&
-	     * we may want to delete a minor from a live replication group.
-	     */
-	    mdev->state.role == R_SECONDARY) {
-		drbd_delete_device(mdev_to_minor(mdev));
-		retcode = NO_ERROR;
-		/* if this was the last volume of this connection,
-		 * this will terminate all threads */
+	mutex_lock(&drbd_cfg_mutex);
+	retcode = adm_delete_minor(adm_ctx.mdev);
+	mutex_unlock(&drbd_cfg_mutex);
+	/* if this was the last volume of this connection,
+	 * this will terminate all threads */
+	if (retcode == NO_ERROR)
 		conn_reconfig_done(adm_ctx.tconn);
-	} else
-		retcode = ERR_MINOR_CONFIGURED;
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	enum drbd_state_rv rv;
+	struct drbd_conf *mdev;
+	unsigned i;
+
+	retcode = drbd_adm_prepare(skb, info, 0);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (!adm_ctx.tconn) {
+		retcode = ERR_CONN_NOT_KNOWN;
+		goto out;
+	}
+
+	mutex_lock(&drbd_cfg_mutex);
+	/* demote */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = drbd_set_role(mdev, R_SECONDARY, 0);
+		if (retcode < SS_SUCCESS) {
+			drbd_msg_put_info("failed to demote");
+			goto out_unlock;
+		}
+	}
+
+	/* disconnect */
+	rv = conn_try_disconnect(adm_ctx.tconn, 0);
+	if (rv < SS_SUCCESS) {
+		retcode = rv; /* enum type mismatch! */
+		drbd_msg_put_info("failed to disconnect");
+		goto out_unlock;
+	}
+
+	/* detach */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		rv = adm_detach(mdev);
+		if (rv < SS_SUCCESS) {
+			retcode = rv; /* enum type mismatch! */
+			drbd_msg_put_info("failed to detach");
+			goto out_unlock;
+		}
+	}
+
+	/* delete volumes */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = adm_delete_minor(mdev);
+		if (retcode != NO_ERROR) {
+			/* "can not happen" */
+			drbd_msg_put_info("failed to delete volume");
+			goto out_unlock;
+		}
+	}
+
+	/* stop all threads */
+	conn_reconfig_done(adm_ctx.tconn);
+
+	/* delete connection */
+	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
+		drbd_free_tconn(adm_ctx.tconn);
+		retcode = NO_ERROR;
+	} else {
+		/* "can not happen" */
+		retcode = ERR_CONN_IN_USE;
+		drbd_msg_put_info("failed to delete connection");
+		goto out_unlock;
+	}
+out_unlock:
+	mutex_unlock(&drbd_cfg_mutex);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -2683,12 +2782,14 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&drbd_cfg_mutex);
 	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
 		drbd_free_tconn(adm_ctx.tconn);
 		retcode = NO_ERROR;
 	} else {
 		retcode = ERR_CONN_IN_USE;
 	}
+	mutex_unlock(&drbd_cfg_mutex);
 
 out:
 	drbd_adm_finish(info, retcode);
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 84e16848f7a1..a07d69279b1a 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -347,3 +347,5 @@ GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
 GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
-- 
cgit v1.2.3-55-g7522


From 047e95e259e81d7b97eca10cda0aa93082531ac1 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 16 Mar 2011 14:43:36 +0100
Subject: drbd: Allow volumes to become primary only on one side

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_state.c   | 21 ++++++++++++++++++---
 drivers/block/drbd/drbd_strings.c |  1 +
 include/linux/drbd.h              |  3 ++-
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 886b996ec7b3..11685658659e 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -329,6 +329,18 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio
 		dev_info(DEV, "%s\n", pb);
 }
 
+static bool vol_has_primary_peer(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int minor;
+
+	idr_for_each_entry(&tconn->volumes, mdev, minor) {
+		if (mdev->state.peer == R_PRIMARY)
+			return true;
+	}
+	return false;
+}
+
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
@@ -349,9 +361,12 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 	}
 
 	if (get_net_conf(mdev->tconn)) {
-		if (!mdev->tconn->net_conf->two_primaries &&
-		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
-			rv = SS_TWO_PRIMARIES;
+		if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) {
+			if (ns.peer == R_PRIMARY)
+				rv = SS_TWO_PRIMARIES;
+			else if (vol_has_primary_peer(mdev->tconn))
+				rv = SS_O_VOL_PEER_PRI;
+			}
 		put_net_conf(mdev->tconn);
 	}
 
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index c44a2a602772..9a664bd27404 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
 	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
 	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
 	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+	[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
 };
 
 const char *drbd_conn_str(enum drbd_conns s)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index d28fdd8fcd49..9cdb888607ae 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -300,7 +300,8 @@ enum drbd_state_rv {
 	SS_NOT_SUPPORTED = -17,      /* drbd-8.2 only */
 	SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
 	SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */
-	SS_AFTER_LAST_ERROR = -20,    /* Keep this at bottom */
+	SS_O_VOL_PEER_PRI = -20,
+	SS_AFTER_LAST_ERROR = -21,    /* Keep this at bottom */
 };
 
 /* from drbd_strings.c */
-- 
cgit v1.2.3-55-g7522


From 2cbacafd7af0f1cc7a433668c662a91ba6aabc1b Mon Sep 17 00:00:00 2001
From: Russell King
Date: Tue, 17 Apr 2012 16:34:13 +0100
Subject: SERIAL: core: add hardware assisted s/w flow control support

Ports which are capable of handling s/w flow control in hardware to
know when the s/w flow control termios settings are changed.  Add a
flag to allow the low level serial drivers to indicate that they
support this, and these changes should be propagated to them.

Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/tty/serial/serial_core.c | 16 ++++++++++++++--
 include/linux/serial_core.h      |  2 ++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index bc2065d323b9..bd10bbd56446 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1213,7 +1213,19 @@ static void uart_set_termios(struct tty_struct *tty,
 	struct uart_port *uport = state->uart_port;
 	unsigned long flags;
 	unsigned int cflag = tty->termios.c_cflag;
+	unsigned int iflag_mask = IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK;
+	bool sw_changed = false;
 
+	/*
+	 * Drivers doing software flow control also need to know
+	 * about changes to these input settings.
+	 */
+	if (uport->flags & UPF_SOFT_FLOW) {
+		iflag_mask |= IXANY|IXON|IXOFF;
+		sw_changed =
+		   tty->termios.c_cc[VSTART] != old_termios->c_cc[VSTART] ||
+		   tty->termios.c_cc[VSTOP] != old_termios->c_cc[VSTOP];
+	}
 
 	/*
 	 * These are the bits that are used to setup various
@@ -1221,11 +1233,11 @@ static void uart_set_termios(struct tty_struct *tty,
 	 * bits in c_cflag; c_[io]speed will always be set
 	 * appropriately by set_termios() in tty_ioctl.c
 	 */
-#define RELEVANT_IFLAG(iflag)	((iflag) & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
 	if ((cflag ^ old_termios->c_cflag) == 0 &&
 	    tty->termios.c_ospeed == old_termios->c_ospeed &&
 	    tty->termios.c_ispeed == old_termios->c_ispeed &&
-	    RELEVANT_IFLAG(tty->termios.c_iflag ^ old_termios->c_iflag) == 0) {
+	    ((tty->termios.c_iflag ^ old_termios->c_iflag) & iflag_mask) == 0 &&
+	    !sw_changed) {
 		return;
 	}
 
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 3c430228d232..00051388de3c 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -163,6 +163,8 @@ struct uart_port {
 #define UPF_BUGGY_UART		((__force upf_t) (1 << 14))
 #define UPF_NO_TXEN_TEST	((__force upf_t) (1 << 15))
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
+/* Port has hardware-assisted s/w flow control */
+#define UPF_SOFT_FLOW		((__force upf_t) (1 << 22))
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
 #define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
 #define UPF_EXAR_EFR		((__force upf_t) (1 << 25))
-- 
cgit v1.2.3-55-g7522


From dba05832cbe4f305dfd998fb26d7c685d91fbbd8 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Tue, 17 Apr 2012 16:41:10 +0100
Subject: SERIAL: core: add hardware assisted h/w flow control support

Ports which are handling h/w flow control in hardware must not have
their RTS state altered depending on the tty's hardware-stopped state.
Avoid this additional logic when setting the termios state.

Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/tty/serial/serial_core.c | 7 +++++++
 include/linux/serial_core.h      | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index bd10bbd56446..9d8796e77188 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1255,6 +1255,13 @@ static void uart_set_termios(struct tty_struct *tty,
 		uart_set_mctrl(uport, mask);
 	}
 
+	/*
+	 * If the port is doing h/w assisted flow control, do nothing.
+	 * We assume that tty->hw_stopped has never been set.
+	 */
+	if (uport->flags & UPF_HARD_FLOW)
+		return;
+
 	/* Handle turning off CRTSCTS */
 	if ((old_termios->c_cflag & CRTSCTS) && !(cflag & CRTSCTS)) {
 		spin_lock_irqsave(&uport->lock, flags);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 00051388de3c..e2cda5d04e48 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -163,6 +163,8 @@ struct uart_port {
 #define UPF_BUGGY_UART		((__force upf_t) (1 << 14))
 #define UPF_NO_TXEN_TEST	((__force upf_t) (1 << 15))
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
+/* Port has hardware-assisted h/w flow control (iow, auto-RTS *not* auto-CTS) */
+#define UPF_HARD_FLOW		((__force upf_t) (1 << 21))
 /* Port has hardware-assisted s/w flow control */
 #define UPF_SOFT_FLOW		((__force upf_t) (1 << 22))
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
-- 
cgit v1.2.3-55-g7522


From 9aba8d5b011193c8e01d565c5b585df5b94f1db2 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Tue, 17 Apr 2012 17:23:14 +0100
Subject: SERIAL: core: add throttle/unthrottle callbacks for hardware assisted
 flow control

Add two callbacks for hardware assisted flow control; we need to know
when the tty layers want us to stop and restart due to their buffer
levels.

Call a driver specific throttle/unthrottle function if and only if the
driver indicates that it is using an enabled hardware assisted flow
control method, otherwise fall back to the non-hardware assisted
methods.

Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/tty/serial/serial_core.c | 31 +++++++++++++++++++++++++++----
 include/linux/serial_core.h      |  2 ++
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 9d8796e77188..098bb99c2b9f 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -610,27 +610,50 @@ static void uart_send_xchar(struct tty_struct *tty, char ch)
 static void uart_throttle(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
+	struct uart_port *port = state->uart_port;
+	uint32_t mask = 0;
 
 	if (I_IXOFF(tty))
+		mask |= UPF_SOFT_FLOW;
+	if (tty->termios.c_cflag & CRTSCTS)
+		mask |= UPF_HARD_FLOW;
+
+	if (port->flags & mask) {
+		port->ops->throttle(port);
+		mask &= ~port->flags;
+	}
+
+	if (mask & UPF_SOFT_FLOW)
 		uart_send_xchar(tty, STOP_CHAR(tty));
 
-	if (tty->termios.c_cflag & CRTSCTS)
-		uart_clear_mctrl(state->uart_port, TIOCM_RTS);
+	if (mask & UPF_HARD_FLOW)
+		uart_clear_mctrl(port, TIOCM_RTS);
 }
 
 static void uart_unthrottle(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
 	struct uart_port *port = state->uart_port;
+	uint32_t mask = 0;
 
-	if (I_IXOFF(tty)) {
+	if (I_IXOFF(tty))
+		mask |= UPF_SOFT_FLOW;
+	if (tty->termios.c_cflag & CRTSCTS)
+		mask |= UPF_HARD_FLOW;
+
+	if (port->flags & mask) {
+		port->ops->unthrottle(port);
+		mask &= ~port->flags;
+	}
+
+	if (mask & UPF_SOFT_FLOW) {
 		if (port->x_char)
 			port->x_char = 0;
 		else
 			uart_send_xchar(tty, START_CHAR(tty));
 	}
 
-	if (tty->termios.c_cflag & CRTSCTS)
+	if (mask & UPF_HARD_FLOW)
 		uart_set_mctrl(port, TIOCM_RTS);
 }
 
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index e2cda5d04e48..c6690a2a27fb 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -46,6 +46,8 @@ struct uart_ops {
 	unsigned int	(*get_mctrl)(struct uart_port *);
 	void		(*stop_tx)(struct uart_port *);
 	void		(*start_tx)(struct uart_port *);
+	void		(*throttle)(struct uart_port *);
+	void		(*unthrottle)(struct uart_port *);
 	void		(*send_xchar)(struct uart_port *, char ch);
 	void		(*stop_rx)(struct uart_port *);
 	void		(*enable_ms)(struct uart_port *);
-- 
cgit v1.2.3-55-g7522


From 2389d5014342e9535aad212d0c68d439aaf534ba Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Wed, 31 Oct 2012 22:04:31 +0100
Subject: ARM: plat-versatile: move FPGA irq driver to drivers/irqchip

This moves the Versatile FPGA interrupt controller driver, used in
the Integrator/AP, Integrator/CP and some Versatile boards, out
of arch/arm/plat-versatile and down to drivers/irqchip where we
have consensus that such drivers belong. The header file is
consequently moved to <linux/platform_data/irq-versatile-fpga.h>.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/Kconfig                                |   4 +-
 arch/arm/mach-integrator/integrator_ap.c        |   3 +-
 arch/arm/mach-integrator/integrator_cp.c        |   2 +-
 arch/arm/mach-versatile/core.c                  |   2 +-
 arch/arm/plat-versatile/Kconfig                 |   9 --
 arch/arm/plat-versatile/Makefile                |   1 -
 arch/arm/plat-versatile/fpga-irq.c              | 204 ------------------------
 arch/arm/plat-versatile/include/plat/fpga-irq.h |  13 --
 drivers/irqchip/Kconfig                         |   9 +-
 drivers/irqchip/Makefile                        |   1 +
 drivers/irqchip/irq-versatile-fpga.c            | 204 ++++++++++++++++++++++++
 include/linux/irqchip/versatile-fpga.h          |  13 ++
 12 files changed, 231 insertions(+), 234 deletions(-)
 delete mode 100644 arch/arm/plat-versatile/fpga-irq.c
 delete mode 100644 arch/arm/plat-versatile/include/plat/fpga-irq.h
 create mode 100644 drivers/irqchip/irq-versatile-fpga.c
 create mode 100644 include/linux/irqchip/versatile-fpga.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 73067efd4845..2205e3eb55bf 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -284,8 +284,8 @@ config ARCH_INTEGRATOR
 	select MULTI_IRQ_HANDLER
 	select NEED_MACH_MEMORY_H
 	select PLAT_VERSATILE
-	select PLAT_VERSATILE_FPGA_IRQ
 	select SPARSE_IRQ
+	select VERSATILE_FPGA_IRQ
 	help
 	  Support for ARM's Integrator platform.
 
@@ -318,7 +318,7 @@ config ARCH_VERSATILE
 	select PLAT_VERSATILE
 	select PLAT_VERSATILE_CLCD
 	select PLAT_VERSATILE_CLOCK
-	select PLAT_VERSATILE_FPGA_IRQ
+	select VERSATILE_FPGA_IRQ
 	help
 	  This enables support for ARM Ltd Versatile board.
 
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 4f13bc57e5a4..e67a9fe18d1b 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -31,6 +31,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/irqchip/versatile-fpga.h>
 #include <linux/mtd/physmap.h>
 #include <linux/clk.h>
 #include <linux/platform_data/clk-integrator.h>
@@ -56,8 +57,6 @@
 #include <asm/mach/pci.h>
 #include <asm/mach/time.h>
 
-#include <plat/fpga-irq.h>
-
 #include "common.h"
 
 /* 
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 4423bc8e84c5..acecf04f50f7 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -20,6 +20,7 @@
 #include <linux/amba/clcd.h>
 #include <linux/amba/mmci.h>
 #include <linux/io.h>
+#include <linux/irqchip/versatile-fpga.h>
 #include <linux/gfp.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_data/clk-integrator.h>
@@ -46,7 +47,6 @@
 #include <asm/hardware/timer-sp.h>
 
 #include <plat/clcd.h>
-#include <plat/fpga-irq.h>
 #include <plat/sched_clock.h>
 
 #include "common.h"
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 5b5c1eeb5b5c..5d5929450366 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -32,6 +32,7 @@
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
+#include <linux/irqchip/versatile-fpga.h>
 #include <linux/gfp.h>
 #include <linux/clkdev.h>
 #include <linux/mtd/physmap.h>
@@ -51,7 +52,6 @@
 #include <asm/hardware/timer-sp.h>
 
 #include <plat/clcd.h>
-#include <plat/fpga-irq.h>
 #include <plat/sched_clock.h>
 
 #include "core.h"
diff --git a/arch/arm/plat-versatile/Kconfig b/arch/arm/plat-versatile/Kconfig
index 2a4ae8a6a081..619f0fa0f06c 100644
--- a/arch/arm/plat-versatile/Kconfig
+++ b/arch/arm/plat-versatile/Kconfig
@@ -6,15 +6,6 @@ config PLAT_VERSATILE_CLOCK
 config PLAT_VERSATILE_CLCD
 	bool
 
-config PLAT_VERSATILE_FPGA_IRQ
-	bool
-	select IRQ_DOMAIN
-
-config PLAT_VERSATILE_FPGA_IRQ_NR
-       int
-       default 4
-       depends on PLAT_VERSATILE_FPGA_IRQ
-
 config PLAT_VERSATILE_LEDS
 	def_bool y if NEW_LEDS
 	depends on ARCH_REALVIEW || ARCH_VERSATILE
diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile
index 74cfd94cbf80..f88d448b629c 100644
--- a/arch/arm/plat-versatile/Makefile
+++ b/arch/arm/plat-versatile/Makefile
@@ -2,7 +2,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include
 
 obj-$(CONFIG_PLAT_VERSATILE_CLOCK) += clock.o
 obj-$(CONFIG_PLAT_VERSATILE_CLCD) += clcd.o
-obj-$(CONFIG_PLAT_VERSATILE_FPGA_IRQ) += fpga-irq.o
 obj-$(CONFIG_PLAT_VERSATILE_LEDS) += leds.o
 obj-$(CONFIG_PLAT_VERSATILE_SCHED_CLOCK) += sched-clock.o
 obj-$(CONFIG_SMP) += headsmp.o platsmp.o
diff --git a/arch/arm/plat-versatile/fpga-irq.c b/arch/arm/plat-versatile/fpga-irq.c
deleted file mode 100644
index dfe317c20354..000000000000
--- a/arch/arm/plat-versatile/fpga-irq.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- *  Support for Versatile FPGA-based IRQ controllers
- */
-#include <linux/bitops.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/irqdomain.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include <asm/exception.h>
-#include <asm/mach/irq.h>
-#include <plat/fpga-irq.h>
-
-#define IRQ_STATUS		0x00
-#define IRQ_RAW_STATUS		0x04
-#define IRQ_ENABLE_SET		0x08
-#define IRQ_ENABLE_CLEAR	0x0c
-#define INT_SOFT_SET		0x10
-#define INT_SOFT_CLEAR		0x14
-#define FIQ_STATUS		0x20
-#define FIQ_RAW_STATUS		0x24
-#define FIQ_ENABLE		0x28
-#define FIQ_ENABLE_SET		0x28
-#define FIQ_ENABLE_CLEAR	0x2C
-
-/**
- * struct fpga_irq_data - irq data container for the FPGA IRQ controller
- * @base: memory offset in virtual memory
- * @chip: chip container for this instance
- * @domain: IRQ domain for this instance
- * @valid: mask for valid IRQs on this controller
- * @used_irqs: number of active IRQs on this controller
- */
-struct fpga_irq_data {
-	void __iomem *base;
-	struct irq_chip chip;
-	u32 valid;
-	struct irq_domain *domain;
-	u8 used_irqs;
-};
-
-/* we cannot allocate memory when the controllers are initially registered */
-static struct fpga_irq_data fpga_irq_devices[CONFIG_PLAT_VERSATILE_FPGA_IRQ_NR];
-static int fpga_irq_id;
-
-static void fpga_irq_mask(struct irq_data *d)
-{
-	struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
-	u32 mask = 1 << d->hwirq;
-
-	writel(mask, f->base + IRQ_ENABLE_CLEAR);
-}
-
-static void fpga_irq_unmask(struct irq_data *d)
-{
-	struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
-	u32 mask = 1 << d->hwirq;
-
-	writel(mask, f->base + IRQ_ENABLE_SET);
-}
-
-static void fpga_irq_handle(unsigned int irq, struct irq_desc *desc)
-{
-	struct fpga_irq_data *f = irq_desc_get_handler_data(desc);
-	u32 status = readl(f->base + IRQ_STATUS);
-
-	if (status == 0) {
-		do_bad_IRQ(irq, desc);
-		return;
-	}
-
-	do {
-		irq = ffs(status) - 1;
-		status &= ~(1 << irq);
-		generic_handle_irq(irq_find_mapping(f->domain, irq));
-	} while (status);
-}
-
-/*
- * Handle each interrupt in a single FPGA IRQ controller.  Returns non-zero
- * if we've handled at least one interrupt.  This does a single read of the
- * status register and handles all interrupts in order from LSB first.
- */
-static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
-{
-	int handled = 0;
-	int irq;
-	u32 status;
-
-	while ((status  = readl(f->base + IRQ_STATUS))) {
-		irq = ffs(status) - 1;
-		handle_IRQ(irq_find_mapping(f->domain, irq), regs);
-		handled = 1;
-	}
-
-	return handled;
-}
-
-/*
- * Keep iterating over all registered FPGA IRQ controllers until there are
- * no pending interrupts.
- */
-asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
-{
-	int i, handled;
-
-	do {
-		for (i = 0, handled = 0; i < fpga_irq_id; ++i)
-			handled |= handle_one_fpga(&fpga_irq_devices[i], regs);
-	} while (handled);
-}
-
-static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq,
-		irq_hw_number_t hwirq)
-{
-	struct fpga_irq_data *f = d->host_data;
-
-	/* Skip invalid IRQs, only register handlers for the real ones */
-	if (!(f->valid & BIT(hwirq)))
-		return -ENOTSUPP;
-	irq_set_chip_data(irq, f);
-	irq_set_chip_and_handler(irq, &f->chip,
-				handle_level_irq);
-	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	return 0;
-}
-
-static struct irq_domain_ops fpga_irqdomain_ops = {
-	.map = fpga_irqdomain_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
-			  int parent_irq, u32 valid, struct device_node *node)
-{
-	struct fpga_irq_data *f;
-	int i;
-
-	if (fpga_irq_id >= ARRAY_SIZE(fpga_irq_devices)) {
-		pr_err("%s: too few FPGA IRQ controllers, increase CONFIG_PLAT_VERSATILE_FPGA_IRQ_NR\n", __func__);
-		return;
-	}
-	f = &fpga_irq_devices[fpga_irq_id];
-	f->base = base;
-	f->chip.name = name;
-	f->chip.irq_ack = fpga_irq_mask;
-	f->chip.irq_mask = fpga_irq_mask;
-	f->chip.irq_unmask = fpga_irq_unmask;
-	f->valid = valid;
-
-	if (parent_irq != -1) {
-		irq_set_handler_data(parent_irq, f);
-		irq_set_chained_handler(parent_irq, fpga_irq_handle);
-	}
-
-	/* This will also allocate irq descriptors */
-	f->domain = irq_domain_add_simple(node, fls(valid), irq_start,
-					  &fpga_irqdomain_ops, f);
-
-	/* This will allocate all valid descriptors in the linear case */
-	for (i = 0; i < fls(valid); i++)
-		if (valid & BIT(i)) {
-			if (!irq_start)
-				irq_create_mapping(f->domain, i);
-			f->used_irqs++;
-		}
-
-	pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs\n",
-		fpga_irq_id, name, base, f->used_irqs);
-
-	fpga_irq_id++;
-}
-
-#ifdef CONFIG_OF
-int __init fpga_irq_of_init(struct device_node *node,
-			    struct device_node *parent)
-{
-	struct fpga_irq_data *f;
-	void __iomem *base;
-	u32 clear_mask;
-	u32 valid_mask;
-
-	if (WARN_ON(!node))
-		return -ENODEV;
-
-	base = of_iomap(node, 0);
-	WARN(!base, "unable to map fpga irq registers\n");
-
-	if (of_property_read_u32(node, "clear-mask", &clear_mask))
-		clear_mask = 0;
-
-	if (of_property_read_u32(node, "valid-mask", &valid_mask))
-		valid_mask = 0;
-
-	fpga_irq_init(base, node->name, 0, -1, valid_mask, node);
-
-	writel(clear_mask, base + IRQ_ENABLE_CLEAR);
-	writel(clear_mask, base + FIQ_ENABLE_CLEAR);
-
-	return 0;
-}
-#endif
diff --git a/arch/arm/plat-versatile/include/plat/fpga-irq.h b/arch/arm/plat-versatile/include/plat/fpga-irq.h
deleted file mode 100644
index 1fac9651d3ca..000000000000
--- a/arch/arm/plat-versatile/include/plat/fpga-irq.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef PLAT_FPGA_IRQ_H
-#define PLAT_FPGA_IRQ_H
-
-struct device_node;
-struct pt_regs;
-
-void fpga_handle_irq(struct pt_regs *regs);
-void fpga_irq_init(void __iomem *, const char *, int, int, u32,
-		struct device_node *node);
-int fpga_irq_of_init(struct device_node *node,
-		     struct device_node *parent);
-
-#endif
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 1bb8bf6d7fd4..62ca575701d3 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -1 +1,8 @@
-# empty
+config VERSATILE_FPGA_IRQ
+	bool
+	select IRQ_DOMAIN
+
+config VERSATILE_FPGA_IRQ_NR
+       int
+       default 4
+       depends on VERSATILE_FPGA_IRQ
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 054321db4350..e2e6eb5d32f4 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o
+obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
new file mode 100644
index 000000000000..789b3e526930
--- /dev/null
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -0,0 +1,204 @@
+/*
+ *  Support for Versatile FPGA-based IRQ controllers
+ */
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/irqchip/versatile-fpga.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/exception.h>
+#include <asm/mach/irq.h>
+
+#define IRQ_STATUS		0x00
+#define IRQ_RAW_STATUS		0x04
+#define IRQ_ENABLE_SET		0x08
+#define IRQ_ENABLE_CLEAR	0x0c
+#define INT_SOFT_SET		0x10
+#define INT_SOFT_CLEAR		0x14
+#define FIQ_STATUS		0x20
+#define FIQ_RAW_STATUS		0x24
+#define FIQ_ENABLE		0x28
+#define FIQ_ENABLE_SET		0x28
+#define FIQ_ENABLE_CLEAR	0x2C
+
+/**
+ * struct fpga_irq_data - irq data container for the FPGA IRQ controller
+ * @base: memory offset in virtual memory
+ * @chip: chip container for this instance
+ * @domain: IRQ domain for this instance
+ * @valid: mask for valid IRQs on this controller
+ * @used_irqs: number of active IRQs on this controller
+ */
+struct fpga_irq_data {
+	void __iomem *base;
+	struct irq_chip chip;
+	u32 valid;
+	struct irq_domain *domain;
+	u8 used_irqs;
+};
+
+/* we cannot allocate memory when the controllers are initially registered */
+static struct fpga_irq_data fpga_irq_devices[CONFIG_VERSATILE_FPGA_IRQ_NR];
+static int fpga_irq_id;
+
+static void fpga_irq_mask(struct irq_data *d)
+{
+	struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << d->hwirq;
+
+	writel(mask, f->base + IRQ_ENABLE_CLEAR);
+}
+
+static void fpga_irq_unmask(struct irq_data *d)
+{
+	struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << d->hwirq;
+
+	writel(mask, f->base + IRQ_ENABLE_SET);
+}
+
+static void fpga_irq_handle(unsigned int irq, struct irq_desc *desc)
+{
+	struct fpga_irq_data *f = irq_desc_get_handler_data(desc);
+	u32 status = readl(f->base + IRQ_STATUS);
+
+	if (status == 0) {
+		do_bad_IRQ(irq, desc);
+		return;
+	}
+
+	do {
+		irq = ffs(status) - 1;
+		status &= ~(1 << irq);
+		generic_handle_irq(irq_find_mapping(f->domain, irq));
+	} while (status);
+}
+
+/*
+ * Handle each interrupt in a single FPGA IRQ controller.  Returns non-zero
+ * if we've handled at least one interrupt.  This does a single read of the
+ * status register and handles all interrupts in order from LSB first.
+ */
+static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
+{
+	int handled = 0;
+	int irq;
+	u32 status;
+
+	while ((status  = readl(f->base + IRQ_STATUS))) {
+		irq = ffs(status) - 1;
+		handle_IRQ(irq_find_mapping(f->domain, irq), regs);
+		handled = 1;
+	}
+
+	return handled;
+}
+
+/*
+ * Keep iterating over all registered FPGA IRQ controllers until there are
+ * no pending interrupts.
+ */
+asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
+{
+	int i, handled;
+
+	do {
+		for (i = 0, handled = 0; i < fpga_irq_id; ++i)
+			handled |= handle_one_fpga(&fpga_irq_devices[i], regs);
+	} while (handled);
+}
+
+static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq,
+		irq_hw_number_t hwirq)
+{
+	struct fpga_irq_data *f = d->host_data;
+
+	/* Skip invalid IRQs, only register handlers for the real ones */
+	if (!(f->valid & BIT(hwirq)))
+		return -ENOTSUPP;
+	irq_set_chip_data(irq, f);
+	irq_set_chip_and_handler(irq, &f->chip,
+				handle_level_irq);
+	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+	return 0;
+}
+
+static struct irq_domain_ops fpga_irqdomain_ops = {
+	.map = fpga_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
+			  int parent_irq, u32 valid, struct device_node *node)
+{
+	struct fpga_irq_data *f;
+	int i;
+
+	if (fpga_irq_id >= ARRAY_SIZE(fpga_irq_devices)) {
+		pr_err("%s: too few FPGA IRQ controllers, increase CONFIG_PLAT_VERSATILE_FPGA_IRQ_NR\n", __func__);
+		return;
+	}
+	f = &fpga_irq_devices[fpga_irq_id];
+	f->base = base;
+	f->chip.name = name;
+	f->chip.irq_ack = fpga_irq_mask;
+	f->chip.irq_mask = fpga_irq_mask;
+	f->chip.irq_unmask = fpga_irq_unmask;
+	f->valid = valid;
+
+	if (parent_irq != -1) {
+		irq_set_handler_data(parent_irq, f);
+		irq_set_chained_handler(parent_irq, fpga_irq_handle);
+	}
+
+	/* This will also allocate irq descriptors */
+	f->domain = irq_domain_add_simple(node, fls(valid), irq_start,
+					  &fpga_irqdomain_ops, f);
+
+	/* This will allocate all valid descriptors in the linear case */
+	for (i = 0; i < fls(valid); i++)
+		if (valid & BIT(i)) {
+			if (!irq_start)
+				irq_create_mapping(f->domain, i);
+			f->used_irqs++;
+		}
+
+	pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs\n",
+		fpga_irq_id, name, base, f->used_irqs);
+
+	fpga_irq_id++;
+}
+
+#ifdef CONFIG_OF
+int __init fpga_irq_of_init(struct device_node *node,
+			    struct device_node *parent)
+{
+	struct fpga_irq_data *f;
+	void __iomem *base;
+	u32 clear_mask;
+	u32 valid_mask;
+
+	if (WARN_ON(!node))
+		return -ENODEV;
+
+	base = of_iomap(node, 0);
+	WARN(!base, "unable to map fpga irq registers\n");
+
+	if (of_property_read_u32(node, "clear-mask", &clear_mask))
+		clear_mask = 0;
+
+	if (of_property_read_u32(node, "valid-mask", &valid_mask))
+		valid_mask = 0;
+
+	fpga_irq_init(base, node->name, 0, -1, valid_mask, node);
+
+	writel(clear_mask, base + IRQ_ENABLE_CLEAR);
+	writel(clear_mask, base + FIQ_ENABLE_CLEAR);
+
+	return 0;
+}
+#endif
diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h
new file mode 100644
index 000000000000..1fac9651d3ca
--- /dev/null
+++ b/include/linux/irqchip/versatile-fpga.h
@@ -0,0 +1,13 @@
+#ifndef PLAT_FPGA_IRQ_H
+#define PLAT_FPGA_IRQ_H
+
+struct device_node;
+struct pt_regs;
+
+void fpga_handle_irq(struct pt_regs *regs);
+void fpga_irq_init(void __iomem *, const char *, int, int, u32,
+		struct device_node *node);
+int fpga_irq_of_init(struct device_node *node,
+		     struct device_node *parent);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 23064088d6aea049746755e9851760620921a80b Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:04:52 +0530
Subject: Thermal: Refactor thermal.h file

This patch rearranges the code in thermal.h file,
in the following order, so that it is easy to
read/maintain.
1. All #defines
2. All enums
3. All fops structures
4. All device structures
5. All function declarations

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 include/linux/thermal.h | 78 +++++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 91b34812cd84..8611e3eba60c 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -29,6 +29,23 @@
 #include <linux/device.h>
 #include <linux/workqueue.h>
 
+#define THERMAL_TRIPS_NONE	-1
+#define THERMAL_MAX_TRIPS	12
+#define THERMAL_NAME_LENGTH	20
+
+/* No upper/lower limit requirement */
+#define THERMAL_NO_LIMIT	-1UL
+
+/* Unit conversion macros */
+#define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
+				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
+#define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
+
+/* Adding event notification support elements */
+#define THERMAL_GENL_FAMILY_NAME                "thermal_event"
+#define THERMAL_GENL_VERSION                    0x01
+#define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_group"
+
 struct thermal_zone_device;
 struct thermal_cooling_device;
 
@@ -50,6 +67,30 @@ enum thermal_trend {
 	THERMAL_TREND_DROPPING, /* temperature is dropping */
 };
 
+/* Events supported by Thermal Netlink */
+enum events {
+	THERMAL_AUX0,
+	THERMAL_AUX1,
+	THERMAL_CRITICAL,
+	THERMAL_DEV_FAULT,
+};
+
+/* attributes of thermal_genl_family */
+enum {
+	THERMAL_GENL_ATTR_UNSPEC,
+	THERMAL_GENL_ATTR_EVENT,
+	__THERMAL_GENL_ATTR_MAX,
+};
+#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
+
+/* commands supported by the thermal_genl_family */
+enum {
+	THERMAL_GENL_CMD_UNSPEC,
+	THERMAL_GENL_CMD_EVENT,
+	__THERMAL_GENL_CMD_MAX,
+};
+#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1)
+
 struct thermal_zone_device_ops {
 	int (*bind) (struct thermal_zone_device *,
 		     struct thermal_cooling_device *);
@@ -83,11 +124,6 @@ struct thermal_cooling_device_ops {
 	int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);
 };
 
-#define THERMAL_NO_LIMIT -1UL /* no upper/lower limit requirement */
-
-#define THERMAL_TRIPS_NONE -1
-#define THERMAL_MAX_TRIPS 12
-#define THERMAL_NAME_LENGTH 20
 struct thermal_cooling_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
@@ -100,10 +136,6 @@ struct thermal_cooling_device {
 	struct list_head node;
 };
 
-#define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
-				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
-#define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
-
 struct thermal_attr {
 	struct device_attribute attr;
 	char name[THERMAL_NAME_LENGTH];
@@ -131,38 +163,13 @@ struct thermal_zone_device {
 	struct list_head node;
 	struct delayed_work poll_queue;
 };
-/* Adding event notification support elements */
-#define THERMAL_GENL_FAMILY_NAME                "thermal_event"
-#define THERMAL_GENL_VERSION                    0x01
-#define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_group"
-
-enum events {
-	THERMAL_AUX0,
-	THERMAL_AUX1,
-	THERMAL_CRITICAL,
-	THERMAL_DEV_FAULT,
-};
 
 struct thermal_genl_event {
 	u32 orig;
 	enum events event;
 };
-/* attributes of thermal_genl_family */
-enum {
-	THERMAL_GENL_ATTR_UNSPEC,
-	THERMAL_GENL_ATTR_EVENT,
-	__THERMAL_GENL_ATTR_MAX,
-};
-#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
-
-/* commands supported by the thermal_genl_family */
-enum {
-	THERMAL_GENL_CMD_UNSPEC,
-	THERMAL_GENL_CMD_EVENT,
-	__THERMAL_GENL_CMD_MAX,
-};
-#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1)
 
+/* Function declarations */
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
 		void *, const struct thermal_zone_device_ops *, int, int);
 void thermal_zone_device_unregister(struct thermal_zone_device *);
@@ -173,6 +180,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
 				       struct thermal_cooling_device *);
 void thermal_zone_device_update(struct thermal_zone_device *);
+
 struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 		const struct thermal_cooling_device_ops *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
-- 
cgit v1.2.3-55-g7522


From 9b4298a088907811a4fe5a5d7cd2454da60708c5 Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:04:54 +0530
Subject: Thermal: Add get trend, get instance API's to thermal_sys

This patch adds the following API's to thermal_sys.c, that
can be used by other Thermal drivers.
 * get_tz_trend: obtain the trend of the given thermal zone
 * get_thermal_instance: obtain the instance corresponding
   to the given tz, cdev and the trip point.

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sys.c | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/thermal.h       |  4 ++++
 2 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index bbc834625f7f..1f98c560a88e 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -82,6 +82,46 @@ static void release_idr(struct idr *idr, struct mutex *lock, int id)
 		mutex_unlock(lock);
 }
 
+int get_tz_trend(struct thermal_zone_device *tz, int trip)
+{
+	enum thermal_trend trend;
+
+	if (!tz->ops->get_trend || tz->ops->get_trend(tz, trip, &trend)) {
+		if (tz->temperature > tz->last_temperature)
+			trend = THERMAL_TREND_RAISING;
+		else if (tz->temperature < tz->last_temperature)
+			trend = THERMAL_TREND_DROPPING;
+		else
+			trend = THERMAL_TREND_STABLE;
+	}
+
+	return trend;
+}
+EXPORT_SYMBOL(get_tz_trend);
+
+struct thermal_instance *get_thermal_instance(struct thermal_zone_device *tz,
+			struct thermal_cooling_device *cdev, int trip)
+{
+	struct thermal_instance *pos = NULL;
+	struct thermal_instance *target_instance = NULL;
+
+	mutex_lock(&tz->lock);
+	mutex_lock(&cdev->lock);
+
+	list_for_each_entry(pos, &tz->thermal_instances, tz_node) {
+		if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
+			target_instance = pos;
+			break;
+		}
+	}
+
+	mutex_unlock(&cdev->lock);
+	mutex_unlock(&tz->lock);
+
+	return target_instance;
+}
+EXPORT_SYMBOL(get_thermal_instance);
+
 /* sys I/F for thermal zone */
 
 #define to_thermal_zone(_dev) \
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8611e3eba60c..32af124d23cd 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -185,6 +185,10 @@ struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 		const struct thermal_cooling_device_ops *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 
+int get_tz_trend(struct thermal_zone_device *, int);
+struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
+		struct thermal_cooling_device *, int);
+
 #ifdef CONFIG_NET
 extern int thermal_generate_netlink_event(u32 orig, enum events event);
 #else
-- 
cgit v1.2.3-55-g7522


From ef87394791206019e4e4e04cb746865f2dc115ed Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:04:55 +0530
Subject: Thermal: Add platform level information to thermal.h

This patch adds platform level information to thermal.h
by introducing two structures to hold:
 * bind parameters for a thermal zone,
 * zone level platform parameters

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 include/linux/thermal.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 32af124d23cd..4caa32e400b4 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -157,6 +157,7 @@ struct thermal_zone_device {
 	int passive;
 	unsigned int forced_passive;
 	const struct thermal_zone_device_ops *ops;
+	const struct thermal_zone_params *tzp;
 	struct list_head thermal_instances;
 	struct idr idr;
 	struct mutex lock; /* protect thermal_instances list */
@@ -164,6 +165,34 @@ struct thermal_zone_device {
 	struct delayed_work poll_queue;
 };
 
+/* Structure that holds binding parameters for a zone */
+struct thermal_bind_params {
+	struct thermal_cooling_device *cdev;
+
+	/*
+	 * This is a measure of 'how effectively these devices can
+	 * cool 'this' thermal zone. The shall be determined by platform
+	 * characterization. This is on a 'percentage' scale.
+	 * See Documentation/thermal/sysfs-api.txt for more information.
+	 */
+	int weight;
+
+	/*
+	 * This is a bit mask that gives the binding relation between this
+	 * thermal zone and cdev, for a particular trip point.
+	 * See Documentation/thermal/sysfs-api.txt for more information.
+	 */
+	int trip_mask;
+	int (*match) (struct thermal_zone_device *tz,
+			struct thermal_cooling_device *cdev);
+};
+
+/* Structure to define Thermal Zone parameters */
+struct thermal_zone_params {
+	int num_tbps;	/* Number of tbp entries */
+	struct thermal_bind_params *tbp;
+};
+
 struct thermal_genl_event {
 	u32 orig;
 	enum events event;
-- 
cgit v1.2.3-55-g7522


From 50125a9b27dd09e9afdc1b8712ba0b3859886c68 Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:04:56 +0530
Subject: Thermal: Pass zone parameters as argument to tzd_register

This patch adds the thermal zone parameter as an argument to
the tzd_register() function call; and updates other drivers
using this function.

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/acpi/thermal.c                             | 6 +++---
 drivers/platform/x86/acerhdf.c                     | 2 +-
 drivers/platform/x86/intel_mid_thermal.c           | 2 +-
 drivers/power/power_supply_core.c                  | 2 +-
 drivers/staging/omap-thermal/omap-thermal-common.c | 2 +-
 drivers/thermal/exynos_thermal.c                   | 2 +-
 drivers/thermal/rcar_thermal.c                     | 2 +-
 drivers/thermal/spear_thermal.c                    | 2 +-
 drivers/thermal/thermal_sys.c                      | 3 +++
 include/linux/thermal.h                            | 3 ++-
 10 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 804204d41999..1794468223c4 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -900,14 +900,14 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 	if (tz->trips.passive.flags.valid)
 		tz->thermal_zone =
 			thermal_zone_device_register("acpitz", trips, 0, tz,
-						     &acpi_thermal_zone_ops,
+						&acpi_thermal_zone_ops, NULL,
 						     tz->trips.passive.tsp*100,
 						     tz->polling_frequency*100);
 	else
 		tz->thermal_zone =
 			thermal_zone_device_register("acpitz", trips, 0, tz,
-						     &acpi_thermal_zone_ops, 0,
-						     tz->polling_frequency*100);
+						&acpi_thermal_zone_ops, NULL,
+						0, tz->polling_frequency*100);
 	if (IS_ERR(tz->thermal_zone))
 		return -ENODEV;
 
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 84c56881ba80..c2e3e63d2c15 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -662,7 +662,7 @@ static int acerhdf_register_thermal(void)
 		return -EINVAL;
 
 	thz_dev = thermal_zone_device_register("acerhdf", 1, 0, NULL,
-					      &acerhdf_dev_ops, 0,
+					      &acerhdf_dev_ops, NULL, 0,
 					      (kernelmode) ? interval*1000 : 0);
 	if (IS_ERR(thz_dev))
 		return -EINVAL;
diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c
index c8097616dd62..93de09019d1d 100644
--- a/drivers/platform/x86/intel_mid_thermal.c
+++ b/drivers/platform/x86/intel_mid_thermal.c
@@ -502,7 +502,7 @@ static int mid_thermal_probe(struct platform_device *pdev)
 			goto err;
 		}
 		pinfo->tzd[i] = thermal_zone_device_register(name[i],
-				0, 0, td_info, &tzd_ops, 0, 0);
+				0, 0, td_info, &tzd_ops, NULL, 0, 0);
 		if (IS_ERR(pinfo->tzd[i])) {
 			kfree(td_info);
 			ret = PTR_ERR(pinfo->tzd[i]);
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 2436f1350013..f77a41272e5d 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -201,7 +201,7 @@ static int psy_register_thermal(struct power_supply *psy)
 	for (i = 0; i < psy->num_properties; i++) {
 		if (psy->properties[i] == POWER_SUPPLY_PROP_TEMP) {
 			psy->tzd = thermal_zone_device_register(psy->name, 0, 0,
-					psy, &psy_tzd_ops, 0, 0);
+					psy, &psy_tzd_ops, NULL, 0, 0);
 			if (IS_ERR(psy->tzd))
 				return PTR_ERR(psy->tzd);
 			break;
diff --git a/drivers/staging/omap-thermal/omap-thermal-common.c b/drivers/staging/omap-thermal/omap-thermal-common.c
index 5c0c203b887f..788f64f2f467 100644
--- a/drivers/staging/omap-thermal/omap-thermal-common.c
+++ b/drivers/staging/omap-thermal/omap-thermal-common.c
@@ -270,7 +270,7 @@ int omap_thermal_expose_sensor(struct omap_bandgap *bg_ptr, int id,
 	/* Create thermal zone */
 	data->omap_thermal = thermal_zone_device_register(domain,
 				OMAP_TRIP_NUMBER, 0, data, &omap_thermal_ops,
-				FAST_TEMP_MONITORING_RATE,
+				NULL, FAST_TEMP_MONITORING_RATE,
 				FAST_TEMP_MONITORING_RATE);
 	if (IS_ERR_OR_NULL(data->omap_thermal)) {
 		dev_err(bg_ptr->dev, "thermal zone device is NULL\n");
diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c
index 6dd29e4ce36b..7772d1603769 100644
--- a/drivers/thermal/exynos_thermal.c
+++ b/drivers/thermal/exynos_thermal.c
@@ -451,7 +451,7 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
 	th_zone->cool_dev_size++;
 
 	th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name,
-			EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, 0,
+			EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0,
 			IDLE_INTERVAL);
 
 	if (IS_ERR(th_zone->therm_dev)) {
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index f7a1b574a304..762f6373d50c 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -211,7 +211,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
 	}
 
 	zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv,
-					    &rcar_thermal_zone_ops, 0, 0);
+				    &rcar_thermal_zone_ops, NULL, 0, 0);
 	if (IS_ERR(zone)) {
 		dev_err(&pdev->dev, "thermal zone device is NULL\n");
 		ret = PTR_ERR(zone);
diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c
index 9bc969261d01..6b2d8b21aaee 100644
--- a/drivers/thermal/spear_thermal.c
+++ b/drivers/thermal/spear_thermal.c
@@ -147,7 +147,7 @@ static int spear_thermal_probe(struct platform_device *pdev)
 	writel_relaxed(stdev->flags, stdev->thermal_base);
 
 	spear_thermal = thermal_zone_device_register("spear_thermal", 0, 0,
-				stdev, &ops, 0, 0);
+				stdev, &ops, NULL, 0, 0);
 	if (IS_ERR(spear_thermal)) {
 		dev_err(&pdev->dev, "thermal zone device is NULL\n");
 		ret = PTR_ERR(spear_thermal);
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 1f98c560a88e..dca3bfc0e702 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1341,6 +1341,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
  * @mask:	a bit string indicating the writeablility of trip points
  * @devdata:	private device data
  * @ops:	standard thermal zone device callbacks
+ * @tzp:	thermal zone platform parameters
  * @passive_delay: number of milliseconds to wait between polls when
  *		   performing passive cooling
  * @polling_delay: number of milliseconds to wait between polls when checking
@@ -1353,6 +1354,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
 struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	int trips, int mask, void *devdata,
 	const struct thermal_zone_device_ops *ops,
+	const struct thermal_zone_params *tzp,
 	int passive_delay, int polling_delay)
 {
 	struct thermal_zone_device *tz;
@@ -1386,6 +1388,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
 	strcpy(tz->type, type ? : "");
 	tz->ops = ops;
+	tz->tzp = tzp;
 	tz->device.class = &thermal_class;
 	tz->devdata = devdata;
 	tz->trips = trips;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 4caa32e400b4..58cb1c036a0e 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -200,7 +200,8 @@ struct thermal_genl_event {
 
 /* Function declarations */
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
-		void *, const struct thermal_zone_device_ops *, int, int);
+		void *, const struct thermal_zone_device_ops *,
+		const struct thermal_zone_params *, int, int);
 void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
-- 
cgit v1.2.3-55-g7522


From a4a15485fbba44d162d626141b9a7404e790f570 Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:04:57 +0530
Subject: Thermal: Add thermal governor registration APIs

This patch creates a structure to hold platform
thermal governor information, and provides APIs
for individual thermal governors to register/unregister
with the Thermal framework.

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sys.c | 90 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/thermal.h       | 16 ++++++++
 2 files changed, 106 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index dca3bfc0e702..0afd84cb127d 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -49,7 +49,86 @@ static DEFINE_MUTEX(thermal_idr_lock);
 
 static LIST_HEAD(thermal_tz_list);
 static LIST_HEAD(thermal_cdev_list);
+static LIST_HEAD(thermal_governor_list);
+
 static DEFINE_MUTEX(thermal_list_lock);
+static DEFINE_MUTEX(thermal_governor_lock);
+
+static struct thermal_governor *__find_governor(const char *name)
+{
+	struct thermal_governor *pos;
+
+	list_for_each_entry(pos, &thermal_governor_list, governor_list)
+		if (!strnicmp(name, pos->name, THERMAL_NAME_LENGTH))
+			return pos;
+
+	return NULL;
+}
+
+int thermal_register_governor(struct thermal_governor *governor)
+{
+	int err;
+	const char *name;
+	struct thermal_zone_device *pos;
+
+	if (!governor)
+		return -EINVAL;
+
+	mutex_lock(&thermal_governor_lock);
+
+	err = -EBUSY;
+	if (__find_governor(governor->name) == NULL) {
+		err = 0;
+		list_add(&governor->governor_list, &thermal_governor_list);
+	}
+
+	mutex_lock(&thermal_list_lock);
+
+	list_for_each_entry(pos, &thermal_tz_list, node) {
+		if (pos->governor)
+			continue;
+		if (pos->tzp)
+			name = pos->tzp->governor_name;
+		else
+			name = DEFAULT_THERMAL_GOVERNOR;
+		if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH))
+			pos->governor = governor;
+	}
+
+	mutex_unlock(&thermal_list_lock);
+	mutex_unlock(&thermal_governor_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(thermal_register_governor);
+
+void thermal_unregister_governor(struct thermal_governor *governor)
+{
+	struct thermal_zone_device *pos;
+
+	if (!governor)
+		return;
+
+	mutex_lock(&thermal_governor_lock);
+
+	if (__find_governor(governor->name) == NULL)
+		goto exit;
+
+	mutex_lock(&thermal_list_lock);
+
+	list_for_each_entry(pos, &thermal_tz_list, node) {
+		if (!strnicmp(pos->governor->name, governor->name,
+						THERMAL_NAME_LENGTH))
+			pos->governor = NULL;
+	}
+
+	mutex_unlock(&thermal_list_lock);
+	list_del(&governor->governor_list);
+exit:
+	mutex_unlock(&thermal_governor_lock);
+	return;
+}
+EXPORT_SYMBOL_GPL(thermal_unregister_governor);
 
 static int get_idr(struct idr *idr, struct mutex *lock, int *id)
 {
@@ -1437,6 +1516,16 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	if (result)
 		goto unregister;
 
+	/* Update 'this' zone's governor information */
+	mutex_lock(&thermal_governor_lock);
+
+	if (tz->tzp)
+		tz->governor = __find_governor(tz->tzp->governor_name);
+	else
+		tz->governor = __find_governor(DEFAULT_THERMAL_GOVERNOR);
+
+	mutex_unlock(&thermal_governor_lock);
+
 	result = thermal_add_hwmon_sysfs(tz);
 	if (result)
 		goto unregister;
@@ -1500,6 +1589,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 	if (tz->ops->get_mode)
 		device_remove_file(&tz->device, &dev_attr_mode);
 	remove_trip_attrs(tz);
+	tz->governor = NULL;
 
 	thermal_remove_hwmon_sysfs(tz);
 	release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 58cb1c036a0e..6182bd5f7504 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -46,6 +46,9 @@
 #define THERMAL_GENL_VERSION                    0x01
 #define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_group"
 
+/* Default Thermal Governor: Does Linear Throttling */
+#define DEFAULT_THERMAL_GOVERNOR	"step_wise"
+
 struct thermal_zone_device;
 struct thermal_cooling_device;
 
@@ -158,6 +161,7 @@ struct thermal_zone_device {
 	unsigned int forced_passive;
 	const struct thermal_zone_device_ops *ops;
 	const struct thermal_zone_params *tzp;
+	struct thermal_governor *governor;
 	struct list_head thermal_instances;
 	struct idr idr;
 	struct mutex lock; /* protect thermal_instances list */
@@ -165,6 +169,14 @@ struct thermal_zone_device {
 	struct delayed_work poll_queue;
 };
 
+/* Structure that holds thermal governor information */
+struct thermal_governor {
+	char name[THERMAL_NAME_LENGTH];
+	int (*throttle)(struct thermal_zone_device *tz, int trip);
+	struct list_head	governor_list;
+	struct module		*owner;
+};
+
 /* Structure that holds binding parameters for a zone */
 struct thermal_bind_params {
 	struct thermal_cooling_device *cdev;
@@ -189,6 +201,7 @@ struct thermal_bind_params {
 
 /* Structure to define Thermal Zone parameters */
 struct thermal_zone_params {
+	char governor_name[THERMAL_NAME_LENGTH];
 	int num_tbps;	/* Number of tbp entries */
 	struct thermal_bind_params *tbp;
 };
@@ -219,6 +232,9 @@ int get_tz_trend(struct thermal_zone_device *, int);
 struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
 		struct thermal_cooling_device *, int);
 
+int thermal_register_governor(struct thermal_governor *);
+void thermal_unregister_governor(struct thermal_governor *);
+
 #ifdef CONFIG_NET
 extern int thermal_generate_netlink_event(u32 orig, enum events event);
 #else
-- 
cgit v1.2.3-55-g7522


From dc76548269ca2c44e5ddc29db5c36188bc4f2234 Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:05:00 +0530
Subject: Thermal: Make thermal_cdev_update as a global function

This patch makes the thermal_cdev_update function as a
global one, so that other files can use it. This function
serves as a single arbitrator to set the state of a cooling
device.

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sys.c | 5 +++--
 include/linux/thermal.h       | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index a32803224e3c..9d21f82986ee 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1257,7 +1257,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
 }
 EXPORT_SYMBOL(thermal_cooling_device_unregister);
 
-static void thermal_cdev_do_update(struct thermal_cooling_device *cdev)
+void thermal_cdev_update(struct thermal_cooling_device *cdev)
 {
 	struct thermal_instance *instance;
 	unsigned long target = 0;
@@ -1278,13 +1278,14 @@ static void thermal_cdev_do_update(struct thermal_cooling_device *cdev)
 	cdev->ops->set_cur_state(cdev, target);
 	cdev->updated = true;
 }
+EXPORT_SYMBOL(thermal_cdev_update);
 
 static void thermal_zone_do_update(struct thermal_zone_device *tz)
 {
 	struct thermal_instance *instance;
 
 	list_for_each_entry(instance, &tz->thermal_instances, tz_node)
-		thermal_cdev_do_update(instance->cdev);
+		thermal_cdev_update(instance->cdev);
 }
 
 /*
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 6182bd5f7504..2bd915841818 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -231,6 +231,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 int get_tz_trend(struct thermal_zone_device *, int);
 struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
 		struct thermal_cooling_device *, int);
+void thermal_cdev_update(struct thermal_cooling_device *);
 
 int thermal_register_governor(struct thermal_governor *);
 void thermal_unregister_governor(struct thermal_governor *);
-- 
cgit v1.2.3-55-g7522


From f2b4caafd4aa64c9438b0a94af78b127228e6905 Mon Sep 17 00:00:00 2001
From: Durgadoss R
Date: Tue, 18 Sep 2012 11:05:05 +0530
Subject: Thermal: Add a notification API

This patch adds a notification API which the sensor drivers'
can use to notify the framework. The framework then takes
care of the throttling according to the configured policy.

Signed-off-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sys.c | 18 ++++++++++++++++++
 include/linux/thermal.h       |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 64f3e9735316..a69f24c4414a 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1368,6 +1368,24 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
 }
 EXPORT_SYMBOL(thermal_cdev_update);
 
+/**
+ * notify_thermal_framework - Sensor drivers use this API to notify framework
+ * @tz:		thermal zone device
+ * @trip:	indicates which trip point has been crossed
+ *
+ * This function handles the trip events from sensor drivers. It starts
+ * throttling the cooling devices according to the policy configured.
+ * For CRITICAL and HOT trip points, this notifies the respective drivers,
+ * and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
+ * The throttling policy is based on the configured platform data; if no
+ * platform data is provided, this uses the step_wise throttling policy.
+ */
+void notify_thermal_framework(struct thermal_zone_device *tz, int trip)
+{
+	handle_thermal_trip(tz, trip);
+}
+EXPORT_SYMBOL(notify_thermal_framework);
+
 /**
  * create_trip_attrs - create attributes for trip points
  * @tz:		the thermal zone device
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 2bd915841818..807f2146fe35 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -232,6 +232,7 @@ int get_tz_trend(struct thermal_zone_device *, int);
 struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
 		struct thermal_cooling_device *, int);
 void thermal_cdev_update(struct thermal_cooling_device *);
+void notify_thermal_framework(struct thermal_zone_device *, int);
 
 int thermal_register_governor(struct thermal_governor *);
 void thermal_unregister_governor(struct thermal_governor *);
-- 
cgit v1.2.3-55-g7522


From 7cb15e10365203bffc5cc75c79725b6c31aa663e Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Wed, 10 Oct 2012 14:27:58 +0200
Subject: pinctrl/nomadik: move the platform data header

This moves the platform data header for the Nomadik pin controller
to <linux/platform_data/pinctrl-nomadik.h>.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-nomadik/board-nhk8815.c             |   2 +-
 arch/arm/mach-nomadik/cpu-8815.c                  |   2 +-
 arch/arm/mach-nomadik/i2c-8815nhk.c               |   2 +-
 arch/arm/mach-ux500/board-mop500-audio.c          |   2 +-
 arch/arm/mach-ux500/board-mop500-pins.c           |   2 +-
 arch/arm/mach-ux500/board-mop500.c                |   2 +-
 arch/arm/mach-ux500/cpu-db8500.c                  |   4 +-
 arch/arm/mach-ux500/devices-common.c              |   3 +-
 arch/arm/plat-nomadik/include/plat/gpio-nomadik.h | 108 ----------------------
 drivers/pinctrl/pinctrl-nomadik.c                 |   2 +-
 drivers/pinctrl/pinctrl-nomadik.h                 |   2 +-
 include/linux/platform_data/pinctrl-nomadik.h     | 108 ++++++++++++++++++++++
 12 files changed, 119 insertions(+), 120 deletions(-)
 delete mode 100644 arch/arm/plat-nomadik/include/plat/gpio-nomadik.h
 create mode 100644 include/linux/platform_data/pinctrl-nomadik.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index bfa1eab91f41..3a41ad00bd68 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -24,6 +24,7 @@
 #include <linux/i2c.h>
 #include <linux/io.h>
 #include <linux/pinctrl/machine.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 #include <asm/hardware/vic.h>
 #include <asm/sizes.h>
 #include <asm/mach-types.h>
@@ -32,7 +33,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/time.h>
 
-#include <plat/gpio-nomadik.h>
 #include <plat/mtu.h>
 #include <plat/pincfg.h>
 
diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c
index b617eaed0ce5..1273931303fb 100644
--- a/arch/arm/mach-nomadik/cpu-8815.c
+++ b/arch/arm/mach-nomadik/cpu-8815.c
@@ -26,8 +26,8 @@
 #include <linux/irq.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_data/clk-nomadik.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
-#include <plat/gpio-nomadik.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-nomadik/i2c-8815nhk.c b/arch/arm/mach-nomadik/i2c-8815nhk.c
index 6d14454d4609..f81496f5790e 100644
--- a/arch/arm/mach-nomadik/i2c-8815nhk.c
+++ b/arch/arm/mach-nomadik/i2c-8815nhk.c
@@ -4,7 +4,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <linux/i2c-gpio.h>
 #include <linux/platform_device.h>
-#include <plat/gpio-nomadik.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 #include <plat/pincfg.h>
 
 /*
diff --git a/arch/arm/mach-ux500/board-mop500-audio.c b/arch/arm/mach-ux500/board-mop500-audio.c
index 070629a95625..ea828cc56357 100644
--- a/arch/arm/mach-ux500/board-mop500-audio.c
+++ b/arch/arm/mach-ux500/board-mop500-audio.c
@@ -7,8 +7,8 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
-#include <plat/gpio-nomadik.h>
 #include <plat/pincfg.h>
 #include <plat/ste_dma40.h>
 
diff --git a/arch/arm/mach-ux500/board-mop500-pins.c b/arch/arm/mach-ux500/board-mop500-pins.c
index a267c6d30e37..05f2bb581130 100644
--- a/arch/arm/mach-ux500/board-mop500-pins.c
+++ b/arch/arm/mach-ux500/board-mop500-pins.c
@@ -9,10 +9,10 @@
 #include <linux/bug.h>
 #include <linux/string.h>
 #include <linux/pinctrl/machine.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <asm/mach-types.h>
 #include <plat/pincfg.h>
-#include <plat/gpio-nomadik.h>
 
 #include <mach/hardware.h>
 
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 416d436111f2..0a3dd601a400 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -37,13 +37,13 @@
 #include <linux/of_platform.h>
 #include <linux/leds.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/hardware/gic.h>
 
 #include <plat/ste_dma40.h>
-#include <plat/gpio-nomadik.h>
 
 #include <mach/hardware.h>
 #include <mach/setup.h>
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index bcdfe6b1d453..87a8f9fbb100 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -17,14 +17,14 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/mfd/abx500/ab8500.h>
+#include <linux/platform_data/usb-musb-ux500.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <asm/pmu.h>
 #include <asm/mach/map.h>
-#include <plat/gpio-nomadik.h>
 #include <mach/hardware.h>
 #include <mach/setup.h>
 #include <mach/devices.h>
-#include <linux/platform_data/usb-musb-ux500.h>
 #include <mach/db8500-regs.h>
 
 #include "devices-db8500.h"
diff --git a/arch/arm/mach-ux500/devices-common.c b/arch/arm/mach-ux500/devices-common.c
index dfdd4a54668d..692a77a1c153 100644
--- a/arch/arm/mach-ux500/devices-common.c
+++ b/arch/arm/mach-ux500/devices-common.c
@@ -11,8 +11,7 @@
 #include <linux/irq.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
-
-#include <plat/gpio-nomadik.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <mach/hardware.h>
 
diff --git a/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h b/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h
deleted file mode 100644
index a74999438de4..000000000000
--- a/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Structures and registers for GPIO access in the Nomadik SoC
- *
- * Copyright (C) 2008 STMicroelectronics
- *     Author: Prafulla WADASKAR <prafulla.wadaskar@st.com>
- * Copyright (C) 2009 Alessandro Rubini <rubini@unipv.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __PLAT_NOMADIK_GPIO
-#define __PLAT_NOMADIK_GPIO
-
-/*
- * "nmk_gpio" and "NMK_GPIO" stand for "Nomadik GPIO", leaving
- * the "gpio" namespace for generic and cross-machine functions
- */
-
-#define GPIO_BLOCK_SHIFT 5
-#define NMK_GPIO_PER_CHIP (1 << GPIO_BLOCK_SHIFT)
-
-/* Register in the logic block */
-#define NMK_GPIO_DAT	0x00
-#define NMK_GPIO_DATS	0x04
-#define NMK_GPIO_DATC	0x08
-#define NMK_GPIO_PDIS	0x0c
-#define NMK_GPIO_DIR	0x10
-#define NMK_GPIO_DIRS	0x14
-#define NMK_GPIO_DIRC	0x18
-#define NMK_GPIO_SLPC	0x1c
-#define NMK_GPIO_AFSLA	0x20
-#define NMK_GPIO_AFSLB	0x24
-#define NMK_GPIO_LOWEMI	0x28
-
-#define NMK_GPIO_RIMSC	0x40
-#define NMK_GPIO_FIMSC	0x44
-#define NMK_GPIO_IS	0x48
-#define NMK_GPIO_IC	0x4c
-#define NMK_GPIO_RWIMSC	0x50
-#define NMK_GPIO_FWIMSC	0x54
-#define NMK_GPIO_WKS	0x58
-/* These appear in DB8540 and later ASICs */
-#define NMK_GPIO_EDGELEVEL 0x5C
-#define NMK_GPIO_LEVEL	0x60
-
-/* Alternate functions: function C is set in hw by setting both A and B */
-#define NMK_GPIO_ALT_GPIO	0
-#define NMK_GPIO_ALT_A	1
-#define NMK_GPIO_ALT_B	2
-#define NMK_GPIO_ALT_C	(NMK_GPIO_ALT_A | NMK_GPIO_ALT_B)
-
-#define NMK_GPIO_ALT_CX_SHIFT 2
-#define NMK_GPIO_ALT_C1	((1<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
-#define NMK_GPIO_ALT_C2	((2<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
-#define NMK_GPIO_ALT_C3	((3<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
-#define NMK_GPIO_ALT_C4	((4<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
-
-/* Pull up/down values */
-enum nmk_gpio_pull {
-	NMK_GPIO_PULL_NONE,
-	NMK_GPIO_PULL_UP,
-	NMK_GPIO_PULL_DOWN,
-};
-
-/* Sleep mode */
-enum nmk_gpio_slpm {
-	NMK_GPIO_SLPM_INPUT,
-	NMK_GPIO_SLPM_WAKEUP_ENABLE = NMK_GPIO_SLPM_INPUT,
-	NMK_GPIO_SLPM_NOCHANGE,
-	NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE,
-};
-
-extern int nmk_gpio_set_slpm(int gpio, enum nmk_gpio_slpm mode);
-extern int nmk_gpio_set_pull(int gpio, enum nmk_gpio_pull pull);
-#ifdef CONFIG_PINCTRL_NOMADIK
-extern int nmk_gpio_set_mode(int gpio, int gpio_mode);
-#else
-static inline int nmk_gpio_set_mode(int gpio, int gpio_mode)
-{
-	return -ENODEV;
-}
-#endif
-extern int nmk_gpio_get_mode(int gpio);
-
-extern void nmk_gpio_wakeups_suspend(void);
-extern void nmk_gpio_wakeups_resume(void);
-
-extern void nmk_gpio_clocks_enable(void);
-extern void nmk_gpio_clocks_disable(void);
-
-extern void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up);
-
-/*
- * Platform data to register a block: only the initial gpio/irq number.
- */
-struct nmk_gpio_platform_data {
-	char *name;
-	int first_gpio;
-	int first_irq;
-	int num_gpio;
-	u32 (*get_secondary_status)(unsigned int bank);
-	void (*set_ioforce)(bool enable);
-	bool supports_sleepmode;
-};
-
-#endif /* __PLAT_NOMADIK_GPIO */
diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c
index f5c0da1f3b30..e24fa05e59b8 100644
--- a/drivers/pinctrl/pinctrl-nomadik.c
+++ b/drivers/pinctrl/pinctrl-nomadik.c
@@ -44,11 +44,11 @@ static inline u32 prcmu_read(unsigned int reg) {
 static inline void prcmu_write(unsigned int reg, u32 value) {}
 static inline void prcmu_write_masked(unsigned int reg, u32 mask, u32 value) {}
 #endif
+#include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <asm/mach/irq.h>
 
 #include <plat/pincfg.h>
-#include <plat/gpio-nomadik.h>
 
 #include "pinctrl-nomadik.h"
 
diff --git a/drivers/pinctrl/pinctrl-nomadik.h b/drivers/pinctrl/pinctrl-nomadik.h
index eef316e979a0..bcd4191e10ea 100644
--- a/drivers/pinctrl/pinctrl-nomadik.h
+++ b/drivers/pinctrl/pinctrl-nomadik.h
@@ -1,7 +1,7 @@
 #ifndef PINCTRL_PINCTRL_NOMADIK_H
 #define PINCTRL_PINCTRL_NOMADIK_H
 
-#include <plat/gpio-nomadik.h>
+#include <linux/platform_data/pinctrl-nomadik.h>
 
 /* Package definitions */
 #define PINCTRL_NMK_STN8815	0
diff --git a/include/linux/platform_data/pinctrl-nomadik.h b/include/linux/platform_data/pinctrl-nomadik.h
new file mode 100644
index 000000000000..a74999438de4
--- /dev/null
+++ b/include/linux/platform_data/pinctrl-nomadik.h
@@ -0,0 +1,108 @@
+/*
+ * Structures and registers for GPIO access in the Nomadik SoC
+ *
+ * Copyright (C) 2008 STMicroelectronics
+ *     Author: Prafulla WADASKAR <prafulla.wadaskar@st.com>
+ * Copyright (C) 2009 Alessandro Rubini <rubini@unipv.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PLAT_NOMADIK_GPIO
+#define __PLAT_NOMADIK_GPIO
+
+/*
+ * "nmk_gpio" and "NMK_GPIO" stand for "Nomadik GPIO", leaving
+ * the "gpio" namespace for generic and cross-machine functions
+ */
+
+#define GPIO_BLOCK_SHIFT 5
+#define NMK_GPIO_PER_CHIP (1 << GPIO_BLOCK_SHIFT)
+
+/* Register in the logic block */
+#define NMK_GPIO_DAT	0x00
+#define NMK_GPIO_DATS	0x04
+#define NMK_GPIO_DATC	0x08
+#define NMK_GPIO_PDIS	0x0c
+#define NMK_GPIO_DIR	0x10
+#define NMK_GPIO_DIRS	0x14
+#define NMK_GPIO_DIRC	0x18
+#define NMK_GPIO_SLPC	0x1c
+#define NMK_GPIO_AFSLA	0x20
+#define NMK_GPIO_AFSLB	0x24
+#define NMK_GPIO_LOWEMI	0x28
+
+#define NMK_GPIO_RIMSC	0x40
+#define NMK_GPIO_FIMSC	0x44
+#define NMK_GPIO_IS	0x48
+#define NMK_GPIO_IC	0x4c
+#define NMK_GPIO_RWIMSC	0x50
+#define NMK_GPIO_FWIMSC	0x54
+#define NMK_GPIO_WKS	0x58
+/* These appear in DB8540 and later ASICs */
+#define NMK_GPIO_EDGELEVEL 0x5C
+#define NMK_GPIO_LEVEL	0x60
+
+/* Alternate functions: function C is set in hw by setting both A and B */
+#define NMK_GPIO_ALT_GPIO	0
+#define NMK_GPIO_ALT_A	1
+#define NMK_GPIO_ALT_B	2
+#define NMK_GPIO_ALT_C	(NMK_GPIO_ALT_A | NMK_GPIO_ALT_B)
+
+#define NMK_GPIO_ALT_CX_SHIFT 2
+#define NMK_GPIO_ALT_C1	((1<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+#define NMK_GPIO_ALT_C2	((2<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+#define NMK_GPIO_ALT_C3	((3<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+#define NMK_GPIO_ALT_C4	((4<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+
+/* Pull up/down values */
+enum nmk_gpio_pull {
+	NMK_GPIO_PULL_NONE,
+	NMK_GPIO_PULL_UP,
+	NMK_GPIO_PULL_DOWN,
+};
+
+/* Sleep mode */
+enum nmk_gpio_slpm {
+	NMK_GPIO_SLPM_INPUT,
+	NMK_GPIO_SLPM_WAKEUP_ENABLE = NMK_GPIO_SLPM_INPUT,
+	NMK_GPIO_SLPM_NOCHANGE,
+	NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE,
+};
+
+extern int nmk_gpio_set_slpm(int gpio, enum nmk_gpio_slpm mode);
+extern int nmk_gpio_set_pull(int gpio, enum nmk_gpio_pull pull);
+#ifdef CONFIG_PINCTRL_NOMADIK
+extern int nmk_gpio_set_mode(int gpio, int gpio_mode);
+#else
+static inline int nmk_gpio_set_mode(int gpio, int gpio_mode)
+{
+	return -ENODEV;
+}
+#endif
+extern int nmk_gpio_get_mode(int gpio);
+
+extern void nmk_gpio_wakeups_suspend(void);
+extern void nmk_gpio_wakeups_resume(void);
+
+extern void nmk_gpio_clocks_enable(void);
+extern void nmk_gpio_clocks_disable(void);
+
+extern void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up);
+
+/*
+ * Platform data to register a block: only the initial gpio/irq number.
+ */
+struct nmk_gpio_platform_data {
+	char *name;
+	int first_gpio;
+	int first_irq;
+	int num_gpio;
+	u32 (*get_secondary_status)(unsigned int bank);
+	void (*set_ioforce)(bool enable);
+	bool supports_sleepmode;
+};
+
+#endif /* __PLAT_NOMADIK_GPIO */
-- 
cgit v1.2.3-55-g7522


From c3123cfdc9e3241055f1ba0d6ceb8b41f97e31b9 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Wed, 10 Oct 2012 14:35:17 +0200
Subject: pinctrl/nomadik: merge old pincfg header

This merges the old <plat/pincfg.h> header into
<linux/platform_data/pinctrl-nomadik.h> and rids us of
yet one more <plat/*> include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-nomadik/board-nhk8815.c         |   1 -
 arch/arm/mach-nomadik/i2c-8815nhk.c           |   1 -
 arch/arm/mach-ux500/board-mop500-audio.c      |   1 -
 arch/arm/mach-ux500/board-mop500-pins.c       |   1 -
 arch/arm/plat-nomadik/include/plat/pincfg.h   | 173 --------------------------
 drivers/pinctrl/pinctrl-nomadik.c             |   4 +-
 include/linux/platform_data/pinctrl-nomadik.h | 158 +++++++++++++++++++++++
 7 files changed, 159 insertions(+), 180 deletions(-)
 delete mode 100644 arch/arm/plat-nomadik/include/plat/pincfg.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 3a41ad00bd68..22ef8a1abe08 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -34,7 +34,6 @@
 #include <asm/mach/time.h>
 
 #include <plat/mtu.h>
-#include <plat/pincfg.h>
 
 #include <linux/platform_data/mtd-nomadik-nand.h>
 #include <mach/fsmc.h>
diff --git a/arch/arm/mach-nomadik/i2c-8815nhk.c b/arch/arm/mach-nomadik/i2c-8815nhk.c
index f81496f5790e..0c2f6628299a 100644
--- a/arch/arm/mach-nomadik/i2c-8815nhk.c
+++ b/arch/arm/mach-nomadik/i2c-8815nhk.c
@@ -5,7 +5,6 @@
 #include <linux/i2c-gpio.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/pinctrl-nomadik.h>
-#include <plat/pincfg.h>
 
 /*
  * There are two busses in the 8815NHK.
diff --git a/arch/arm/mach-ux500/board-mop500-audio.c b/arch/arm/mach-ux500/board-mop500-audio.c
index ea828cc56357..33631c9f1218 100644
--- a/arch/arm/mach-ux500/board-mop500-audio.c
+++ b/arch/arm/mach-ux500/board-mop500-audio.c
@@ -9,7 +9,6 @@
 #include <linux/gpio.h>
 #include <linux/platform_data/pinctrl-nomadik.h>
 
-#include <plat/pincfg.h>
 #include <plat/ste_dma40.h>
 
 #include <mach/devices.h>
diff --git a/arch/arm/mach-ux500/board-mop500-pins.c b/arch/arm/mach-ux500/board-mop500-pins.c
index 05f2bb581130..c34d4efd0d5c 100644
--- a/arch/arm/mach-ux500/board-mop500-pins.c
+++ b/arch/arm/mach-ux500/board-mop500-pins.c
@@ -12,7 +12,6 @@
 #include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <asm/mach-types.h>
-#include <plat/pincfg.h>
 
 #include <mach/hardware.h>
 
diff --git a/arch/arm/plat-nomadik/include/plat/pincfg.h b/arch/arm/plat-nomadik/include/plat/pincfg.h
deleted file mode 100644
index 3b8ec60af351..000000000000
--- a/arch/arm/plat-nomadik/include/plat/pincfg.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- *
- * License terms: GNU General Public License, version 2
- * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
- *
- * Based on arch/arm/mach-pxa/include/mach/mfp.h:
- *   Copyright (C) 2007 Marvell International Ltd.
- *   eric miao <eric.miao@marvell.com>
- */
-
-#ifndef __PLAT_PINCFG_H
-#define __PLAT_PINCFG_H
-
-/*
- * pin configurations are represented by 32-bit integers:
- *
- *	bit  0.. 8 - Pin Number (512 Pins Maximum)
- *	bit  9..10 - Alternate Function Selection
- *	bit 11..12 - Pull up/down state
- *	bit     13 - Sleep mode behaviour
- *	bit     14 - Direction
- *	bit     15 - Value (if output)
- *	bit 16..18 - SLPM pull up/down state
- *	bit 19..20 - SLPM direction
- *	bit 21..22 - SLPM Value (if output)
- *	bit 23..25 - PDIS value (if input)
- *	bit	26 - Gpio mode
- *	bit	27 - Sleep mode
- *
- * to facilitate the definition, the following macros are provided
- *
- * PIN_CFG_DEFAULT - default config (0):
- *		     pull up/down = disabled
- *		     sleep mode = input/wakeup
- *		     direction = input
- *		     value = low
- *		     SLPM direction = same as normal
- *		     SLPM pull = same as normal
- *		     SLPM value = same as normal
- *
- * PIN_CFG	   - default config with alternate function
- */
-
-typedef unsigned long pin_cfg_t;
-
-#define PIN_NUM_MASK		0x1ff
-#define PIN_NUM(x)		((x) & PIN_NUM_MASK)
-
-#define PIN_ALT_SHIFT		9
-#define PIN_ALT_MASK		(0x3 << PIN_ALT_SHIFT)
-#define PIN_ALT(x)		(((x) & PIN_ALT_MASK) >> PIN_ALT_SHIFT)
-#define PIN_GPIO		(NMK_GPIO_ALT_GPIO << PIN_ALT_SHIFT)
-#define PIN_ALT_A		(NMK_GPIO_ALT_A << PIN_ALT_SHIFT)
-#define PIN_ALT_B		(NMK_GPIO_ALT_B << PIN_ALT_SHIFT)
-#define PIN_ALT_C		(NMK_GPIO_ALT_C << PIN_ALT_SHIFT)
-
-#define PIN_PULL_SHIFT		11
-#define PIN_PULL_MASK		(0x3 << PIN_PULL_SHIFT)
-#define PIN_PULL(x)		(((x) & PIN_PULL_MASK) >> PIN_PULL_SHIFT)
-#define PIN_PULL_NONE		(NMK_GPIO_PULL_NONE << PIN_PULL_SHIFT)
-#define PIN_PULL_UP		(NMK_GPIO_PULL_UP << PIN_PULL_SHIFT)
-#define PIN_PULL_DOWN		(NMK_GPIO_PULL_DOWN << PIN_PULL_SHIFT)
-
-#define PIN_SLPM_SHIFT		13
-#define PIN_SLPM_MASK		(0x1 << PIN_SLPM_SHIFT)
-#define PIN_SLPM(x)		(((x) & PIN_SLPM_MASK) >> PIN_SLPM_SHIFT)
-#define PIN_SLPM_MAKE_INPUT	(NMK_GPIO_SLPM_INPUT << PIN_SLPM_SHIFT)
-#define PIN_SLPM_NOCHANGE	(NMK_GPIO_SLPM_NOCHANGE << PIN_SLPM_SHIFT)
-/* These two replace the above in DB8500v2+ */
-#define PIN_SLPM_WAKEUP_ENABLE	(NMK_GPIO_SLPM_WAKEUP_ENABLE << PIN_SLPM_SHIFT)
-#define PIN_SLPM_WAKEUP_DISABLE	(NMK_GPIO_SLPM_WAKEUP_DISABLE << PIN_SLPM_SHIFT)
-#define PIN_SLPM_USE_MUX_SETTINGS_IN_SLEEP PIN_SLPM_WAKEUP_DISABLE
-
-#define PIN_SLPM_GPIO  PIN_SLPM_WAKEUP_ENABLE /* In SLPM, pin is a gpio */
-#define PIN_SLPM_ALTFUNC PIN_SLPM_WAKEUP_DISABLE /* In SLPM, pin is altfunc */
-
-#define PIN_DIR_SHIFT		14
-#define PIN_DIR_MASK		(0x1 << PIN_DIR_SHIFT)
-#define PIN_DIR(x)		(((x) & PIN_DIR_MASK) >> PIN_DIR_SHIFT)
-#define PIN_DIR_INPUT		(0 << PIN_DIR_SHIFT)
-#define PIN_DIR_OUTPUT		(1 << PIN_DIR_SHIFT)
-
-#define PIN_VAL_SHIFT		15
-#define PIN_VAL_MASK		(0x1 << PIN_VAL_SHIFT)
-#define PIN_VAL(x)		(((x) & PIN_VAL_MASK) >> PIN_VAL_SHIFT)
-#define PIN_VAL_LOW		(0 << PIN_VAL_SHIFT)
-#define PIN_VAL_HIGH		(1 << PIN_VAL_SHIFT)
-
-#define PIN_SLPM_PULL_SHIFT	16
-#define PIN_SLPM_PULL_MASK	(0x7 << PIN_SLPM_PULL_SHIFT)
-#define PIN_SLPM_PULL(x)	\
-	(((x) & PIN_SLPM_PULL_MASK) >> PIN_SLPM_PULL_SHIFT)
-#define PIN_SLPM_PULL_NONE	\
-	((1 + NMK_GPIO_PULL_NONE) << PIN_SLPM_PULL_SHIFT)
-#define PIN_SLPM_PULL_UP	\
-	((1 + NMK_GPIO_PULL_UP) << PIN_SLPM_PULL_SHIFT)
-#define PIN_SLPM_PULL_DOWN	\
-	((1 + NMK_GPIO_PULL_DOWN) << PIN_SLPM_PULL_SHIFT)
-
-#define PIN_SLPM_DIR_SHIFT	19
-#define PIN_SLPM_DIR_MASK	(0x3 << PIN_SLPM_DIR_SHIFT)
-#define PIN_SLPM_DIR(x)		\
-	(((x) & PIN_SLPM_DIR_MASK) >> PIN_SLPM_DIR_SHIFT)
-#define PIN_SLPM_DIR_INPUT	((1 + 0) << PIN_SLPM_DIR_SHIFT)
-#define PIN_SLPM_DIR_OUTPUT	((1 + 1) << PIN_SLPM_DIR_SHIFT)
-
-#define PIN_SLPM_VAL_SHIFT	21
-#define PIN_SLPM_VAL_MASK	(0x3 << PIN_SLPM_VAL_SHIFT)
-#define PIN_SLPM_VAL(x)		\
-	(((x) & PIN_SLPM_VAL_MASK) >> PIN_SLPM_VAL_SHIFT)
-#define PIN_SLPM_VAL_LOW	((1 + 0) << PIN_SLPM_VAL_SHIFT)
-#define PIN_SLPM_VAL_HIGH	((1 + 1) << PIN_SLPM_VAL_SHIFT)
-
-#define PIN_SLPM_PDIS_SHIFT		23
-#define PIN_SLPM_PDIS_MASK		(0x3 << PIN_SLPM_PDIS_SHIFT)
-#define PIN_SLPM_PDIS(x)	\
-	(((x) & PIN_SLPM_PDIS_MASK) >> PIN_SLPM_PDIS_SHIFT)
-#define PIN_SLPM_PDIS_NO_CHANGE		(0 << PIN_SLPM_PDIS_SHIFT)
-#define PIN_SLPM_PDIS_DISABLED		(1 << PIN_SLPM_PDIS_SHIFT)
-#define PIN_SLPM_PDIS_ENABLED		(2 << PIN_SLPM_PDIS_SHIFT)
-
-#define PIN_LOWEMI_SHIFT	25
-#define PIN_LOWEMI_MASK		(0x1 << PIN_LOWEMI_SHIFT)
-#define PIN_LOWEMI(x)		(((x) & PIN_LOWEMI_MASK) >> PIN_LOWEMI_SHIFT)
-#define PIN_LOWEMI_DISABLED	(0 << PIN_LOWEMI_SHIFT)
-#define PIN_LOWEMI_ENABLED	(1 << PIN_LOWEMI_SHIFT)
-
-#define PIN_GPIOMODE_SHIFT	26
-#define PIN_GPIOMODE_MASK	(0x1 << PIN_GPIOMODE_SHIFT)
-#define PIN_GPIOMODE(x)		(((x) & PIN_GPIOMODE_MASK) >> PIN_GPIOMODE_SHIFT)
-#define PIN_GPIOMODE_DISABLED	(0 << PIN_GPIOMODE_SHIFT)
-#define PIN_GPIOMODE_ENABLED	(1 << PIN_GPIOMODE_SHIFT)
-
-#define PIN_SLEEPMODE_SHIFT	27
-#define PIN_SLEEPMODE_MASK	(0x1 << PIN_SLEEPMODE_SHIFT)
-#define PIN_SLEEPMODE(x)	(((x) & PIN_SLEEPMODE_MASK) >> PIN_SLEEPMODE_SHIFT)
-#define PIN_SLEEPMODE_DISABLED	(0 << PIN_SLEEPMODE_SHIFT)
-#define PIN_SLEEPMODE_ENABLED	(1 << PIN_SLEEPMODE_SHIFT)
-
-
-/* Shortcuts.  Use these instead of separate DIR, PULL, and VAL.  */
-#define PIN_INPUT_PULLDOWN	(PIN_DIR_INPUT | PIN_PULL_DOWN)
-#define PIN_INPUT_PULLUP	(PIN_DIR_INPUT | PIN_PULL_UP)
-#define PIN_INPUT_NOPULL	(PIN_DIR_INPUT | PIN_PULL_NONE)
-#define PIN_OUTPUT_LOW		(PIN_DIR_OUTPUT | PIN_VAL_LOW)
-#define PIN_OUTPUT_HIGH		(PIN_DIR_OUTPUT | PIN_VAL_HIGH)
-
-#define PIN_SLPM_INPUT_PULLDOWN	(PIN_SLPM_DIR_INPUT | PIN_SLPM_PULL_DOWN)
-#define PIN_SLPM_INPUT_PULLUP	(PIN_SLPM_DIR_INPUT | PIN_SLPM_PULL_UP)
-#define PIN_SLPM_INPUT_NOPULL	(PIN_SLPM_DIR_INPUT | PIN_SLPM_PULL_NONE)
-#define PIN_SLPM_OUTPUT_LOW	(PIN_SLPM_DIR_OUTPUT | PIN_SLPM_VAL_LOW)
-#define PIN_SLPM_OUTPUT_HIGH	(PIN_SLPM_DIR_OUTPUT | PIN_SLPM_VAL_HIGH)
-
-#define PIN_CFG_DEFAULT		(0)
-
-#define PIN_CFG(num, alt)		\
-	(PIN_CFG_DEFAULT |\
-	 (PIN_NUM(num) | PIN_##alt))
-
-#define PIN_CFG_INPUT(num, alt, pull)		\
-	(PIN_CFG_DEFAULT |\
-	 (PIN_NUM(num) | PIN_##alt | PIN_INPUT_##pull))
-
-#define PIN_CFG_OUTPUT(num, alt, val)		\
-	(PIN_CFG_DEFAULT |\
-	 (PIN_NUM(num) | PIN_##alt | PIN_OUTPUT_##val))
-
-extern int nmk_config_pin(pin_cfg_t cfg, bool sleep);
-extern int nmk_config_pins(pin_cfg_t *cfgs, int num);
-extern int nmk_config_pins_sleep(pin_cfg_t *cfgs, int num);
-
-#endif
diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c
index e24fa05e59b8..a49052305756 100644
--- a/drivers/pinctrl/pinctrl-nomadik.c
+++ b/drivers/pinctrl/pinctrl-nomadik.c
@@ -48,8 +48,6 @@ static inline void prcmu_write_masked(unsigned int reg, u32 mask, u32 value) {}
 
 #include <asm/mach/irq.h>
 
-#include <plat/pincfg.h>
-
 #include "pinctrl-nomadik.h"
 
 /*
@@ -534,7 +532,7 @@ static int __nmk_config_pins(pin_cfg_t *cfgs, int num, bool sleep)
  * and its sleep mode based on the specified configuration.  The @cfg is
  * usually one of the SoC specific macros defined in mach/<soc>-pins.h.  These
  * are constructed using, and can be further enhanced with, the macros in
- * plat/pincfg.h.
+ * <linux/platform_data/pinctrl-nomadik.h>
  *
  * If a pin's mode is set to GPIO, it is configured as an input to avoid
  * side-effects.  The gpio can be manipulated later using standard GPIO API
diff --git a/include/linux/platform_data/pinctrl-nomadik.h b/include/linux/platform_data/pinctrl-nomadik.h
index a74999438de4..f73b2f0c55b7 100644
--- a/include/linux/platform_data/pinctrl-nomadik.h
+++ b/include/linux/platform_data/pinctrl-nomadik.h
@@ -13,6 +13,160 @@
 #ifndef __PLAT_NOMADIK_GPIO
 #define __PLAT_NOMADIK_GPIO
 
+/*
+ * pin configurations are represented by 32-bit integers:
+ *
+ *	bit  0.. 8 - Pin Number (512 Pins Maximum)
+ *	bit  9..10 - Alternate Function Selection
+ *	bit 11..12 - Pull up/down state
+ *	bit     13 - Sleep mode behaviour
+ *	bit     14 - Direction
+ *	bit     15 - Value (if output)
+ *	bit 16..18 - SLPM pull up/down state
+ *	bit 19..20 - SLPM direction
+ *	bit 21..22 - SLPM Value (if output)
+ *	bit 23..25 - PDIS value (if input)
+ *	bit	26 - Gpio mode
+ *	bit	27 - Sleep mode
+ *
+ * to facilitate the definition, the following macros are provided
+ *
+ * PIN_CFG_DEFAULT - default config (0):
+ *		     pull up/down = disabled
+ *		     sleep mode = input/wakeup
+ *		     direction = input
+ *		     value = low
+ *		     SLPM direction = same as normal
+ *		     SLPM pull = same as normal
+ *		     SLPM value = same as normal
+ *
+ * PIN_CFG	   - default config with alternate function
+ */
+
+typedef unsigned long pin_cfg_t;
+
+#define PIN_NUM_MASK		0x1ff
+#define PIN_NUM(x)		((x) & PIN_NUM_MASK)
+
+#define PIN_ALT_SHIFT		9
+#define PIN_ALT_MASK		(0x3 << PIN_ALT_SHIFT)
+#define PIN_ALT(x)		(((x) & PIN_ALT_MASK) >> PIN_ALT_SHIFT)
+#define PIN_GPIO		(NMK_GPIO_ALT_GPIO << PIN_ALT_SHIFT)
+#define PIN_ALT_A		(NMK_GPIO_ALT_A << PIN_ALT_SHIFT)
+#define PIN_ALT_B		(NMK_GPIO_ALT_B << PIN_ALT_SHIFT)
+#define PIN_ALT_C		(NMK_GPIO_ALT_C << PIN_ALT_SHIFT)
+
+#define PIN_PULL_SHIFT		11
+#define PIN_PULL_MASK		(0x3 << PIN_PULL_SHIFT)
+#define PIN_PULL(x)		(((x) & PIN_PULL_MASK) >> PIN_PULL_SHIFT)
+#define PIN_PULL_NONE		(NMK_GPIO_PULL_NONE << PIN_PULL_SHIFT)
+#define PIN_PULL_UP		(NMK_GPIO_PULL_UP << PIN_PULL_SHIFT)
+#define PIN_PULL_DOWN		(NMK_GPIO_PULL_DOWN << PIN_PULL_SHIFT)
+
+#define PIN_SLPM_SHIFT		13
+#define PIN_SLPM_MASK		(0x1 << PIN_SLPM_SHIFT)
+#define PIN_SLPM(x)		(((x) & PIN_SLPM_MASK) >> PIN_SLPM_SHIFT)
+#define PIN_SLPM_MAKE_INPUT	(NMK_GPIO_SLPM_INPUT << PIN_SLPM_SHIFT)
+#define PIN_SLPM_NOCHANGE	(NMK_GPIO_SLPM_NOCHANGE << PIN_SLPM_SHIFT)
+/* These two replace the above in DB8500v2+ */
+#define PIN_SLPM_WAKEUP_ENABLE	(NMK_GPIO_SLPM_WAKEUP_ENABLE << PIN_SLPM_SHIFT)
+#define PIN_SLPM_WAKEUP_DISABLE	(NMK_GPIO_SLPM_WAKEUP_DISABLE << PIN_SLPM_SHIFT)
+#define PIN_SLPM_USE_MUX_SETTINGS_IN_SLEEP PIN_SLPM_WAKEUP_DISABLE
+
+#define PIN_SLPM_GPIO  PIN_SLPM_WAKEUP_ENABLE /* In SLPM, pin is a gpio */
+#define PIN_SLPM_ALTFUNC PIN_SLPM_WAKEUP_DISABLE /* In SLPM, pin is altfunc */
+
+#define PIN_DIR_SHIFT		14
+#define PIN_DIR_MASK		(0x1 << PIN_DIR_SHIFT)
+#define PIN_DIR(x)		(((x) & PIN_DIR_MASK) >> PIN_DIR_SHIFT)
+#define PIN_DIR_INPUT		(0 << PIN_DIR_SHIFT)
+#define PIN_DIR_OUTPUT		(1 << PIN_DIR_SHIFT)
+
+#define PIN_VAL_SHIFT		15
+#define PIN_VAL_MASK		(0x1 << PIN_VAL_SHIFT)
+#define PIN_VAL(x)		(((x) & PIN_VAL_MASK) >> PIN_VAL_SHIFT)
+#define PIN_VAL_LOW		(0 << PIN_VAL_SHIFT)
+#define PIN_VAL_HIGH		(1 << PIN_VAL_SHIFT)
+
+#define PIN_SLPM_PULL_SHIFT	16
+#define PIN_SLPM_PULL_MASK	(0x7 << PIN_SLPM_PULL_SHIFT)
+#define PIN_SLPM_PULL(x)	\
+	(((x) & PIN_SLPM_PULL_MASK) >> PIN_SLPM_PULL_SHIFT)
+#define PIN_SLPM_PULL_NONE	\
+	((1 + NMK_GPIO_PULL_NONE) << PIN_SLPM_PULL_SHIFT)
+#define PIN_SLPM_PULL_UP	\
+	((1 + NMK_GPIO_PULL_UP) << PIN_SLPM_PULL_SHIFT)
+#define PIN_SLPM_PULL_DOWN	\
+	((1 + NMK_GPIO_PULL_DOWN) << PIN_SLPM_PULL_SHIFT)
+
+#define PIN_SLPM_DIR_SHIFT	19
+#define PIN_SLPM_DIR_MASK	(0x3 << PIN_SLPM_DIR_SHIFT)
+#define PIN_SLPM_DIR(x)		\
+	(((x) & PIN_SLPM_DIR_MASK) >> PIN_SLPM_DIR_SHIFT)
+#define PIN_SLPM_DIR_INPUT	((1 + 0) << PIN_SLPM_DIR_SHIFT)
+#define PIN_SLPM_DIR_OUTPUT	((1 + 1) << PIN_SLPM_DIR_SHIFT)
+
+#define PIN_SLPM_VAL_SHIFT	21
+#define PIN_SLPM_VAL_MASK	(0x3 << PIN_SLPM_VAL_SHIFT)
+#define PIN_SLPM_VAL(x)		\
+	(((x) & PIN_SLPM_VAL_MASK) >> PIN_SLPM_VAL_SHIFT)
+#define PIN_SLPM_VAL_LOW	((1 + 0) << PIN_SLPM_VAL_SHIFT)
+#define PIN_SLPM_VAL_HIGH	((1 + 1) << PIN_SLPM_VAL_SHIFT)
+
+#define PIN_SLPM_PDIS_SHIFT		23
+#define PIN_SLPM_PDIS_MASK		(0x3 << PIN_SLPM_PDIS_SHIFT)
+#define PIN_SLPM_PDIS(x)	\
+	(((x) & PIN_SLPM_PDIS_MASK) >> PIN_SLPM_PDIS_SHIFT)
+#define PIN_SLPM_PDIS_NO_CHANGE		(0 << PIN_SLPM_PDIS_SHIFT)
+#define PIN_SLPM_PDIS_DISABLED		(1 << PIN_SLPM_PDIS_SHIFT)
+#define PIN_SLPM_PDIS_ENABLED		(2 << PIN_SLPM_PDIS_SHIFT)
+
+#define PIN_LOWEMI_SHIFT	25
+#define PIN_LOWEMI_MASK		(0x1 << PIN_LOWEMI_SHIFT)
+#define PIN_LOWEMI(x)		(((x) & PIN_LOWEMI_MASK) >> PIN_LOWEMI_SHIFT)
+#define PIN_LOWEMI_DISABLED	(0 << PIN_LOWEMI_SHIFT)
+#define PIN_LOWEMI_ENABLED	(1 << PIN_LOWEMI_SHIFT)
+
+#define PIN_GPIOMODE_SHIFT	26
+#define PIN_GPIOMODE_MASK	(0x1 << PIN_GPIOMODE_SHIFT)
+#define PIN_GPIOMODE(x)		(((x) & PIN_GPIOMODE_MASK) >> PIN_GPIOMODE_SHIFT)
+#define PIN_GPIOMODE_DISABLED	(0 << PIN_GPIOMODE_SHIFT)
+#define PIN_GPIOMODE_ENABLED	(1 << PIN_GPIOMODE_SHIFT)
+
+#define PIN_SLEEPMODE_SHIFT	27
+#define PIN_SLEEPMODE_MASK	(0x1 << PIN_SLEEPMODE_SHIFT)
+#define PIN_SLEEPMODE(x)	(((x) & PIN_SLEEPMODE_MASK) >> PIN_SLEEPMODE_SHIFT)
+#define PIN_SLEEPMODE_DISABLED	(0 << PIN_SLEEPMODE_SHIFT)
+#define PIN_SLEEPMODE_ENABLED	(1 << PIN_SLEEPMODE_SHIFT)
+
+
+/* Shortcuts.  Use these instead of separate DIR, PULL, and VAL.  */
+#define PIN_INPUT_PULLDOWN	(PIN_DIR_INPUT | PIN_PULL_DOWN)
+#define PIN_INPUT_PULLUP	(PIN_DIR_INPUT | PIN_PULL_UP)
+#define PIN_INPUT_NOPULL	(PIN_DIR_INPUT | PIN_PULL_NONE)
+#define PIN_OUTPUT_LOW		(PIN_DIR_OUTPUT | PIN_VAL_LOW)
+#define PIN_OUTPUT_HIGH		(PIN_DIR_OUTPUT | PIN_VAL_HIGH)
+
+#define PIN_SLPM_INPUT_PULLDOWN	(PIN_SLPM_DIR_INPUT | PIN_SLPM_PULL_DOWN)
+#define PIN_SLPM_INPUT_PULLUP	(PIN_SLPM_DIR_INPUT | PIN_SLPM_PULL_UP)
+#define PIN_SLPM_INPUT_NOPULL	(PIN_SLPM_DIR_INPUT | PIN_SLPM_PULL_NONE)
+#define PIN_SLPM_OUTPUT_LOW	(PIN_SLPM_DIR_OUTPUT | PIN_SLPM_VAL_LOW)
+#define PIN_SLPM_OUTPUT_HIGH	(PIN_SLPM_DIR_OUTPUT | PIN_SLPM_VAL_HIGH)
+
+#define PIN_CFG_DEFAULT		(0)
+
+#define PIN_CFG(num, alt)		\
+	(PIN_CFG_DEFAULT |\
+	 (PIN_NUM(num) | PIN_##alt))
+
+#define PIN_CFG_INPUT(num, alt, pull)		\
+	(PIN_CFG_DEFAULT |\
+	 (PIN_NUM(num) | PIN_##alt | PIN_INPUT_##pull))
+
+#define PIN_CFG_OUTPUT(num, alt, val)		\
+	(PIN_CFG_DEFAULT |\
+	 (PIN_NUM(num) | PIN_##alt | PIN_OUTPUT_##val))
+
 /*
  * "nmk_gpio" and "NMK_GPIO" stand for "Nomadik GPIO", leaving
  * the "gpio" namespace for generic and cross-machine functions
@@ -72,6 +226,10 @@ enum nmk_gpio_slpm {
 	NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE,
 };
 
+/* Older deprecated pin config API that should go away soon */
+extern int nmk_config_pin(pin_cfg_t cfg, bool sleep);
+extern int nmk_config_pins(pin_cfg_t *cfgs, int num);
+extern int nmk_config_pins_sleep(pin_cfg_t *cfgs, int num);
 extern int nmk_gpio_set_slpm(int gpio, enum nmk_gpio_slpm mode);
 extern int nmk_gpio_set_pull(int gpio, enum nmk_gpio_pull pull);
 #ifdef CONFIG_PINCTRL_NOMADIK
-- 
cgit v1.2.3-55-g7522


From 865fab601b8f910b2c634cf4c9211176f2c71cad Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Thu, 18 Oct 2012 14:20:16 +0200
Subject: ARM: plat-nomadik: move DMA40 header to <linux/platform_data>

This moves the DMA40 platform data header from <plat/ste_dma40.h>
to <linux/platform_data/dma-ste-dma40.h> where is belongs.

Cc: Dan Williams <djbw@fb.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Ola Lilja <ola.o.lilja@stericsson.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Andreas Westin <andreas.westin@stericsson.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/board-mop500-audio.c       |   3 +-
 arch/arm/mach-ux500/board-mop500-sdi.c         |   2 +-
 arch/arm/mach-ux500/board-mop500.c             |   3 +-
 arch/arm/mach-ux500/devices-db8500.c           |   3 +-
 arch/arm/mach-ux500/include/mach/msp.h         |   2 +-
 arch/arm/mach-ux500/usb.c                      |   4 +-
 arch/arm/plat-nomadik/include/plat/ste_dma40.h | 223 -------------------------
 drivers/crypto/ux500/cryp/cryp_core.c          |   3 +-
 drivers/dma/ste_dma40.c                        |   3 +-
 drivers/dma/ste_dma40_ll.c                     |   2 +-
 include/linux/platform_data/crypto-ux500.h     |   2 +-
 include/linux/platform_data/dma-ste-dma40.h    | 223 +++++++++++++++++++++++++
 sound/soc/ux500/ux500_pcm.c                    |   3 +-
 13 files changed, 235 insertions(+), 241 deletions(-)
 delete mode 100644 arch/arm/plat-nomadik/include/plat/ste_dma40.h
 create mode 100644 include/linux/platform_data/dma-ste-dma40.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500-audio.c b/arch/arm/mach-ux500/board-mop500-audio.c
index 33631c9f1218..71a2ca726476 100644
--- a/arch/arm/mach-ux500/board-mop500-audio.c
+++ b/arch/arm/mach-ux500/board-mop500-audio.c
@@ -8,8 +8,7 @@
 #include <linux/init.h>
 #include <linux/gpio.h>
 #include <linux/platform_data/pinctrl-nomadik.h>
-
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include <mach/devices.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-ux500/board-mop500-sdi.c b/arch/arm/mach-ux500/board-mop500-sdi.c
index 9c8e4a9e83ee..051b62c27102 100644
--- a/arch/arm/mach-ux500/board-mop500-sdi.c
+++ b/arch/arm/mach-ux500/board-mop500-sdi.c
@@ -11,9 +11,9 @@
 #include <linux/amba/mmci.h>
 #include <linux/mmc/host.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include <asm/mach-types.h>
-#include <plat/ste_dma40.h>
 #include <mach/devices.h>
 #include <mach/hardware.h>
 
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 0a3dd601a400..0213e896fbfd 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -38,13 +38,12 @@
 #include <linux/leds.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_data/pinctrl-nomadik.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/hardware/gic.h>
 
-#include <plat/ste_dma40.h>
-
 #include <mach/hardware.h>
 #include <mach/setup.h>
 #include <mach/devices.h>
diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c
index 91754a8a0d49..bdc963cb2f45 100644
--- a/arch/arm/mach-ux500/devices-db8500.c
+++ b/arch/arm/mach-ux500/devices-db8500.c
@@ -12,8 +12,7 @@
 #include <linux/gpio.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl022.h>
-
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include <mach/hardware.h>
 #include <mach/setup.h>
diff --git a/arch/arm/mach-ux500/include/mach/msp.h b/arch/arm/mach-ux500/include/mach/msp.h
index 3cc7142eee02..9991aea3d577 100644
--- a/arch/arm/mach-ux500/include/mach/msp.h
+++ b/arch/arm/mach-ux500/include/mach/msp.h
@@ -8,7 +8,7 @@
 #ifndef __MSP_H
 #define __MSP_H
 
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 enum msp_i2s_id {
 	MSP_I2S_0 = 0,
diff --git a/arch/arm/mach-ux500/usb.c b/arch/arm/mach-ux500/usb.c
index 145482e74418..78ac65f62e87 100644
--- a/arch/arm/mach-ux500/usb.c
+++ b/arch/arm/mach-ux500/usb.c
@@ -7,10 +7,10 @@
 #include <linux/platform_device.h>
 #include <linux/usb/musb.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_data/usb-musb-ux500.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
-#include <plat/ste_dma40.h>
 #include <mach/hardware.h>
-#include <linux/platform_data/usb-musb-ux500.h>
 
 #define MUSB_DMA40_RX_CH { \
 		.mode = STEDMA40_MODE_LOGICAL, \
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
deleted file mode 100644
index 9ff93b065686..000000000000
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2007-2010
- * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
- * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
- * License terms: GNU General Public License (GPL) version 2
- */
-
-
-#ifndef STE_DMA40_H
-#define STE_DMA40_H
-
-#include <linux/dmaengine.h>
-#include <linux/scatterlist.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-/*
- * Maxium size for a single dma descriptor
- * Size is limited to 16 bits.
- * Size is in the units of addr-widths (1,2,4,8 bytes)
- * Larger transfers will be split up to multiple linked desc
- */
-#define STEDMA40_MAX_SEG_SIZE 0xFFFF
-
-/* dev types for memcpy */
-#define STEDMA40_DEV_DST_MEMORY (-1)
-#define	STEDMA40_DEV_SRC_MEMORY (-1)
-
-enum stedma40_mode {
-	STEDMA40_MODE_LOGICAL = 0,
-	STEDMA40_MODE_PHYSICAL,
-	STEDMA40_MODE_OPERATION,
-};
-
-enum stedma40_mode_opt {
-	STEDMA40_PCHAN_BASIC_MODE = 0,
-	STEDMA40_LCHAN_SRC_LOG_DST_LOG = 0,
-	STEDMA40_PCHAN_MODULO_MODE,
-	STEDMA40_PCHAN_DOUBLE_DST_MODE,
-	STEDMA40_LCHAN_SRC_PHY_DST_LOG,
-	STEDMA40_LCHAN_SRC_LOG_DST_PHY,
-};
-
-#define STEDMA40_ESIZE_8_BIT  0x0
-#define STEDMA40_ESIZE_16_BIT 0x1
-#define STEDMA40_ESIZE_32_BIT 0x2
-#define STEDMA40_ESIZE_64_BIT 0x3
-
-/* The value 4 indicates that PEN-reg shall be set to 0 */
-#define STEDMA40_PSIZE_PHY_1  0x4
-#define STEDMA40_PSIZE_PHY_2  0x0
-#define STEDMA40_PSIZE_PHY_4  0x1
-#define STEDMA40_PSIZE_PHY_8  0x2
-#define STEDMA40_PSIZE_PHY_16 0x3
-
-/*
- * The number of elements differ in logical and
- * physical mode
- */
-#define STEDMA40_PSIZE_LOG_1  STEDMA40_PSIZE_PHY_2
-#define STEDMA40_PSIZE_LOG_4  STEDMA40_PSIZE_PHY_4
-#define STEDMA40_PSIZE_LOG_8  STEDMA40_PSIZE_PHY_8
-#define STEDMA40_PSIZE_LOG_16 STEDMA40_PSIZE_PHY_16
-
-/* Maximum number of possible physical channels */
-#define STEDMA40_MAX_PHYS 32
-
-enum stedma40_flow_ctrl {
-	STEDMA40_NO_FLOW_CTRL,
-	STEDMA40_FLOW_CTRL,
-};
-
-enum stedma40_periph_data_width {
-	STEDMA40_BYTE_WIDTH = STEDMA40_ESIZE_8_BIT,
-	STEDMA40_HALFWORD_WIDTH = STEDMA40_ESIZE_16_BIT,
-	STEDMA40_WORD_WIDTH = STEDMA40_ESIZE_32_BIT,
-	STEDMA40_DOUBLEWORD_WIDTH = STEDMA40_ESIZE_64_BIT
-};
-
-enum stedma40_xfer_dir {
-	STEDMA40_MEM_TO_MEM = 1,
-	STEDMA40_MEM_TO_PERIPH,
-	STEDMA40_PERIPH_TO_MEM,
-	STEDMA40_PERIPH_TO_PERIPH
-};
-
-
-/**
- * struct stedma40_chan_cfg - dst/src channel configuration
- *
- * @big_endian: true if the src/dst should be read as big endian
- * @data_width: Data width of the src/dst hardware
- * @p_size: Burst size
- * @flow_ctrl: Flow control on/off.
- */
-struct stedma40_half_channel_info {
-	bool big_endian;
-	enum stedma40_periph_data_width data_width;
-	int psize;
-	enum stedma40_flow_ctrl flow_ctrl;
-};
-
-/**
- * struct stedma40_chan_cfg - Structure to be filled by client drivers.
- *
- * @dir: MEM 2 MEM, PERIPH 2 MEM , MEM 2 PERIPH, PERIPH 2 PERIPH
- * @high_priority: true if high-priority
- * @realtime: true if realtime mode is to be enabled.  Only available on DMA40
- * version 3+, i.e DB8500v2+
- * @mode: channel mode: physical, logical, or operation
- * @mode_opt: options for the chosen channel mode
- * @src_dev_type: Src device type
- * @dst_dev_type: Dst device type
- * @src_info: Parameters for dst half channel
- * @dst_info: Parameters for dst half channel
- * @use_fixed_channel: if true, use physical channel specified by phy_channel
- * @phy_channel: physical channel to use, only if use_fixed_channel is true
- *
- * This structure has to be filled by the client drivers.
- * It is recommended to do all dma configurations for clients in the machine.
- *
- */
-struct stedma40_chan_cfg {
-	enum stedma40_xfer_dir			 dir;
-	bool					 high_priority;
-	bool					 realtime;
-	enum stedma40_mode			 mode;
-	enum stedma40_mode_opt			 mode_opt;
-	int					 src_dev_type;
-	int					 dst_dev_type;
-	struct stedma40_half_channel_info	 src_info;
-	struct stedma40_half_channel_info	 dst_info;
-
-	bool					 use_fixed_channel;
-	int					 phy_channel;
-};
-
-/**
- * struct stedma40_platform_data - Configuration struct for the dma device.
- *
- * @dev_len: length of dev_tx and dev_rx
- * @dev_tx: mapping between destination event line and io address
- * @dev_rx: mapping between source event line and io address
- * @memcpy: list of memcpy event lines
- * @memcpy_len: length of memcpy
- * @memcpy_conf_phy: default configuration of physical channel memcpy
- * @memcpy_conf_log: default configuration of logical channel memcpy
- * @disabled_channels: A vector, ending with -1, that marks physical channels
- * that are for different reasons not available for the driver.
- */
-struct stedma40_platform_data {
-	u32				 dev_len;
-	const dma_addr_t		*dev_tx;
-	const dma_addr_t		*dev_rx;
-	int				*memcpy;
-	u32				 memcpy_len;
-	struct stedma40_chan_cfg	*memcpy_conf_phy;
-	struct stedma40_chan_cfg	*memcpy_conf_log;
-	int				 disabled_channels[STEDMA40_MAX_PHYS];
-	bool				 use_esram_lcla;
-};
-
-#ifdef CONFIG_STE_DMA40
-
-/**
- * stedma40_filter() - Provides stedma40_chan_cfg to the
- * ste_dma40 dma driver via the dmaengine framework.
- * does some checking of what's provided.
- *
- * Never directly called by client. It used by dmaengine.
- * @chan: dmaengine handle.
- * @data: Must be of type: struct stedma40_chan_cfg and is
- * the configuration of the framework.
- *
- *
- */
-
-bool stedma40_filter(struct dma_chan *chan, void *data);
-
-/**
- * stedma40_slave_mem() - Transfers a raw data buffer to or from a slave
- * (=device)
- *
- * @chan: dmaengine handle
- * @addr: source or destination physicall address.
- * @size: bytes to transfer
- * @direction: direction of transfer
- * @flags: is actually enum dma_ctrl_flags. See dmaengine.h
- */
-
-static inline struct
-dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
-					    dma_addr_t addr,
-					    unsigned int size,
-					    enum dma_transfer_direction direction,
-					    unsigned long flags)
-{
-	struct scatterlist sg;
-	sg_init_table(&sg, 1);
-	sg.dma_address = addr;
-	sg.length = size;
-
-	return dmaengine_prep_slave_sg(chan, &sg, 1, direction, flags);
-}
-
-#else
-static inline bool stedma40_filter(struct dma_chan *chan, void *data)
-{
-	return false;
-}
-
-static inline struct
-dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
-					    dma_addr_t addr,
-					    unsigned int size,
-					    enum dma_transfer_direction direction,
-					    unsigned long flags)
-{
-	return NULL;
-}
-#endif
-
-#endif
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index bc615cc56266..8bc5fef07e7a 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -23,6 +23,7 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/semaphore.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
@@ -30,8 +31,6 @@
 #include <crypto/des.h>
 #include <crypto/scatterwalk.h>
 
-#include <plat/ste_dma40.h>
-
 #include <linux/platform_data/crypto-ux500.h>
 #include <mach/hardware.h>
 
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index ae55091c2272..23c5573e62dd 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -19,8 +19,7 @@
 #include <linux/err.h>
 #include <linux/amba/bus.h>
 #include <linux/regulator/consumer.h>
-
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include "dmaengine.h"
 #include "ste_dma40_ll.h"
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
index cad9e1daedff..851ad56e8409 100644
--- a/drivers/dma/ste_dma40_ll.c
+++ b/drivers/dma/ste_dma40_ll.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/kernel.h>
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include "ste_dma40_ll.h"
 
diff --git a/include/linux/platform_data/crypto-ux500.h b/include/linux/platform_data/crypto-ux500.h
index 5b2d0817e26a..94df96d9a336 100644
--- a/include/linux/platform_data/crypto-ux500.h
+++ b/include/linux/platform_data/crypto-ux500.h
@@ -7,7 +7,7 @@
 #ifndef _CRYPTO_UX500_H
 #define _CRYPTO_UX500_H
 #include <linux/dmaengine.h>
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 struct hash_platform_data {
 	void *mem_to_engine;
diff --git a/include/linux/platform_data/dma-ste-dma40.h b/include/linux/platform_data/dma-ste-dma40.h
new file mode 100644
index 000000000000..9ff93b065686
--- /dev/null
+++ b/include/linux/platform_data/dma-ste-dma40.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+
+#ifndef STE_DMA40_H
+#define STE_DMA40_H
+
+#include <linux/dmaengine.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+
+/*
+ * Maxium size for a single dma descriptor
+ * Size is limited to 16 bits.
+ * Size is in the units of addr-widths (1,2,4,8 bytes)
+ * Larger transfers will be split up to multiple linked desc
+ */
+#define STEDMA40_MAX_SEG_SIZE 0xFFFF
+
+/* dev types for memcpy */
+#define STEDMA40_DEV_DST_MEMORY (-1)
+#define	STEDMA40_DEV_SRC_MEMORY (-1)
+
+enum stedma40_mode {
+	STEDMA40_MODE_LOGICAL = 0,
+	STEDMA40_MODE_PHYSICAL,
+	STEDMA40_MODE_OPERATION,
+};
+
+enum stedma40_mode_opt {
+	STEDMA40_PCHAN_BASIC_MODE = 0,
+	STEDMA40_LCHAN_SRC_LOG_DST_LOG = 0,
+	STEDMA40_PCHAN_MODULO_MODE,
+	STEDMA40_PCHAN_DOUBLE_DST_MODE,
+	STEDMA40_LCHAN_SRC_PHY_DST_LOG,
+	STEDMA40_LCHAN_SRC_LOG_DST_PHY,
+};
+
+#define STEDMA40_ESIZE_8_BIT  0x0
+#define STEDMA40_ESIZE_16_BIT 0x1
+#define STEDMA40_ESIZE_32_BIT 0x2
+#define STEDMA40_ESIZE_64_BIT 0x3
+
+/* The value 4 indicates that PEN-reg shall be set to 0 */
+#define STEDMA40_PSIZE_PHY_1  0x4
+#define STEDMA40_PSIZE_PHY_2  0x0
+#define STEDMA40_PSIZE_PHY_4  0x1
+#define STEDMA40_PSIZE_PHY_8  0x2
+#define STEDMA40_PSIZE_PHY_16 0x3
+
+/*
+ * The number of elements differ in logical and
+ * physical mode
+ */
+#define STEDMA40_PSIZE_LOG_1  STEDMA40_PSIZE_PHY_2
+#define STEDMA40_PSIZE_LOG_4  STEDMA40_PSIZE_PHY_4
+#define STEDMA40_PSIZE_LOG_8  STEDMA40_PSIZE_PHY_8
+#define STEDMA40_PSIZE_LOG_16 STEDMA40_PSIZE_PHY_16
+
+/* Maximum number of possible physical channels */
+#define STEDMA40_MAX_PHYS 32
+
+enum stedma40_flow_ctrl {
+	STEDMA40_NO_FLOW_CTRL,
+	STEDMA40_FLOW_CTRL,
+};
+
+enum stedma40_periph_data_width {
+	STEDMA40_BYTE_WIDTH = STEDMA40_ESIZE_8_BIT,
+	STEDMA40_HALFWORD_WIDTH = STEDMA40_ESIZE_16_BIT,
+	STEDMA40_WORD_WIDTH = STEDMA40_ESIZE_32_BIT,
+	STEDMA40_DOUBLEWORD_WIDTH = STEDMA40_ESIZE_64_BIT
+};
+
+enum stedma40_xfer_dir {
+	STEDMA40_MEM_TO_MEM = 1,
+	STEDMA40_MEM_TO_PERIPH,
+	STEDMA40_PERIPH_TO_MEM,
+	STEDMA40_PERIPH_TO_PERIPH
+};
+
+
+/**
+ * struct stedma40_chan_cfg - dst/src channel configuration
+ *
+ * @big_endian: true if the src/dst should be read as big endian
+ * @data_width: Data width of the src/dst hardware
+ * @p_size: Burst size
+ * @flow_ctrl: Flow control on/off.
+ */
+struct stedma40_half_channel_info {
+	bool big_endian;
+	enum stedma40_periph_data_width data_width;
+	int psize;
+	enum stedma40_flow_ctrl flow_ctrl;
+};
+
+/**
+ * struct stedma40_chan_cfg - Structure to be filled by client drivers.
+ *
+ * @dir: MEM 2 MEM, PERIPH 2 MEM , MEM 2 PERIPH, PERIPH 2 PERIPH
+ * @high_priority: true if high-priority
+ * @realtime: true if realtime mode is to be enabled.  Only available on DMA40
+ * version 3+, i.e DB8500v2+
+ * @mode: channel mode: physical, logical, or operation
+ * @mode_opt: options for the chosen channel mode
+ * @src_dev_type: Src device type
+ * @dst_dev_type: Dst device type
+ * @src_info: Parameters for dst half channel
+ * @dst_info: Parameters for dst half channel
+ * @use_fixed_channel: if true, use physical channel specified by phy_channel
+ * @phy_channel: physical channel to use, only if use_fixed_channel is true
+ *
+ * This structure has to be filled by the client drivers.
+ * It is recommended to do all dma configurations for clients in the machine.
+ *
+ */
+struct stedma40_chan_cfg {
+	enum stedma40_xfer_dir			 dir;
+	bool					 high_priority;
+	bool					 realtime;
+	enum stedma40_mode			 mode;
+	enum stedma40_mode_opt			 mode_opt;
+	int					 src_dev_type;
+	int					 dst_dev_type;
+	struct stedma40_half_channel_info	 src_info;
+	struct stedma40_half_channel_info	 dst_info;
+
+	bool					 use_fixed_channel;
+	int					 phy_channel;
+};
+
+/**
+ * struct stedma40_platform_data - Configuration struct for the dma device.
+ *
+ * @dev_len: length of dev_tx and dev_rx
+ * @dev_tx: mapping between destination event line and io address
+ * @dev_rx: mapping between source event line and io address
+ * @memcpy: list of memcpy event lines
+ * @memcpy_len: length of memcpy
+ * @memcpy_conf_phy: default configuration of physical channel memcpy
+ * @memcpy_conf_log: default configuration of logical channel memcpy
+ * @disabled_channels: A vector, ending with -1, that marks physical channels
+ * that are for different reasons not available for the driver.
+ */
+struct stedma40_platform_data {
+	u32				 dev_len;
+	const dma_addr_t		*dev_tx;
+	const dma_addr_t		*dev_rx;
+	int				*memcpy;
+	u32				 memcpy_len;
+	struct stedma40_chan_cfg	*memcpy_conf_phy;
+	struct stedma40_chan_cfg	*memcpy_conf_log;
+	int				 disabled_channels[STEDMA40_MAX_PHYS];
+	bool				 use_esram_lcla;
+};
+
+#ifdef CONFIG_STE_DMA40
+
+/**
+ * stedma40_filter() - Provides stedma40_chan_cfg to the
+ * ste_dma40 dma driver via the dmaengine framework.
+ * does some checking of what's provided.
+ *
+ * Never directly called by client. It used by dmaengine.
+ * @chan: dmaengine handle.
+ * @data: Must be of type: struct stedma40_chan_cfg and is
+ * the configuration of the framework.
+ *
+ *
+ */
+
+bool stedma40_filter(struct dma_chan *chan, void *data);
+
+/**
+ * stedma40_slave_mem() - Transfers a raw data buffer to or from a slave
+ * (=device)
+ *
+ * @chan: dmaengine handle
+ * @addr: source or destination physicall address.
+ * @size: bytes to transfer
+ * @direction: direction of transfer
+ * @flags: is actually enum dma_ctrl_flags. See dmaengine.h
+ */
+
+static inline struct
+dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
+					    dma_addr_t addr,
+					    unsigned int size,
+					    enum dma_transfer_direction direction,
+					    unsigned long flags)
+{
+	struct scatterlist sg;
+	sg_init_table(&sg, 1);
+	sg.dma_address = addr;
+	sg.length = size;
+
+	return dmaengine_prep_slave_sg(chan, &sg, 1, direction, flags);
+}
+
+#else
+static inline bool stedma40_filter(struct dma_chan *chan, void *data)
+{
+	return false;
+}
+
+static inline struct
+dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
+					    dma_addr_t addr,
+					    unsigned int size,
+					    enum dma_transfer_direction direction,
+					    unsigned long flags)
+{
+	return NULL;
+}
+#endif
+
+#endif
diff --git a/sound/soc/ux500/ux500_pcm.c b/sound/soc/ux500/ux500_pcm.c
index 1a04e248453c..b55b79f7536c 100644
--- a/sound/soc/ux500/ux500_pcm.c
+++ b/sound/soc/ux500/ux500_pcm.c
@@ -18,8 +18,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/slab.h>
-
-#include <plat/ste_dma40.h>
+#include <linux/platform_data/dma-ste-dma40.h>
 
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
-- 
cgit v1.2.3-55-g7522


From 694e33a7f42de7dcc8b43c3990c597b19ef9b438 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Thu, 18 Oct 2012 14:01:25 +0200
Subject: ARM: plat-nomadik: move MTU, kill plat-nomadik

This moves the MTU timer driver from arch/arm/plat-nomadik
to drivers/clocksource and moves the header file to the
platform_data directory.

As this moves the last file being compiled to an object out
of arch/arm/plat-nomadik, we have to "turn off the light"
and delete the plat-nomadik directory, because it is not
allowed to have an empty Makefile in a plat-* directory.
This is probably also a desired side effect of depopulating
the arch/arm directory of drivers. Luckily we have just
deleted all the <plat/*> include files prior to this so
by moving the last one we may delete the directory.

After this all the Ux500 and Nomadik device drivers live
outside of the arch/arm hierarchy.

Cc: Alessandro Rubini <rubini@unipv.it>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/Kconfig                                   |   1 -
 arch/arm/Makefile                                  |   1 -
 arch/arm/mach-nomadik/Kconfig                      |   2 +-
 arch/arm/mach-nomadik/board-nhk8815.c              |   6 +-
 arch/arm/mach-ux500/Kconfig                        |   2 +-
 arch/arm/mach-ux500/timer.c                        |   3 +-
 arch/arm/plat-nomadik/Kconfig                      |  29 ---
 arch/arm/plat-nomadik/Makefile                     |   5 -
 arch/arm/plat-nomadik/include/plat/mtu.h           |   9 -
 arch/arm/plat-nomadik/timer.c                      | 224 ---------------------
 drivers/char/hw_random/Kconfig                     |   2 +-
 drivers/clocksource/Kconfig                        |  17 +-
 drivers/clocksource/Makefile                       |   1 +
 drivers/clocksource/nomadik-mtu.c                  | 224 +++++++++++++++++++++
 drivers/input/keyboard/Kconfig                     |   2 +-
 drivers/mtd/nand/Kconfig                           |   2 +-
 .../linux/platform_data/clocksource-nomadik-mtu.h  |   9 +
 17 files changed, 258 insertions(+), 281 deletions(-)
 delete mode 100644 arch/arm/plat-nomadik/Kconfig
 delete mode 100644 arch/arm/plat-nomadik/Makefile
 delete mode 100644 arch/arm/plat-nomadik/include/plat/mtu.h
 delete mode 100644 arch/arm/plat-nomadik/timer.c
 create mode 100644 drivers/clocksource/nomadik-mtu.c
 create mode 100644 include/linux/platform_data/clocksource-nomadik-mtu.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ade7e924bef5..8dae1e016ac8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1065,7 +1065,6 @@ source "arch/arm/mach-mxs/Kconfig"
 source "arch/arm/mach-netx/Kconfig"
 
 source "arch/arm/mach-nomadik/Kconfig"
-source "arch/arm/plat-nomadik/Kconfig"
 
 source "arch/arm/plat-omap/Kconfig"
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 5f914fca911b..89087d599ad2 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -201,7 +201,6 @@ plat-$(CONFIG_ARCH_OMAP)	+= omap
 plat-$(CONFIG_ARCH_S3C64XX)	+= samsung
 plat-$(CONFIG_ARCH_ZYNQ)	+= versatile
 plat-$(CONFIG_PLAT_IOP)		+= iop
-plat-$(CONFIG_PLAT_NOMADIK)	+= nomadik
 plat-$(CONFIG_PLAT_ORION)	+= orion
 plat-$(CONFIG_PLAT_PXA)		+= pxa
 plat-$(CONFIG_PLAT_S3C24XX)	+= s3c24xx samsung
diff --git a/arch/arm/mach-nomadik/Kconfig b/arch/arm/mach-nomadik/Kconfig
index c744946ef022..706dc5727bbe 100644
--- a/arch/arm/mach-nomadik/Kconfig
+++ b/arch/arm/mach-nomadik/Kconfig
@@ -4,7 +4,7 @@ menu "Nomadik boards"
 
 config MACH_NOMADIK_8815NHK
 	bool "ST 8815 Nomadik Hardware Kit (evaluation board)"
-	select HAS_MTU
+	select CLKSRC_NOMADIK_MTU
 	select NOMADIK_8815
 
 endmenu
diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 16f10e04a805..5ccdf53c5a9d 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -25,16 +25,14 @@
 #include <linux/io.h>
 #include <linux/pinctrl/machine.h>
 #include <linux/platform_data/pinctrl-nomadik.h>
+#include <linux/platform_data/clocksource-nomadik-mtu.h>
+#include <linux/platform_data/mtd-nomadik-nand.h>
 #include <asm/hardware/vic.h>
 #include <asm/sizes.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/time.h>
-
-#include <plat/mtu.h>
-
-#include <linux/platform_data/mtd-nomadik-nand.h>
 #include <mach/fsmc.h>
 #include <mach/irqs.h>
 
diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index e8c3f0d70ca6..5dea90636d94 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -7,8 +7,8 @@ config UX500_SOC_COMMON
 	select ARM_ERRATA_764369 if SMP
 	select ARM_GIC
 	select CACHE_L2X0
+	select CLKSRC_NOMADIK_MTU
 	select COMMON_CLK
-	select HAS_MTU
 	select PINCTRL
 	select PINCTRL_NOMADIK
 	select PL310_ERRATA_753970 if CACHE_PL310
diff --git a/arch/arm/mach-ux500/timer.c b/arch/arm/mach-ux500/timer.c
index 20d02fa01ec3..875309acb022 100644
--- a/arch/arm/mach-ux500/timer.c
+++ b/arch/arm/mach-ux500/timer.c
@@ -9,11 +9,10 @@
 #include <linux/clksrc-dbx500-prcmu.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/platform_data/clocksource-nomadik-mtu.h>
 
 #include <asm/smp_twd.h>
 
-#include <plat/mtu.h>
-
 #include <mach/setup.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig
deleted file mode 100644
index 19f55cae5d73..000000000000
--- a/arch/arm/plat-nomadik/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-# We keep common IP's here for Nomadik and other similar
-# familiy of processors from ST-Ericsson. At the moment we have
-# just MTU, others to follow soon.
-
-config PLAT_NOMADIK
-	bool
-	depends on ARCH_NOMADIK || ARCH_U8500
-	default y
-	select CLKSRC_MMIO
-	help
-	  Common platform code for Nomadik and other ST-Ericsson
-	  platforms.
-
-if PLAT_NOMADIK
-
-config HAS_MTU
-	bool
-	help
-	  Support for Multi Timer Unit. MTU provides access
-	  to multiple interrupt generating programmable
-	  32-bit free running decrementing counters.
-
-config NOMADIK_MTU_SCHED_CLOCK
-	bool
-	depends on HAS_MTU
-	help
-	  Use the Multi Timer Unit as the sched_clock.
-
-endif
diff --git a/arch/arm/plat-nomadik/Makefile b/arch/arm/plat-nomadik/Makefile
deleted file mode 100644
index 37c7cdd0f8f0..000000000000
--- a/arch/arm/plat-nomadik/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# arch/arm/plat-nomadik/Makefile
-# Copyright 2009 ST-Ericsson
-# Licensed under GPLv2
-
-obj-$(CONFIG_HAS_MTU)	+= timer.o
diff --git a/arch/arm/plat-nomadik/include/plat/mtu.h b/arch/arm/plat-nomadik/include/plat/mtu.h
deleted file mode 100644
index 80088973b734..000000000000
--- a/arch/arm/plat-nomadik/include/plat/mtu.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __PLAT_MTU_H
-#define __PLAT_MTU_H
-
-void nmdk_timer_init(void __iomem *base, int irq);
-void nmdk_clkevt_reset(void);
-void nmdk_clksrc_reset(void);
-
-#endif /* __PLAT_MTU_H */
-
diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c
deleted file mode 100644
index f9114306657b..000000000000
--- a/arch/arm/plat-nomadik/timer.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- *  linux/arch/arm/plat-nomadik/timer.c
- *
- * Copyright (C) 2008 STMicroelectronics
- * Copyright (C) 2010 Alessandro Rubini
- * Copyright (C) 2010 Linus Walleij for ST-Ericsson
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2, as
- * published by the Free Software Foundation.
- */
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/clockchips.h>
-#include <linux/clk.h>
-#include <linux/jiffies.h>
-#include <linux/err.h>
-#include <asm/mach/time.h>
-#include <asm/sched_clock.h>
-
-/*
- * The MTU device hosts four different counters, with 4 set of
- * registers. These are register names.
- */
-
-#define MTU_IMSC	0x00	/* Interrupt mask set/clear */
-#define MTU_RIS		0x04	/* Raw interrupt status */
-#define MTU_MIS		0x08	/* Masked interrupt status */
-#define MTU_ICR		0x0C	/* Interrupt clear register */
-
-/* per-timer registers take 0..3 as argument */
-#define MTU_LR(x)	(0x10 + 0x10 * (x) + 0x00)	/* Load value */
-#define MTU_VAL(x)	(0x10 + 0x10 * (x) + 0x04)	/* Current value */
-#define MTU_CR(x)	(0x10 + 0x10 * (x) + 0x08)	/* Control reg */
-#define MTU_BGLR(x)	(0x10 + 0x10 * (x) + 0x0c)	/* At next overflow */
-
-/* bits for the control register */
-#define MTU_CRn_ENA		0x80
-#define MTU_CRn_PERIODIC	0x40	/* if 0 = free-running */
-#define MTU_CRn_PRESCALE_MASK	0x0c
-#define MTU_CRn_PRESCALE_1		0x00
-#define MTU_CRn_PRESCALE_16		0x04
-#define MTU_CRn_PRESCALE_256		0x08
-#define MTU_CRn_32BITS		0x02
-#define MTU_CRn_ONESHOT		0x01	/* if 0 = wraps reloading from BGLR*/
-
-/* Other registers are usual amba/primecell registers, currently not used */
-#define MTU_ITCR	0xff0
-#define MTU_ITOP	0xff4
-
-#define MTU_PERIPH_ID0	0xfe0
-#define MTU_PERIPH_ID1	0xfe4
-#define MTU_PERIPH_ID2	0xfe8
-#define MTU_PERIPH_ID3	0xfeC
-
-#define MTU_PCELL0	0xff0
-#define MTU_PCELL1	0xff4
-#define MTU_PCELL2	0xff8
-#define MTU_PCELL3	0xffC
-
-static void __iomem *mtu_base;
-static bool clkevt_periodic;
-static u32 clk_prescale;
-static u32 nmdk_cycle;		/* write-once */
-
-#ifdef CONFIG_NOMADIK_MTU_SCHED_CLOCK
-/*
- * Override the global weak sched_clock symbol with this
- * local implementation which uses the clocksource to get some
- * better resolution when scheduling the kernel.
- */
-static u32 notrace nomadik_read_sched_clock(void)
-{
-	if (unlikely(!mtu_base))
-		return 0;
-
-	return -readl(mtu_base + MTU_VAL(0));
-}
-#endif
-
-/* Clockevent device: use one-shot mode */
-static int nmdk_clkevt_next(unsigned long evt, struct clock_event_device *ev)
-{
-	writel(1 << 1, mtu_base + MTU_IMSC);
-	writel(evt, mtu_base + MTU_LR(1));
-	/* Load highest value, enable device, enable interrupts */
-	writel(MTU_CRn_ONESHOT | clk_prescale |
-	       MTU_CRn_32BITS | MTU_CRn_ENA,
-	       mtu_base + MTU_CR(1));
-
-	return 0;
-}
-
-void nmdk_clkevt_reset(void)
-{
-	if (clkevt_periodic) {
-		/* Timer: configure load and background-load, and fire it up */
-		writel(nmdk_cycle, mtu_base + MTU_LR(1));
-		writel(nmdk_cycle, mtu_base + MTU_BGLR(1));
-
-		writel(MTU_CRn_PERIODIC | clk_prescale |
-		       MTU_CRn_32BITS | MTU_CRn_ENA,
-		       mtu_base + MTU_CR(1));
-		writel(1 << 1, mtu_base + MTU_IMSC);
-	} else {
-		/* Generate an interrupt to start the clockevent again */
-		(void) nmdk_clkevt_next(nmdk_cycle, NULL);
-	}
-}
-
-static void nmdk_clkevt_mode(enum clock_event_mode mode,
-			     struct clock_event_device *dev)
-{
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		clkevt_periodic = true;
-		nmdk_clkevt_reset();
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-		clkevt_periodic = false;
-		break;
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_UNUSED:
-		writel(0, mtu_base + MTU_IMSC);
-		/* disable timer */
-		writel(0, mtu_base + MTU_CR(1));
-		/* load some high default value */
-		writel(0xffffffff, mtu_base + MTU_LR(1));
-		break;
-	case CLOCK_EVT_MODE_RESUME:
-		break;
-	}
-}
-
-static struct clock_event_device nmdk_clkevt = {
-	.name		= "mtu_1",
-	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
-	.rating		= 200,
-	.set_mode	= nmdk_clkevt_mode,
-	.set_next_event	= nmdk_clkevt_next,
-};
-
-/*
- * IRQ Handler for timer 1 of the MTU block.
- */
-static irqreturn_t nmdk_timer_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *evdev = dev_id;
-
-	writel(1 << 1, mtu_base + MTU_ICR); /* Interrupt clear reg */
-	evdev->event_handler(evdev);
-	return IRQ_HANDLED;
-}
-
-static struct irqaction nmdk_timer_irq = {
-	.name		= "Nomadik Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
-	.handler	= nmdk_timer_interrupt,
-	.dev_id		= &nmdk_clkevt,
-};
-
-void nmdk_clksrc_reset(void)
-{
-	/* Disable */
-	writel(0, mtu_base + MTU_CR(0));
-
-	/* ClockSource: configure load and background-load, and fire it up */
-	writel(nmdk_cycle, mtu_base + MTU_LR(0));
-	writel(nmdk_cycle, mtu_base + MTU_BGLR(0));
-
-	writel(clk_prescale | MTU_CRn_32BITS | MTU_CRn_ENA,
-	       mtu_base + MTU_CR(0));
-}
-
-void __init nmdk_timer_init(void __iomem *base, int irq)
-{
-	unsigned long rate;
-	struct clk *clk0;
-
-	mtu_base = base;
-	clk0 = clk_get_sys("mtu0", NULL);
-	BUG_ON(IS_ERR(clk0));
-	BUG_ON(clk_prepare(clk0) < 0);
-	BUG_ON(clk_enable(clk0) < 0);
-
-	/*
-	 * Tick rate is 2.4MHz for Nomadik and 2.4Mhz, 100MHz or 133 MHz
-	 * for ux500.
-	 * Use a divide-by-16 counter if the tick rate is more than 32MHz.
-	 * At 32 MHz, the timer (with 32 bit counter) can be programmed
-	 * to wake-up at a max 127s a head in time. Dividing a 2.4 MHz timer
-	 * with 16 gives too low timer resolution.
-	 */
-	rate = clk_get_rate(clk0);
-	if (rate > 32000000) {
-		rate /= 16;
-		clk_prescale = MTU_CRn_PRESCALE_16;
-	} else {
-		clk_prescale = MTU_CRn_PRESCALE_1;
-	}
-
-	/* Cycles for periodic mode */
-	nmdk_cycle = DIV_ROUND_CLOSEST(rate, HZ);
-
-
-	/* Timer 0 is the free running clocksource */
-	nmdk_clksrc_reset();
-
-	if (clocksource_mmio_init(mtu_base + MTU_VAL(0), "mtu_0",
-			rate, 200, 32, clocksource_mmio_readl_down))
-		pr_err("timer: failed to initialize clock source %s\n",
-		       "mtu_0");
-
-#ifdef CONFIG_NOMADIK_MTU_SCHED_CLOCK
-	setup_sched_clock(nomadik_read_sched_clock, 32, rate);
-#endif
-
-	/* Timer 1 is used for events, register irq and clockevents */
-	setup_irq(irq, &nmdk_timer_irq);
-	nmdk_clkevt.cpumask = cpumask_of(0);
-	clockevents_config_and_register(&nmdk_clkevt, rate, 2, 0xffffffffU);
-}
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index fbd9b2b850ef..5bc09eec9bbb 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -216,7 +216,7 @@ config HW_RANDOM_MXC_RNGA
 
 config HW_RANDOM_NOMADIK
 	tristate "ST-Ericsson Nomadik Random Number Generator support"
-	depends on HW_RANDOM && PLAT_NOMADIK
+	depends on HW_RANDOM && ARCH_NOMADIK
 	---help---
 	  This driver provides kernel-side support for the Random Number
 	  Generator hardware found on ST-Ericsson SoCs (8815 and 8500).
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 6a78073c3808..c9f67de8b7b4 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -22,6 +22,21 @@ config DW_APB_TIMER_OF
 config ARMADA_370_XP_TIMER
 	bool
 
+config CLKSRC_NOMADIK_MTU
+	bool
+	depends on (ARCH_NOMADIK || ARCH_U8500)
+	select CLKSRC_MMIO
+	help
+	  Support for Multi Timer Unit. MTU provides access
+	  to multiple interrupt generating programmable
+	  32-bit free running decrementing counters.
+
+config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
+	bool
+	depends on CLKSRC_NOMADIK_MTU
+	help
+	  Use the Multi Timer Unit as the sched_clock.
+
 config CLKSRC_DBX500_PRCMU
 	bool "Clocksource PRCMU Timer"
 	depends on UX500_SOC_DB8500
@@ -31,7 +46,7 @@ config CLKSRC_DBX500_PRCMU
 
 config CLKSRC_DBX500_PRCMU_SCHED_CLOCK
 	bool "Clocksource PRCMU Timer sched_clock"
-	depends on (CLKSRC_DBX500_PRCMU && !NOMADIK_MTU_SCHED_CLOCK)
+	depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK)
 	default y
 	help
 	  Use the always on PRCMU Timer as sched_clock
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 603be366f762..24fb888ee0a2 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_CLKBLD_I8253)	+= i8253.o
 obj-$(CONFIG_CLKSRC_MMIO)	+= mmio.o
 obj-$(CONFIG_DW_APB_TIMER)	+= dw_apb_timer.o
 obj-$(CONFIG_DW_APB_TIMER_OF)	+= dw_apb_timer_of.o
+obj-$(CONFIG_CLKSRC_NOMADIK_MTU)	+= nomadik-mtu.o
 obj-$(CONFIG_CLKSRC_DBX500_PRCMU)	+= clksrc-dbx500-prcmu.o
 obj-$(CONFIG_ARMADA_370_XP_TIMER)	+= time-armada-370-xp.o
 obj-$(CONFIG_ARCH_BCM2835)	+= bcm2835_timer.o
diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c
new file mode 100644
index 000000000000..23c780ba0d35
--- /dev/null
+++ b/drivers/clocksource/nomadik-mtu.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2008 STMicroelectronics
+ * Copyright (C) 2010 Alessandro Rubini
+ * Copyright (C) 2010 Linus Walleij for ST-Ericsson
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/clk.h>
+#include <linux/jiffies.h>
+#include <linux/err.h>
+#include <linux/platform_data/clocksource-nomadik-mtu.h>
+#include <asm/mach/time.h>
+#include <asm/sched_clock.h>
+
+/*
+ * The MTU device hosts four different counters, with 4 set of
+ * registers. These are register names.
+ */
+
+#define MTU_IMSC	0x00	/* Interrupt mask set/clear */
+#define MTU_RIS		0x04	/* Raw interrupt status */
+#define MTU_MIS		0x08	/* Masked interrupt status */
+#define MTU_ICR		0x0C	/* Interrupt clear register */
+
+/* per-timer registers take 0..3 as argument */
+#define MTU_LR(x)	(0x10 + 0x10 * (x) + 0x00)	/* Load value */
+#define MTU_VAL(x)	(0x10 + 0x10 * (x) + 0x04)	/* Current value */
+#define MTU_CR(x)	(0x10 + 0x10 * (x) + 0x08)	/* Control reg */
+#define MTU_BGLR(x)	(0x10 + 0x10 * (x) + 0x0c)	/* At next overflow */
+
+/* bits for the control register */
+#define MTU_CRn_ENA		0x80
+#define MTU_CRn_PERIODIC	0x40	/* if 0 = free-running */
+#define MTU_CRn_PRESCALE_MASK	0x0c
+#define MTU_CRn_PRESCALE_1		0x00
+#define MTU_CRn_PRESCALE_16		0x04
+#define MTU_CRn_PRESCALE_256		0x08
+#define MTU_CRn_32BITS		0x02
+#define MTU_CRn_ONESHOT		0x01	/* if 0 = wraps reloading from BGLR*/
+
+/* Other registers are usual amba/primecell registers, currently not used */
+#define MTU_ITCR	0xff0
+#define MTU_ITOP	0xff4
+
+#define MTU_PERIPH_ID0	0xfe0
+#define MTU_PERIPH_ID1	0xfe4
+#define MTU_PERIPH_ID2	0xfe8
+#define MTU_PERIPH_ID3	0xfeC
+
+#define MTU_PCELL0	0xff0
+#define MTU_PCELL1	0xff4
+#define MTU_PCELL2	0xff8
+#define MTU_PCELL3	0xffC
+
+static void __iomem *mtu_base;
+static bool clkevt_periodic;
+static u32 clk_prescale;
+static u32 nmdk_cycle;		/* write-once */
+
+#ifdef CONFIG_NOMADIK_MTU_SCHED_CLOCK
+/*
+ * Override the global weak sched_clock symbol with this
+ * local implementation which uses the clocksource to get some
+ * better resolution when scheduling the kernel.
+ */
+static u32 notrace nomadik_read_sched_clock(void)
+{
+	if (unlikely(!mtu_base))
+		return 0;
+
+	return -readl(mtu_base + MTU_VAL(0));
+}
+#endif
+
+/* Clockevent device: use one-shot mode */
+static int nmdk_clkevt_next(unsigned long evt, struct clock_event_device *ev)
+{
+	writel(1 << 1, mtu_base + MTU_IMSC);
+	writel(evt, mtu_base + MTU_LR(1));
+	/* Load highest value, enable device, enable interrupts */
+	writel(MTU_CRn_ONESHOT | clk_prescale |
+	       MTU_CRn_32BITS | MTU_CRn_ENA,
+	       mtu_base + MTU_CR(1));
+
+	return 0;
+}
+
+void nmdk_clkevt_reset(void)
+{
+	if (clkevt_periodic) {
+		/* Timer: configure load and background-load, and fire it up */
+		writel(nmdk_cycle, mtu_base + MTU_LR(1));
+		writel(nmdk_cycle, mtu_base + MTU_BGLR(1));
+
+		writel(MTU_CRn_PERIODIC | clk_prescale |
+		       MTU_CRn_32BITS | MTU_CRn_ENA,
+		       mtu_base + MTU_CR(1));
+		writel(1 << 1, mtu_base + MTU_IMSC);
+	} else {
+		/* Generate an interrupt to start the clockevent again */
+		(void) nmdk_clkevt_next(nmdk_cycle, NULL);
+	}
+}
+
+static void nmdk_clkevt_mode(enum clock_event_mode mode,
+			     struct clock_event_device *dev)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		clkevt_periodic = true;
+		nmdk_clkevt_reset();
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		clkevt_periodic = false;
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_UNUSED:
+		writel(0, mtu_base + MTU_IMSC);
+		/* disable timer */
+		writel(0, mtu_base + MTU_CR(1));
+		/* load some high default value */
+		writel(0xffffffff, mtu_base + MTU_LR(1));
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+	}
+}
+
+static struct clock_event_device nmdk_clkevt = {
+	.name		= "mtu_1",
+	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+	.rating		= 200,
+	.set_mode	= nmdk_clkevt_mode,
+	.set_next_event	= nmdk_clkevt_next,
+};
+
+/*
+ * IRQ Handler for timer 1 of the MTU block.
+ */
+static irqreturn_t nmdk_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evdev = dev_id;
+
+	writel(1 << 1, mtu_base + MTU_ICR); /* Interrupt clear reg */
+	evdev->event_handler(evdev);
+	return IRQ_HANDLED;
+}
+
+static struct irqaction nmdk_timer_irq = {
+	.name		= "Nomadik Timer Tick",
+	.flags		= IRQF_DISABLED | IRQF_TIMER,
+	.handler	= nmdk_timer_interrupt,
+	.dev_id		= &nmdk_clkevt,
+};
+
+void nmdk_clksrc_reset(void)
+{
+	/* Disable */
+	writel(0, mtu_base + MTU_CR(0));
+
+	/* ClockSource: configure load and background-load, and fire it up */
+	writel(nmdk_cycle, mtu_base + MTU_LR(0));
+	writel(nmdk_cycle, mtu_base + MTU_BGLR(0));
+
+	writel(clk_prescale | MTU_CRn_32BITS | MTU_CRn_ENA,
+	       mtu_base + MTU_CR(0));
+}
+
+void __init nmdk_timer_init(void __iomem *base, int irq)
+{
+	unsigned long rate;
+	struct clk *clk0;
+
+	mtu_base = base;
+	clk0 = clk_get_sys("mtu0", NULL);
+	BUG_ON(IS_ERR(clk0));
+	BUG_ON(clk_prepare(clk0) < 0);
+	BUG_ON(clk_enable(clk0) < 0);
+
+	/*
+	 * Tick rate is 2.4MHz for Nomadik and 2.4Mhz, 100MHz or 133 MHz
+	 * for ux500.
+	 * Use a divide-by-16 counter if the tick rate is more than 32MHz.
+	 * At 32 MHz, the timer (with 32 bit counter) can be programmed
+	 * to wake-up at a max 127s a head in time. Dividing a 2.4 MHz timer
+	 * with 16 gives too low timer resolution.
+	 */
+	rate = clk_get_rate(clk0);
+	if (rate > 32000000) {
+		rate /= 16;
+		clk_prescale = MTU_CRn_PRESCALE_16;
+	} else {
+		clk_prescale = MTU_CRn_PRESCALE_1;
+	}
+
+	/* Cycles for periodic mode */
+	nmdk_cycle = DIV_ROUND_CLOSEST(rate, HZ);
+
+
+	/* Timer 0 is the free running clocksource */
+	nmdk_clksrc_reset();
+
+	if (clocksource_mmio_init(mtu_base + MTU_VAL(0), "mtu_0",
+			rate, 200, 32, clocksource_mmio_readl_down))
+		pr_err("timer: failed to initialize clock source %s\n",
+		       "mtu_0");
+
+#ifdef CONFIG_NOMADIK_MTU_SCHED_CLOCK
+	setup_sched_clock(nomadik_read_sched_clock, 32, rate);
+#endif
+
+	/* Timer 1 is used for events, register irq and clockevents */
+	setup_irq(irq, &nmdk_timer_irq);
+	nmdk_clkevt.cpumask = cpumask_of(0);
+	clockevents_config_and_register(&nmdk_clkevt, rate, 2, 0xffffffffU);
+}
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index b4b65af8612a..855fc52945db 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -408,7 +408,7 @@ config KEYBOARD_NEWTON
 
 config KEYBOARD_NOMADIK
 	tristate "ST-Ericsson Nomadik SKE keyboard"
-	depends on PLAT_NOMADIK
+	depends on (ARCH_NOMADIK || ARCH_U8500)
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use a keypad provided on the SKE controller
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 4883139460be..e4f57482f0ae 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -559,7 +559,7 @@ config MTD_NAND_JZ4740
 
 config MTD_NAND_FSMC
 	tristate "Support for NAND on ST Micros FSMC"
-	depends on PLAT_SPEAR || PLAT_NOMADIK || MACH_U300
+	depends on PLAT_SPEAR || ARCH_NOMADIK || ARCH_U8500 || MACH_U300
 	help
 	  Enables support for NAND Flash chips on the ST Microelectronics
 	  Flexible Static Memory Controller (FSMC)
diff --git a/include/linux/platform_data/clocksource-nomadik-mtu.h b/include/linux/platform_data/clocksource-nomadik-mtu.h
new file mode 100644
index 000000000000..80088973b734
--- /dev/null
+++ b/include/linux/platform_data/clocksource-nomadik-mtu.h
@@ -0,0 +1,9 @@
+#ifndef __PLAT_MTU_H
+#define __PLAT_MTU_H
+
+void nmdk_timer_init(void __iomem *base, int irq);
+void nmdk_clkevt_reset(void);
+void nmdk_clksrc_reset(void);
+
+#endif /* __PLAT_MTU_H */
+
-- 
cgit v1.2.3-55-g7522


From ba350fbc53b5798104b3fc245bb3c3461a4ef8dc Mon Sep 17 00:00:00 2001
From: Arend van Spriel
Date: Mon, 5 Nov 2012 15:29:09 +0100
Subject: wireless: add peer-to-peer related definitions

The Peer-to-Peer IE is vendor-specific IE identified by WiFi Alliance
OUI and specific P2P OUI type. The payload of this IE consists of
so-called P2P attributes. This patch adds definitions for processing
these attributes.

Reviewed-by: Hante Meuleman <meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Franky (Zhenhui) Lin <frankyl@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 85764a900731..4530d4960953 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -905,6 +905,38 @@ struct ieee80211_tdls_data {
 	} u;
 } __packed;
 
+/*
+ * Peer-to-Peer IE attribute related definitions.
+ */
+/**
+ * enum ieee80211_p2p_attr_id - identifies type of peer-to-peer attribute.
+ */
+enum ieee80211_p2p_attr_id {
+	IEEE80211_P2P_ATTR_STATUS = 0,
+	IEEE80211_P2P_ATTR_MINOR_REASON,
+	IEEE80211_P2P_ATTR_CAPABILITY,
+	IEEE80211_P2P_ATTR_DEVICE_ID,
+	IEEE80211_P2P_ATTR_GO_INTENT,
+	IEEE80211_P2P_ATTR_GO_CONFIG_TIMEOUT,
+	IEEE80211_P2P_ATTR_LISTEN_CHANNEL,
+	IEEE80211_P2P_ATTR_GROUP_BSSID,
+	IEEE80211_P2P_ATTR_EXT_LISTEN_TIMING,
+	IEEE80211_P2P_ATTR_INTENDED_IFACE_ADDR,
+	IEEE80211_P2P_ATTR_MANAGABILITY,
+	IEEE80211_P2P_ATTR_CHANNEL_LIST,
+	IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
+	IEEE80211_P2P_ATTR_DEVICE_INFO,
+	IEEE80211_P2P_ATTR_GROUP_INFO,
+	IEEE80211_P2P_ATTR_GROUP_ID,
+	IEEE80211_P2P_ATTR_INTERFACE,
+	IEEE80211_P2P_ATTR_OPER_CHANNEL,
+	IEEE80211_P2P_ATTR_INVITE_FLAGS,
+	/* 19 - 220: Reserved */
+	IEEE80211_P2P_ATTR_VENDOR_SPECIFIC = 221,
+
+	IEEE80211_P2P_ATTR_MAX
+};
+
 /**
  * struct ieee80211_bar - HT Block Ack Request
  *
-- 
cgit v1.2.3-55-g7522


From 3ecbf05be159a95e1d23ba9b3b21c5bc2941ba6b Mon Sep 17 00:00:00 2001
From: Pawel Moll
Date: Mon, 24 Sep 2012 14:55:40 +0100
Subject: mfd: Versatile Express config infrastructure

Versatile Express platform has an elaborated configuration system,
consisting of microcontrollers residing on the mother- and
daughterboards known as Motherboard/Daughterboard Configuration
Controller (MCC and DCC). The controllers are responsible for
the platform initialization (reset generation, flash programming,
FPGA bitfiles loading etc.) but also control clock generators,
voltage regulators, gather environmental data like temperature,
power consumption etc. Even the video output switch (FPGA) is
controlled that way.

Those devices are _not_ visible in the main address space and
the usual communication channel uses some kind of a bridge in
the peripheral block sending commands (requests) to the
controllers and receiving responses. It can take up to
500 microseconds for a transaction to be completed, therefore
it is important to provide a non-blocking interface to it.

This patch adds an abstraction of this infrastructure. Bridge
drivers can register themselves with the framework. Then,
a driver of a device can request an abstract "function" - the
request will be redirected to a bridge referred by thedd
"arm,vexpress,config-bridge" property of the device tree node.

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
---
 Documentation/devicetree/bindings/arm/vexpress.txt |  68 ++++-
 drivers/mfd/Kconfig                                |   6 +
 drivers/mfd/Makefile                               |   1 +
 drivers/mfd/vexpress-config.c                      | 277 +++++++++++++++++++++
 include/linux/vexpress.h                           |  85 +++++++
 5 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mfd/vexpress-config.c
 create mode 100644 include/linux/vexpress.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/arm/vexpress.txt b/Documentation/devicetree/bindings/arm/vexpress.txt
index ec8b50cbb2e8..5d9996b9eabf 100644
--- a/Documentation/devicetree/bindings/arm/vexpress.txt
+++ b/Documentation/devicetree/bindings/arm/vexpress.txt
@@ -11,6 +11,10 @@ the motherboard file using a /include/ directive. As the motherboard
 can be initialized in one of two different configurations ("memory
 maps"), care must be taken to include the correct one.
 
+
+Root node
+---------
+
 Required properties in the root node:
 - compatible value:
 	compatible = "arm,vexpress,<model>", "arm,vexpress";
@@ -45,6 +49,10 @@ Optional properties in the root node:
   - Coretile Express A9x4 (V2P-CA9) HBI-0225:
 	arm,hbi = <0x225>;
 
+
+CPU nodes
+---------
+
 Top-level standard "cpus" node is required. It must contain a node
 with device_type = "cpu" property for every available core, eg.:
 
@@ -59,6 +67,52 @@ with device_type = "cpu" property for every available core, eg.:
 		};
 	};
 
+
+Configuration infrastructure
+----------------------------
+
+The platform has an elaborated configuration system, consisting of
+microcontrollers residing on the mother- and daughterboards known
+as Motherboard/Daughterboard Configuration Controller (MCC and DCC).
+The controllers are responsible for the platform initialization
+(reset generation, flash programming, FPGA bitfiles loading etc.)
+but also control clock generators, voltage regulators, gather
+environmental data like temperature, power consumption etc. Even
+the video output switch (FPGA) is controlled that way.
+
+Nodes describing devices controlled by this infrastructure should
+point at the bridge device node:
+- bridge phandle:
+	arm,vexpress,config-bridge = <phandle>;
+This property can be also defined in a parent node (eg. for a DCC)
+and is effective for all children.
+
+
+Platform topology
+-----------------
+
+As Versatile Express can be configured in number of physically
+different setups, the device tree should describe platform topology.
+Root node and main motherboard node must define the following
+property, describing physical location of the children nodes:
+- site number:
+	arm,vexpress,site = <number>;
+  where 0 means motherboard, 1 or 2 are daugtherboard sites,
+  0xf means "master" site (site containing main CPU tile)
+- when daughterboards are stacked on one site, their position
+  in the stack be be described with:
+	arm,vexpress,position = <number>;
+- when describing tiles consisting more than one DCC, its number
+  can be described with:
+	arm,vexpress,dcc = <number>;
+
+Any of the numbers above defaults to zero if not defined in
+the node or any of its parent.
+
+
+Motherboard
+-----------
+
 The motherboard description file provides a single "motherboard" node
 using 2 address cells corresponding to the Static Memory Bus used
 between the motherboard and the tile. The first cell defines the Chip
@@ -96,13 +150,16 @@ The tile description must define "ranges", "interrupt-map-mask" and
 "interrupt-map" properties to translate the motherboard's address
 and interrupt space into one used by the tile's processor.
 
-Abbreviated example:
+
+Example of a VE tile description (simplified)
+---------------------------------------------
 
 /dts-v1/;
 
 / {
 	model = "V2P-CA5s";
 	arm,hbi = <0x225>;
+	arm,vexpress,site = <0xf>;
 	compatible = "arm,vexpress-v2p-ca5s", "arm,vexpress";
 	interrupt-parent = <&gic>;
 	#address-cells = <1>;
@@ -134,6 +191,15 @@ Abbreviated example:
 		      <0x2c000100 0x100>;
 	};
 
+	dcc {
+		compatible = "simple-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			compatible = "arm,vexpress-osc";
+		};
+	};
+
 	motherboard {
 		/* CS0 is visible at 0x08000000 */
 		ranges = <0 0 0x08000000 0x04000000>;
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index acab3ef8a310..637bcdf8ce77 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1070,3 +1070,9 @@ config MCP_UCB1200_TS
 	depends on MCP_UCB1200 && INPUT
 
 endmenu
+
+config VEXPRESS_CONFIG
+	bool
+	help
+	  Platform configuration infrastructure for the ARM Ltd.
+	  Versatile Express.
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d8ccb630ddb0..e807164f68da 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -138,3 +138,4 @@ obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
+obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o
diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c
new file mode 100644
index 000000000000..fae15d880758
--- /dev/null
+++ b/drivers/mfd/vexpress-config.c
@@ -0,0 +1,277 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#define pr_fmt(fmt) "vexpress-config: " fmt
+
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vexpress.h>
+
+
+#define VEXPRESS_CONFIG_MAX_BRIDGES 2
+
+struct vexpress_config_bridge {
+	struct device_node *node;
+	struct vexpress_config_bridge_info *info;
+	struct list_head transactions;
+	spinlock_t transactions_lock;
+} vexpress_config_bridges[VEXPRESS_CONFIG_MAX_BRIDGES];
+
+static DECLARE_BITMAP(vexpress_config_bridges_map,
+		ARRAY_SIZE(vexpress_config_bridges));
+static DEFINE_MUTEX(vexpress_config_bridges_mutex);
+
+struct vexpress_config_bridge *vexpress_config_bridge_register(
+		struct device_node *node,
+		struct vexpress_config_bridge_info *info)
+{
+	struct vexpress_config_bridge *bridge;
+	int i;
+
+	pr_debug("Registering bridge '%s'\n", info->name);
+
+	mutex_lock(&vexpress_config_bridges_mutex);
+	i = find_first_zero_bit(vexpress_config_bridges_map,
+			ARRAY_SIZE(vexpress_config_bridges));
+	if (i >= ARRAY_SIZE(vexpress_config_bridges)) {
+		pr_err("Can't register more bridges!\n");
+		mutex_unlock(&vexpress_config_bridges_mutex);
+		return NULL;
+	}
+	__set_bit(i, vexpress_config_bridges_map);
+	bridge = &vexpress_config_bridges[i];
+
+	bridge->node = node;
+	bridge->info = info;
+	INIT_LIST_HEAD(&bridge->transactions);
+	spin_lock_init(&bridge->transactions_lock);
+
+	mutex_unlock(&vexpress_config_bridges_mutex);
+
+	return bridge;
+}
+
+void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge)
+{
+	struct vexpress_config_bridge __bridge = *bridge;
+	int i;
+
+	mutex_lock(&vexpress_config_bridges_mutex);
+	for (i = 0; i < ARRAY_SIZE(vexpress_config_bridges); i++)
+		if (&vexpress_config_bridges[i] == bridge)
+			__clear_bit(i, vexpress_config_bridges_map);
+	mutex_unlock(&vexpress_config_bridges_mutex);
+
+	WARN_ON(!list_empty(&__bridge.transactions));
+	while (!list_empty(&__bridge.transactions))
+		cpu_relax();
+}
+
+
+struct vexpress_config_func {
+	struct vexpress_config_bridge *bridge;
+	void *func;
+};
+
+struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
+		struct device_node *node)
+{
+	struct device_node *bridge_node;
+	struct vexpress_config_func *func;
+	int i;
+
+	if (WARN_ON(dev && node && dev->of_node != node))
+		return NULL;
+	if (dev && !node)
+		node = dev->of_node;
+
+	func = kzalloc(sizeof(*func), GFP_KERNEL);
+	if (!func)
+		return NULL;
+
+	bridge_node = of_node_get(node);
+	while (bridge_node) {
+		const __be32 *prop = of_get_property(bridge_node,
+				"arm,vexpress,config-bridge", NULL);
+
+		if (prop) {
+			bridge_node = of_find_node_by_phandle(
+					be32_to_cpup(prop));
+			break;
+		}
+
+		bridge_node = of_get_next_parent(bridge_node);
+	}
+
+	mutex_lock(&vexpress_config_bridges_mutex);
+	for (i = 0; i < ARRAY_SIZE(vexpress_config_bridges); i++) {
+		struct vexpress_config_bridge *bridge =
+				&vexpress_config_bridges[i];
+
+		if (test_bit(i, vexpress_config_bridges_map) &&
+				bridge->node == bridge_node) {
+			func->bridge = bridge;
+			func->func = bridge->info->func_get(dev, node);
+			break;
+		}
+	}
+	mutex_unlock(&vexpress_config_bridges_mutex);
+
+	if (!func->func) {
+		of_node_put(node);
+		kfree(func);
+		return NULL;
+	}
+
+	return func;
+}
+
+void vexpress_config_func_put(struct vexpress_config_func *func)
+{
+	func->bridge->info->func_put(func->func);
+	of_node_put(func->bridge->node);
+	kfree(func);
+}
+
+
+struct vexpress_config_trans {
+	struct vexpress_config_func *func;
+	int offset;
+	bool write;
+	u32 *data;
+	int status;
+	struct completion completion;
+	struct list_head list;
+};
+
+static void vexpress_config_dump_trans(const char *what,
+		struct vexpress_config_trans *trans)
+{
+	pr_debug("%s %s trans %p func 0x%p offset %d data 0x%x status %d\n",
+			what, trans->write ? "write" : "read", trans,
+			trans->func->func, trans->offset,
+			trans->data ? *trans->data : 0, trans->status);
+}
+
+static int vexpress_config_schedule(struct vexpress_config_trans *trans)
+{
+	int status;
+	struct vexpress_config_bridge *bridge = trans->func->bridge;
+	unsigned long flags;
+
+	init_completion(&trans->completion);
+	trans->status = -EFAULT;
+
+	spin_lock_irqsave(&bridge->transactions_lock, flags);
+
+	vexpress_config_dump_trans("Executing", trans);
+
+	if (list_empty(&bridge->transactions))
+		status = bridge->info->func_exec(trans->func->func,
+				trans->offset, trans->write, trans->data);
+	else
+		status = VEXPRESS_CONFIG_STATUS_WAIT;
+
+	switch (status) {
+	case VEXPRESS_CONFIG_STATUS_DONE:
+		vexpress_config_dump_trans("Finished", trans);
+		trans->status = status;
+		break;
+	case VEXPRESS_CONFIG_STATUS_WAIT:
+		list_add_tail(&trans->list, &bridge->transactions);
+		break;
+	}
+
+	spin_unlock_irqrestore(&bridge->transactions_lock, flags);
+
+	return status;
+}
+
+void vexpress_config_complete(struct vexpress_config_bridge *bridge,
+		int status)
+{
+	struct vexpress_config_trans *trans;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bridge->transactions_lock, flags);
+
+	trans = list_first_entry(&bridge->transactions,
+			struct vexpress_config_trans, list);
+	vexpress_config_dump_trans("Completed", trans);
+
+	trans->status = status;
+	list_del(&trans->list);
+
+	if (!list_empty(&bridge->transactions)) {
+		vexpress_config_dump_trans("Pending", trans);
+
+		bridge->info->func_exec(trans->func->func, trans->offset,
+				trans->write, trans->data);
+	}
+	spin_unlock_irqrestore(&bridge->transactions_lock, flags);
+
+	complete(&trans->completion);
+}
+
+int vexpress_config_wait(struct vexpress_config_trans *trans)
+{
+	wait_for_completion(&trans->completion);
+
+	return trans->status;
+}
+
+
+int vexpress_config_read(struct vexpress_config_func *func, int offset,
+		u32 *data)
+{
+	struct vexpress_config_trans trans = {
+		.func = func,
+		.offset = offset,
+		.write = false,
+		.data = data,
+		.status = 0,
+	};
+	int status = vexpress_config_schedule(&trans);
+
+	if (status == VEXPRESS_CONFIG_STATUS_WAIT)
+		status = vexpress_config_wait(&trans);
+
+	return status;
+}
+EXPORT_SYMBOL(vexpress_config_read);
+
+int vexpress_config_write(struct vexpress_config_func *func, int offset,
+		u32 data)
+{
+	struct vexpress_config_trans trans = {
+		.func = func,
+		.offset = offset,
+		.write = true,
+		.data = &data,
+		.status = 0,
+	};
+	int status = vexpress_config_schedule(&trans);
+
+	if (status == VEXPRESS_CONFIG_STATUS_WAIT)
+		status = vexpress_config_wait(&trans);
+
+	return status;
+}
+EXPORT_SYMBOL(vexpress_config_write);
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
new file mode 100644
index 000000000000..c2d877a7b691
--- /dev/null
+++ b/include/linux/vexpress.h
@@ -0,0 +1,85 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#ifndef _LINUX_VEXPRESS_H
+#define _LINUX_VEXPRESS_H
+
+#include <linux/device.h>
+
+#define VEXPRESS_SITE_MB		0
+#define VEXPRESS_SITE_DB1		1
+#define VEXPRESS_SITE_DB2		2
+#define VEXPRESS_SITE_MASTER		0xf
+
+#define VEXPRESS_CONFIG_STATUS_DONE	0
+#define VEXPRESS_CONFIG_STATUS_WAIT	1
+
+/* Config bridge API */
+
+/**
+ * struct vexpress_config_bridge_info - description of the platform
+ * configuration infrastructure bridge.
+ *
+ * @name:	Bridge name
+ *
+ * @func_get:	Obtains pointer to a configuration function for a given
+ *		device or a Device Tree node, to be used with @func_put
+ *		and @func_exec. The node pointer should take precedence
+ *		over device pointer when both are passed.
+ *
+ * @func_put:	Tells the bridge that the function will not be used any
+ *		more, so all allocated resources can be released.
+ *
+ * @func_exec:	Executes a configuration function read or write operation.
+ *		The offset selects a 32 bit word of the value accessed.
+ *		Must return VEXPRESS_CONFIG_STATUS_DONE when operation
+ *		is finished immediately, VEXPRESS_CONFIG_STATUS_WAIT when
+ *		will be completed in some time or negative value in case
+ *		of error.
+ */
+struct vexpress_config_bridge_info {
+	const char *name;
+	void *(*func_get)(struct device *dev, struct device_node *node);
+	void (*func_put)(void *func);
+	int (*func_exec)(void *func, int offset, bool write, u32 *data);
+};
+
+struct vexpress_config_bridge;
+
+struct vexpress_config_bridge *vexpress_config_bridge_register(
+		struct device_node *node,
+		struct vexpress_config_bridge_info *info);
+void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge);
+
+void vexpress_config_complete(struct vexpress_config_bridge *bridge,
+		int status);
+
+/* Config function API */
+
+struct vexpress_config_func;
+
+struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
+		struct device_node *node);
+#define vexpress_config_func_get_by_dev(dev) \
+		__vexpress_config_func_get(dev, NULL)
+#define vexpress_config_func_get_by_node(node) \
+		__vexpress_config_func_get(NULL, node)
+void vexpress_config_func_put(struct vexpress_config_func *func);
+
+/* Both may sleep! */
+int vexpress_config_read(struct vexpress_config_func *func, int offset,
+		u32 *data);
+int vexpress_config_write(struct vexpress_config_func *func, int offset,
+		u32 data);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 88e0abcd7a8171ca7af3402373e7bd81fe9b6754 Mon Sep 17 00:00:00 2001
From: Pawel Moll
Date: Tue, 18 Sep 2012 12:24:57 +0100
Subject: mfd: Versatile Express system registers driver

This is a platform driver for Versatile Express' "system
register" block. It's a random collection of registers providing
the following functionality:

- low level platform functions like board ID access; in order to
  use those, the driver must be initialized early, either statically
  or based on the DT

- config bus bridge via "system control" interface; as the response
  from the controller does not generate interrupt (yet), the status
  register is periodically polled using a timer

- pseudo GPIO lines providing MMC card status and Flash WP#
  signal control

- LED interface for a set of 8 LEDs on the motherboard, with
  "heartbeat", "mmc0" and "cpu0" to "cpu5" as default triggers

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
---
 .../devicetree/bindings/arm/vexpress-sysreg.txt    |  50 ++
 drivers/mfd/Makefile                               |   2 +-
 drivers/mfd/vexpress-sysreg.c                      | 552 +++++++++++++++++++++
 include/linux/vexpress.h                           |  25 +
 4 files changed, 628 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
 create mode 100644 drivers/mfd/vexpress-sysreg.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
new file mode 100644
index 000000000000..9cf3f25544c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
@@ -0,0 +1,50 @@
+ARM Versatile Express system registers
+--------------------------------------
+
+This is a system control registers block, providing multiple low level
+platform functions like board detection and identification, software
+interrupt generation, MMC and NOR Flash control etc.
+
+Required node properties:
+- compatible value : = "arm,vexpress,sysreg";
+- reg : physical base address and the size of the registers window
+- gpio-controller : specifies that the node is a GPIO controller
+- #gpio-cells : size of the GPIO specifier, should be 2:
+  - first cell is the pseudo-GPIO line number:
+    0 - MMC CARDIN
+    1 - MMC WPROT
+    2 - NOR FLASH WPn
+  - second cell can take standard GPIO flags (currently ignored).
+
+Example:
+	v2m_sysreg: sysreg@10000000 {
+ 		compatible = "arm,vexpress-sysreg";
+ 		reg = <0x10000000 0x1000>;
+		gpio-controller;
+		#gpio-cells = <2>;
+ 	};
+
+This block also can also act a bridge to the platform's configuration
+bus via "system control" interface, addressing devices with site number,
+position in the board stack, config controller, function and device
+numbers - see motherboard's TRM for more details.
+
+The node describing a config device must refer to the sysreg node via
+"arm,vexpress,config-bridge" phandle (can be also defined in the node's
+parent) and relies on the board topology properties - see main vexpress
+node documentation for more details. It must must also define the
+following property:
+- arm,vexpress-sysreg,func : must contain two cells:
+  - first cell defines function number (eg. 1 for clock generator,
+    2 for voltage regulators etc.)
+  - device number (eg. osc 0, osc 1 etc.)
+
+Example:
+	mcc {
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+		};
+	};
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index e807164f68da..296817c6c06f 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -138,4 +138,4 @@ obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
-obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o
+obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o vexpress-sysreg.o
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
new file mode 100644
index 000000000000..059d6b17b14a
--- /dev/null
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -0,0 +1,552 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/leds.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/timer.h>
+#include <linux/vexpress.h>
+
+#define SYS_ID			0x000
+#define SYS_SW			0x004
+#define SYS_LED			0x008
+#define SYS_100HZ		0x024
+#define SYS_FLAGS		0x030
+#define SYS_FLAGSSET		0x030
+#define SYS_FLAGSCLR		0x034
+#define SYS_NVFLAGS		0x038
+#define SYS_NVFLAGSSET		0x038
+#define SYS_NVFLAGSCLR		0x03c
+#define SYS_MCI			0x048
+#define SYS_FLASH		0x04c
+#define SYS_CFGSW		0x058
+#define SYS_24MHZ		0x05c
+#define SYS_MISC		0x060
+#define SYS_DMA			0x064
+#define SYS_PROCID0		0x084
+#define SYS_PROCID1		0x088
+#define SYS_CFGDATA		0x0a0
+#define SYS_CFGCTRL		0x0a4
+#define SYS_CFGSTAT		0x0a8
+
+#define SYS_HBI_MASK		0xfff
+#define SYS_ID_HBI_SHIFT	16
+#define SYS_PROCIDx_HBI_SHIFT	0
+
+#define SYS_MCI_CARDIN		(1 << 0)
+#define SYS_MCI_WPROT		(1 << 1)
+
+#define SYS_FLASH_WPn		(1 << 0)
+
+#define SYS_MISC_MASTERSITE	(1 << 14)
+
+#define SYS_CFGCTRL_START	(1 << 31)
+#define SYS_CFGCTRL_WRITE	(1 << 30)
+#define SYS_CFGCTRL_DCC(n)	(((n) & 0xf) << 26)
+#define SYS_CFGCTRL_FUNC(n)	(((n) & 0x3f) << 20)
+#define SYS_CFGCTRL_SITE(n)	(((n) & 0x3) << 16)
+#define SYS_CFGCTRL_POSITION(n)	(((n) & 0xf) << 12)
+#define SYS_CFGCTRL_DEVICE(n)	(((n) & 0xfff) << 0)
+
+#define SYS_CFGSTAT_ERR		(1 << 1)
+#define SYS_CFGSTAT_COMPLETE	(1 << 0)
+
+
+static void __iomem *vexpress_sysreg_base;
+static struct device *vexpress_sysreg_dev;
+static int vexpress_master_site;
+
+
+void vexpress_flags_set(u32 data)
+{
+	writel(~0, vexpress_sysreg_base + SYS_FLAGSCLR);
+	writel(data, vexpress_sysreg_base + SYS_FLAGSSET);
+}
+
+u32 vexpress_get_procid(int site)
+{
+	if (site == VEXPRESS_SITE_MASTER)
+		site = vexpress_master_site;
+
+	return readl(vexpress_sysreg_base + (site == VEXPRESS_SITE_DB1 ?
+			SYS_PROCID0 : SYS_PROCID1));
+}
+
+u32 vexpress_get_hbi(int site)
+{
+	u32 id;
+
+	switch (site) {
+	case VEXPRESS_SITE_MB:
+		id = readl(vexpress_sysreg_base + SYS_ID);
+		return (id >> SYS_ID_HBI_SHIFT) & SYS_HBI_MASK;
+	case VEXPRESS_SITE_MASTER:
+	case VEXPRESS_SITE_DB1:
+	case VEXPRESS_SITE_DB2:
+		id = vexpress_get_procid(site);
+		return (id >> SYS_PROCIDx_HBI_SHIFT) & SYS_HBI_MASK;
+	}
+
+	return ~0;
+}
+
+void __iomem *vexpress_get_24mhz_clock_base(void)
+{
+	return vexpress_sysreg_base + SYS_24MHZ;
+}
+
+
+static void vexpress_sysreg_find_prop(struct device_node *node,
+		const char *name, u32 *val)
+{
+	of_node_get(node);
+	while (node) {
+		if (of_property_read_u32(node, name, val) == 0) {
+			of_node_put(node);
+			return;
+		}
+		node = of_get_next_parent(node);
+	}
+}
+
+unsigned __vexpress_get_site(struct device *dev, struct device_node *node)
+{
+	u32 site = 0;
+
+	WARN_ON(dev && node && dev->of_node != node);
+	if (dev && !node)
+		node = dev->of_node;
+
+	if (node) {
+		vexpress_sysreg_find_prop(node, "arm,vexpress,site", &site);
+	} else if (dev && dev->bus == &platform_bus_type) {
+		struct platform_device *pdev = to_platform_device(dev);
+
+		if (pdev->num_resources == 1 &&
+				pdev->resource[0].flags == IORESOURCE_BUS)
+			site = pdev->resource[0].start;
+	} else if (dev && strncmp(dev_name(dev), "ct:", 3) == 0) {
+		site = VEXPRESS_SITE_MASTER;
+	}
+
+	if (site == VEXPRESS_SITE_MASTER)
+		site = vexpress_master_site;
+
+	return site;
+}
+
+
+struct vexpress_sysreg_config_func {
+	u32 template;
+	u32 device;
+};
+
+static struct vexpress_config_bridge *vexpress_sysreg_config_bridge;
+static struct timer_list vexpress_sysreg_config_timer;
+static u32 *vexpress_sysreg_config_data;
+static int vexpress_sysreg_config_tries;
+
+static void *vexpress_sysreg_config_func_get(struct device *dev,
+		struct device_node *node)
+{
+	struct vexpress_sysreg_config_func *config_func;
+	u32 site;
+	u32 position = 0;
+	u32 dcc = 0;
+	u32 func_device[2];
+	int err = -EFAULT;
+
+	if (node) {
+		of_node_get(node);
+		vexpress_sysreg_find_prop(node, "arm,vexpress,site", &site);
+		vexpress_sysreg_find_prop(node, "arm,vexpress,position",
+				&position);
+		vexpress_sysreg_find_prop(node, "arm,vexpress,dcc", &dcc);
+		err = of_property_read_u32_array(node,
+				"arm,vexpress-sysreg,func", func_device,
+				ARRAY_SIZE(func_device));
+		of_node_put(node);
+	} else if (dev && dev->bus == &platform_bus_type) {
+		struct platform_device *pdev = to_platform_device(dev);
+
+		if (pdev->num_resources == 1 &&
+				pdev->resource[0].flags == IORESOURCE_BUS) {
+			site = pdev->resource[0].start;
+			func_device[0] = pdev->resource[0].end;
+			func_device[1] = pdev->id;
+			err = 0;
+		}
+	}
+	if (err)
+		return NULL;
+
+	config_func = kzalloc(sizeof(*config_func), GFP_KERNEL);
+	if (!config_func)
+		return NULL;
+
+	config_func->template = SYS_CFGCTRL_DCC(dcc);
+	config_func->template |= SYS_CFGCTRL_FUNC(func_device[0]);
+	config_func->template |= SYS_CFGCTRL_SITE(site == VEXPRESS_SITE_MASTER ?
+			vexpress_master_site : site);
+	config_func->template |= SYS_CFGCTRL_POSITION(position);
+	config_func->device |= func_device[1];
+
+	dev_dbg(vexpress_sysreg_dev, "func 0x%p = 0x%x, %d\n", config_func,
+			config_func->template, config_func->device);
+
+	return config_func;
+}
+
+static void vexpress_sysreg_config_func_put(void *func)
+{
+	kfree(func);
+}
+
+static int vexpress_sysreg_config_func_exec(void *func, int offset,
+		bool write, u32 *data)
+{
+	int status;
+	struct vexpress_sysreg_config_func *config_func = func;
+	u32 command;
+
+	if (WARN_ON(!vexpress_sysreg_base))
+		return -ENOENT;
+
+	command = readl(vexpress_sysreg_base + SYS_CFGCTRL);
+	if (WARN_ON(command & SYS_CFGCTRL_START))
+		return -EBUSY;
+
+	command = SYS_CFGCTRL_START;
+	command |= write ? SYS_CFGCTRL_WRITE : 0;
+	command |= config_func->template;
+	command |= SYS_CFGCTRL_DEVICE(config_func->device + offset);
+
+	/* Use a canary for reads */
+	if (!write)
+		*data = 0xdeadbeef;
+
+	dev_dbg(vexpress_sysreg_dev, "command %x, data %x\n",
+			command, *data);
+	writel(*data, vexpress_sysreg_base + SYS_CFGDATA);
+	writel(0, vexpress_sysreg_base + SYS_CFGSTAT);
+	writel(command, vexpress_sysreg_base + SYS_CFGCTRL);
+	mb();
+
+	if (vexpress_sysreg_dev) {
+		/* Schedule completion check */
+		if (!write)
+			vexpress_sysreg_config_data = data;
+		vexpress_sysreg_config_tries = 100;
+		mod_timer(&vexpress_sysreg_config_timer,
+				jiffies + usecs_to_jiffies(100));
+		status = VEXPRESS_CONFIG_STATUS_WAIT;
+	} else {
+		/* Early execution, no timer available, have to spin */
+		u32 cfgstat;
+
+		do {
+			cpu_relax();
+			cfgstat = readl(vexpress_sysreg_base + SYS_CFGSTAT);
+		} while (!cfgstat);
+
+		if (!write && (cfgstat & SYS_CFGSTAT_COMPLETE))
+			*data = readl(vexpress_sysreg_base + SYS_CFGDATA);
+		status = VEXPRESS_CONFIG_STATUS_DONE;
+
+		if (cfgstat & SYS_CFGSTAT_ERR)
+			status = -EINVAL;
+	}
+
+	return status;
+}
+
+struct vexpress_config_bridge_info vexpress_sysreg_config_bridge_info = {
+	.name = "vexpress-sysreg",
+	.func_get = vexpress_sysreg_config_func_get,
+	.func_put = vexpress_sysreg_config_func_put,
+	.func_exec = vexpress_sysreg_config_func_exec,
+};
+
+static void vexpress_sysreg_config_complete(unsigned long data)
+{
+	int status = VEXPRESS_CONFIG_STATUS_DONE;
+	u32 cfgstat = readl(vexpress_sysreg_base + SYS_CFGSTAT);
+
+	if (cfgstat & SYS_CFGSTAT_ERR)
+		status = -EINVAL;
+	if (!vexpress_sysreg_config_tries--)
+		status = -ETIMEDOUT;
+
+	if (status < 0) {
+		dev_err(vexpress_sysreg_dev, "error %d\n", status);
+	} else if (!(cfgstat & SYS_CFGSTAT_COMPLETE)) {
+		mod_timer(&vexpress_sysreg_config_timer,
+				jiffies + usecs_to_jiffies(50));
+		return;
+	}
+
+	if (vexpress_sysreg_config_data) {
+		*vexpress_sysreg_config_data = readl(vexpress_sysreg_base +
+				SYS_CFGDATA);
+		dev_dbg(vexpress_sysreg_dev, "read data %x\n",
+				*vexpress_sysreg_config_data);
+		vexpress_sysreg_config_data = NULL;
+	}
+
+	vexpress_config_complete(vexpress_sysreg_config_bridge, status);
+}
+
+
+void __init vexpress_sysreg_early_init(void __iomem *base)
+{
+	struct device_node *node = of_find_compatible_node(NULL, NULL,
+			"arm,vexpress-sysreg");
+
+	if (node)
+		base = of_iomap(node, 0);
+
+	if (WARN_ON(!base))
+		return;
+
+	vexpress_sysreg_base = base;
+
+	if (readl(vexpress_sysreg_base + SYS_MISC) & SYS_MISC_MASTERSITE)
+		vexpress_master_site = VEXPRESS_SITE_DB2;
+	else
+		vexpress_master_site = VEXPRESS_SITE_DB1;
+
+	vexpress_sysreg_config_bridge = vexpress_config_bridge_register(
+			node, &vexpress_sysreg_config_bridge_info);
+	WARN_ON(!vexpress_sysreg_config_bridge);
+}
+
+void __init vexpress_sysreg_of_early_init(void)
+{
+	vexpress_sysreg_early_init(NULL);
+}
+
+
+static struct vexpress_sysreg_gpio {
+	unsigned long reg;
+	u32 value;
+} vexpress_sysreg_gpios[] = {
+	[VEXPRESS_GPIO_MMC_CARDIN] = {
+		.reg = SYS_MCI,
+		.value = SYS_MCI_CARDIN,
+	},
+	[VEXPRESS_GPIO_MMC_WPROT] = {
+		.reg = SYS_MCI,
+		.value = SYS_MCI_WPROT,
+	},
+	[VEXPRESS_GPIO_FLASH_WPn] = {
+		.reg = SYS_FLASH,
+		.value = SYS_FLASH_WPn,
+	},
+};
+
+static int vexpress_sysreg_gpio_direction_input(struct gpio_chip *chip,
+				       unsigned offset)
+{
+	return 0;
+}
+
+static int vexpress_sysreg_gpio_direction_output(struct gpio_chip *chip,
+						unsigned offset, int value)
+{
+	return 0;
+}
+
+static int vexpress_sysreg_gpio_get(struct gpio_chip *chip,
+				       unsigned offset)
+{
+	struct vexpress_sysreg_gpio *gpio = &vexpress_sysreg_gpios[offset];
+	u32 reg_value = readl(vexpress_sysreg_base + gpio->reg);
+
+	return !!(reg_value & gpio->value);
+}
+
+static void vexpress_sysreg_gpio_set(struct gpio_chip *chip,
+				       unsigned offset, int value)
+{
+	struct vexpress_sysreg_gpio *gpio = &vexpress_sysreg_gpios[offset];
+	u32 reg_value = readl(vexpress_sysreg_base + gpio->reg);
+
+	if (value)
+		reg_value |= gpio->value;
+	else
+		reg_value &= ~gpio->value;
+
+	writel(reg_value, vexpress_sysreg_base + gpio->reg);
+}
+
+static struct gpio_chip vexpress_sysreg_gpio_chip = {
+	.label = "vexpress-sysreg",
+	.direction_input = vexpress_sysreg_gpio_direction_input,
+	.direction_output = vexpress_sysreg_gpio_direction_output,
+	.get = vexpress_sysreg_gpio_get,
+	.set = vexpress_sysreg_gpio_set,
+	.ngpio = ARRAY_SIZE(vexpress_sysreg_gpios),
+	.base = 0,
+};
+
+
+static ssize_t vexpress_sysreg_sys_id_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%08x\n", readl(vexpress_sysreg_base + SYS_ID));
+}
+
+DEVICE_ATTR(sys_id, S_IRUGO, vexpress_sysreg_sys_id_show, NULL);
+
+static int __devinit vexpress_sysreg_probe(struct platform_device *pdev)
+{
+	int err;
+	struct resource *res = platform_get_resource(pdev,
+			IORESOURCE_MEM, 0);
+
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+			resource_size(res), pdev->name)) {
+		dev_err(&pdev->dev, "Failed to request memory region!\n");
+		return -EBUSY;
+	}
+
+	if (!vexpress_sysreg_base)
+		vexpress_sysreg_base = devm_ioremap(&pdev->dev, res->start,
+				resource_size(res));
+
+	if (!vexpress_sysreg_base) {
+		dev_err(&pdev->dev, "Failed to obtain base address!\n");
+		return -EFAULT;
+	}
+
+	setup_timer(&vexpress_sysreg_config_timer,
+			vexpress_sysreg_config_complete, 0);
+
+	vexpress_sysreg_gpio_chip.dev = &pdev->dev;
+	err = gpiochip_add(&vexpress_sysreg_gpio_chip);
+	if (err) {
+		vexpress_config_bridge_unregister(
+				vexpress_sysreg_config_bridge);
+		dev_err(&pdev->dev, "Failed to register GPIO chip! (%d)\n",
+				err);
+		return err;
+	}
+
+	vexpress_sysreg_dev = &pdev->dev;
+
+	device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
+
+	return 0;
+}
+
+static const struct of_device_id vexpress_sysreg_match[] = {
+	{ .compatible = "arm,vexpress-sysreg", },
+	{},
+};
+
+static struct platform_driver vexpress_sysreg_driver = {
+	.driver = {
+		.name = "vexpress-sysreg",
+		.of_match_table = vexpress_sysreg_match,
+	},
+	.probe = vexpress_sysreg_probe,
+};
+
+static int __init vexpress_sysreg_init(void)
+{
+	return platform_driver_register(&vexpress_sysreg_driver);
+}
+core_initcall(vexpress_sysreg_init);
+
+
+#if defined(CONFIG_LEDS_CLASS)
+
+struct vexpress_sysreg_led {
+	u32 mask;
+	struct led_classdev cdev;
+} vexpress_sysreg_leds[] = {
+	{ .mask = 1 << 0, .cdev.name = "v2m:green:user1",
+			.cdev.default_trigger = "heartbeat", },
+	{ .mask = 1 << 1, .cdev.name = "v2m:green:user2",
+			.cdev.default_trigger = "mmc0", },
+	{ .mask = 1 << 2, .cdev.name = "v2m:green:user3",
+			.cdev.default_trigger = "cpu0", },
+	{ .mask = 1 << 3, .cdev.name = "v2m:green:user4",
+			.cdev.default_trigger = "cpu1", },
+	{ .mask = 1 << 4, .cdev.name = "v2m:green:user5",
+			.cdev.default_trigger = "cpu2", },
+	{ .mask = 1 << 5, .cdev.name = "v2m:green:user6",
+			.cdev.default_trigger = "cpu3", },
+	{ .mask = 1 << 6, .cdev.name = "v2m:green:user7",
+			.cdev.default_trigger = "cpu4", },
+	{ .mask = 1 << 7, .cdev.name = "v2m:green:user8",
+			.cdev.default_trigger = "cpu5", },
+};
+
+static DEFINE_SPINLOCK(vexpress_sysreg_leds_lock);
+
+static void vexpress_sysreg_led_brightness_set(struct led_classdev *cdev,
+		enum led_brightness brightness)
+{
+	struct vexpress_sysreg_led *led = container_of(cdev,
+			struct vexpress_sysreg_led, cdev);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&vexpress_sysreg_leds_lock, flags);
+
+	val = readl(vexpress_sysreg_base + SYS_LED);
+	if (brightness == LED_OFF)
+		val &= ~led->mask;
+	else
+		val |= led->mask;
+	writel(val, vexpress_sysreg_base + SYS_LED);
+
+	spin_unlock_irqrestore(&vexpress_sysreg_leds_lock, flags);
+}
+
+static int __init vexpress_sysreg_init_leds(void)
+{
+	struct vexpress_sysreg_led *led;
+	int i;
+
+	/* Clear all user LEDs */
+	writel(0, vexpress_sysreg_base + SYS_LED);
+
+	for (i = 0, led = vexpress_sysreg_leds;
+			i < ARRAY_SIZE(vexpress_sysreg_leds); i++, led++) {
+		int err;
+
+		led->cdev.brightness_set = vexpress_sysreg_led_brightness_set;
+		err = led_classdev_register(vexpress_sysreg_dev, &led->cdev);
+		if (err) {
+			dev_err(vexpress_sysreg_dev,
+					"Failed to register LED %d! (%d)\n",
+					i, err);
+			while (led--, i--)
+				led_classdev_unregister(&led->cdev);
+			return err;
+		}
+	}
+
+	return 0;
+}
+device_initcall(vexpress_sysreg_init_leds);
+
+#endif
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index c2d877a7b691..09c81d7a22de 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -24,6 +24,17 @@
 #define VEXPRESS_CONFIG_STATUS_DONE	0
 #define VEXPRESS_CONFIG_STATUS_WAIT	1
 
+#define VEXPRESS_GPIO_MMC_CARDIN	0
+#define VEXPRESS_GPIO_MMC_WPROT		1
+#define VEXPRESS_GPIO_FLASH_WPn		2
+
+#define VEXPRESS_RES_FUNC(_site, _func)	\
+{					\
+	.start = (_site),		\
+	.end = (_func),			\
+	.flags = IORESOURCE_BUS,	\
+}
+
 /* Config bridge API */
 
 /**
@@ -82,4 +93,18 @@ int vexpress_config_read(struct vexpress_config_func *func, int offset,
 int vexpress_config_write(struct vexpress_config_func *func, int offset,
 		u32 data);
 
+/* Platform control */
+
+u32 vexpress_get_procid(int site);
+u32 vexpress_get_hbi(int site);
+void *vexpress_get_24mhz_clock_base(void);
+void vexpress_flags_set(u32 data);
+
+#define vexpress_get_site_by_node(node) __vexpress_get_site(NULL, node)
+#define vexpress_get_site_by_dev(dev) __vexpress_get_site(dev, NULL)
+unsigned __vexpress_get_site(struct device *dev, struct device_node *node);
+
+void vexpress_sysreg_early_init(void __iomem *base);
+void vexpress_sysreg_of_early_init(void);
+
 #endif
-- 
cgit v1.2.3-55-g7522


From 38669e045dbf8f62a008898a7fb1e93975b3817c Mon Sep 17 00:00:00 2001
From: Pawel Moll
Date: Tue, 9 Oct 2012 12:56:36 +0100
Subject: ARM: vexpress: Start using new Versatile Express infrastructure

This patch starts using all the configuration infrastructure.

- generic GPIO library is forced now

- sysreg GPIOs are used as MMC CD and WP information sources;
  thanks to this MMCI auxiliary data is not longer necessary

- DVI muxer and mode control is removed from non-DT V2P-CA9 code
  as this is now handled by the vexpress-dvi driver

- clock generators control is removed as is being handled by the
  common clock driver now

- the sysreg and sysctl control is now delegated to the
  appropriate drivers and all related code was removed

- NOR Flash set_vpp function has been removed as the control
  bit used does _not_ control its VPP line, but the #WP signal
  instead (which is de facto unusable in case of Linux MTD
  drivers); this also allowed the remove its DT auxiliary
  data

The non-DT code defines only minimal required number of
the config devices. Device Trees are updated to make use
of all new features.

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/include/asm/hardware/sp810.h             |   6 -
 arch/arm/mach-vexpress/Kconfig                    |   4 +-
 arch/arm/mach-vexpress/Makefile                   |   2 +-
 arch/arm/mach-vexpress/ct-ca9x4.c                 |  41 +--
 arch/arm/mach-vexpress/include/mach/motherboard.h |  81 ------
 arch/arm/mach-vexpress/platsmp.c                  |   3 +-
 arch/arm/mach-vexpress/v2m.c                      | 332 +++++-----------------
 include/linux/vexpress.h                          |  11 +
 8 files changed, 110 insertions(+), 370 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h
index afd7e916472f..6636430dd0e6 100644
--- a/arch/arm/include/asm/hardware/sp810.h
+++ b/arch/arm/include/asm/hardware/sp810.h
@@ -50,12 +50,6 @@
 #define SCPCELLID2		0xFF8
 #define SCPCELLID3		0xFFC
 
-#define SCCTRL_TIMEREN0SEL_REFCLK	(0 << 15)
-#define SCCTRL_TIMEREN0SEL_TIMCLK	(1 << 15)
-
-#define SCCTRL_TIMEREN1SEL_REFCLK	(0 << 17)
-#define SCCTRL_TIMEREN1SEL_TIMCLK	(1 << 17)
-
 #define SCCTRL_TIMERENnSEL_SHIFT(n)	(15 + ((n) * 2))
 
 static inline void sysctl_soft_reset(void __iomem *base)
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index c95296066203..99e63f5f99d1 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -1,11 +1,12 @@
 config ARCH_VEXPRESS
 	bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7
-	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_REQUIRE_GPIOLIB
 	select ARM_AMBA
 	select ARM_GIC
 	select ARM_TIMER_SP804
 	select CLKDEV_LOOKUP
 	select COMMON_CLK
+	select COMMON_CLK_VERSATILE
 	select CPU_V7
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
@@ -17,6 +18,7 @@ config ARCH_VEXPRESS
 	select PLAT_VERSATILE
 	select PLAT_VERSATILE_CLCD
 	select REGULATOR_FIXED_VOLTAGE if REGULATOR
+	select VEXPRESS_CONFIG
 	help
 	  This option enables support for systems using Cortex processor based
 	  ARM core and logic (FPGA) tiles on the Versatile Express motherboard,
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 42703e8b4d3b..80b64971fbdd 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -4,7 +4,7 @@
 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
 	-I$(srctree)/arch/arm/plat-versatile/include
 
-obj-y					:= v2m.o
+obj-y					:= v2m.o reset.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 4f471fa3e3c5..60838ddb8564 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -9,6 +9,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
 #include <linux/clkdev.h>
+#include <linux/vexpress.h>
 
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -64,19 +65,6 @@ static void __init ct_ca9x4_init_irq(void)
 	ca9x4_twd_init();
 }
 
-static void ct_ca9x4_clcd_enable(struct clcd_fb *fb)
-{
-	u32 site = v2m_get_master_site();
-
-	/*
-	 * Old firmware was using the "site" component of the command
-	 * to control the DVI muxer (while it should be always 0 ie. MB).
-	 * Newer firmware uses the data register. Keep both for compatibility.
-	 */
-	v2m_cfg_write(SYS_CFG_MUXFPGA | SYS_CFG_SITE(site), site);
-	v2m_cfg_write(SYS_CFG_DVIMODE | SYS_CFG_SITE(SYS_CFG_SITE_MB), 2);
-}
-
 static int ct_ca9x4_clcd_setup(struct clcd_fb *fb)
 {
 	unsigned long framesize = 1024 * 768 * 2;
@@ -93,7 +81,6 @@ static struct clcd_board ct_ca9x4_clcd_data = {
 	.caps		= CLCD_CAP_5551 | CLCD_CAP_565,
 	.check		= clcdfb_check,
 	.decode		= clcdfb_decode,
-	.enable		= ct_ca9x4_clcd_enable,
 	.setup		= ct_ca9x4_clcd_setup,
 	.mmap		= versatile_clcd_mmap_dma,
 	.remove		= versatile_clcd_remove_dma,
@@ -111,14 +98,6 @@ static struct amba_device *ct_ca9x4_amba_devs[] __initdata = {
 	&gpio_device,
 };
 
-
-static struct v2m_osc ct_osc1 = {
-	.osc = 1,
-	.rate_min = 10000000,
-	.rate_max = 80000000,
-	.rate_default = 23750000,
-};
-
 static struct resource pmu_resources[] = {
 	[0] = {
 		.start	= IRQ_CT_CA9X4_PMU_CPU0,
@@ -149,10 +128,18 @@ static struct platform_device pmu_device = {
 	.resource	= pmu_resources,
 };
 
+static struct platform_device osc1_device = {
+	.name		= "vexpress-osc",
+	.id		= 1,
+	.num_resources	= 1,
+	.resource	= (struct resource []) {
+		VEXPRESS_RES_FUNC(0xf, 1),
+	},
+};
+
 static void __init ct_ca9x4_init(void)
 {
 	int i;
-	struct clk *clk;
 
 #ifdef CONFIG_CACHE_L2X0
 	void __iomem *l2x0_base = ioremap(CT_CA9X4_L2CC, SZ_4K);
@@ -164,14 +151,14 @@ static void __init ct_ca9x4_init(void)
 	l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff);
 #endif
 
-	ct_osc1.site = v2m_get_master_site();
-	clk = v2m_osc_register("ct:osc1", &ct_osc1);
-	clk_register_clkdev(clk, NULL, "ct:clcd");
-
 	for (i = 0; i < ARRAY_SIZE(ct_ca9x4_amba_devs); i++)
 		amba_device_register(ct_ca9x4_amba_devs[i], &iomem_resource);
 
 	platform_device_register(&pmu_device);
+	platform_device_register(&osc1_device);
+
+	WARN_ON(clk_register_clkdev(vexpress_osc_setup(&osc1_device.dev),
+			NULL, "ct:clcd"));
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/arm/mach-vexpress/include/mach/motherboard.h b/arch/arm/mach-vexpress/include/mach/motherboard.h
index 1e388c7bf4d7..68abc8b72781 100644
--- a/arch/arm/mach-vexpress/include/mach/motherboard.h
+++ b/arch/arm/mach-vexpress/include/mach/motherboard.h
@@ -1,8 +1,6 @@
 #ifndef __MACH_MOTHERBOARD_H
 #define __MACH_MOTHERBOARD_H
 
-#include <linux/clk-provider.h>
-
 /*
  * Physical addresses, offset from V2M_PA_CS0-3
  */
@@ -41,31 +39,6 @@
 #define V2M_CF			(V2M_PA_CS7 + 0x0001a000)
 #define V2M_CLCD		(V2M_PA_CS7 + 0x0001f000)
 
-/*
- * Offsets from SYSREGS base
- */
-#define V2M_SYS_ID		0x000
-#define V2M_SYS_SW		0x004
-#define V2M_SYS_LED		0x008
-#define V2M_SYS_100HZ		0x024
-#define V2M_SYS_FLAGS		0x030
-#define V2M_SYS_FLAGSSET	0x030
-#define V2M_SYS_FLAGSCLR	0x034
-#define V2M_SYS_NVFLAGS		0x038
-#define V2M_SYS_NVFLAGSSET	0x038
-#define V2M_SYS_NVFLAGSCLR	0x03c
-#define V2M_SYS_MCI		0x048
-#define V2M_SYS_FLASH		0x03c
-#define V2M_SYS_CFGSW		0x058
-#define V2M_SYS_24MHZ		0x05c
-#define V2M_SYS_MISC		0x060
-#define V2M_SYS_DMA		0x064
-#define V2M_SYS_PROCID0		0x084
-#define V2M_SYS_PROCID1		0x088
-#define V2M_SYS_CFGDATA		0x0a0
-#define V2M_SYS_CFGCTRL		0x0a4
-#define V2M_SYS_CFGSTAT		0x0a8
-
 
 /*
  * Interrupts.  Those in {} are for AMBA devices
@@ -90,43 +63,6 @@
 #define IRQ_V2M_PCIE		(32 + 17)
 
 
-/*
- * Configuration
- */
-#define SYS_CFG_START		(1 << 31)
-#define SYS_CFG_WRITE		(1 << 30)
-#define SYS_CFG_OSC		(1 << 20)
-#define SYS_CFG_VOLT		(2 << 20)
-#define SYS_CFG_AMP		(3 << 20)
-#define SYS_CFG_TEMP		(4 << 20)
-#define SYS_CFG_RESET		(5 << 20)
-#define SYS_CFG_SCC		(6 << 20)
-#define SYS_CFG_MUXFPGA		(7 << 20)
-#define SYS_CFG_SHUTDOWN	(8 << 20)
-#define SYS_CFG_REBOOT		(9 << 20)
-#define SYS_CFG_DVIMODE		(11 << 20)
-#define SYS_CFG_POWER		(12 << 20)
-#define SYS_CFG_SITE(n)		((n) << 16)
-#define SYS_CFG_SITE_MB		0
-#define SYS_CFG_SITE_DB1	1
-#define SYS_CFG_SITE_DB2	2
-#define SYS_CFG_STACK(n)	((n) << 12)
-
-#define SYS_CFG_ERR		(1 << 1)
-#define SYS_CFG_COMPLETE	(1 << 0)
-
-int v2m_cfg_write(u32 devfn, u32 data);
-int v2m_cfg_read(u32 devfn, u32 *data);
-void v2m_flags_set(u32 data);
-
-/*
- * Miscellaneous
- */
-#define SYS_MISC_MASTERSITE	(1 << 14)
-#define SYS_PROCIDx_HBI_MASK	0xfff
-
-int v2m_get_master_site(void);
-
 /*
  * Core tile IDs
  */
@@ -149,21 +85,4 @@ struct ct_desc {
 
 extern struct ct_desc *ct_desc;
 
-/*
- * OSC clock provider
- */
-struct v2m_osc {
-	struct clk_hw hw;
-	u8 site; /* 0 = motherboard, 1 = site 1, 2 = site 2 */
-	u8 stack; /* board stack position */
-	u16 osc;
-	unsigned long rate_min;
-	unsigned long rate_max;
-	unsigned long rate_default;
-};
-
-#define to_v2m_osc(osc) container_of(osc, struct v2m_osc, hw)
-
-struct clk *v2m_osc_register(const char *name, struct v2m_osc *osc);
-
 #endif
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index 7db27c8c05cc..c5d70de9bb4e 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -13,6 +13,7 @@
 #include <linux/smp.h>
 #include <linux/io.h>
 #include <linux/of_fdt.h>
+#include <linux/vexpress.h>
 
 #include <asm/smp_scu.h>
 #include <asm/hardware/gic.h>
@@ -193,7 +194,7 @@ static void __init vexpress_smp_prepare_cpus(unsigned int max_cpus)
 	 * until it receives a soft interrupt, and then the
 	 * secondary CPU branches to this address.
 	 */
-	v2m_flags_set(virt_to_phys(versatile_secondary_startup));
+	vexpress_flags_set(virt_to_phys(versatile_secondary_startup));
 }
 
 struct smp_operations __initdata vexpress_smp_ops = {
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 560e0df728f8..99d4172816be 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -16,11 +16,10 @@
 #include <linux/smsc911x.h>
 #include <linux/spinlock.h>
 #include <linux/usb/isp1760.h>
-#include <linux/clkdev.h>
-#include <linux/clk-provider.h>
 #include <linux/mtd/physmap.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
+#include <linux/vexpress.h>
 
 #include <asm/arch_timer.h>
 #include <asm/mach-types.h>
@@ -33,7 +32,6 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/hardware/gic.h>
 #include <asm/hardware/timer-sp.h>
-#include <asm/hardware/sp810.h>
 
 #include <mach/ct-ca9x4.h>
 #include <mach/motherboard.h>
@@ -58,22 +56,6 @@ static struct map_desc v2m_io_desc[] __initdata = {
 	},
 };
 
-static void __iomem *v2m_sysreg_base;
-
-static void __init v2m_sysctl_init(void __iomem *base)
-{
-	u32 scctrl;
-
-	if (WARN_ON(!base))
-		return;
-
-	/* Select 1MHz TIMCLK as the reference clock for SP804 timers */
-	scctrl = readl(base + SCCTRL);
-	scctrl |= SCCTRL_TIMEREN0SEL_TIMCLK;
-	scctrl |= SCCTRL_TIMEREN1SEL_TIMCLK;
-	writel(scctrl, base + SCCTRL);
-}
-
 static void __init v2m_sp804_init(void __iomem *base, unsigned int irq)
 {
 	if (WARN_ON(!base || irq == NO_IRQ))
@@ -87,69 +69,6 @@ static void __init v2m_sp804_init(void __iomem *base, unsigned int irq)
 }
 
 
-static DEFINE_SPINLOCK(v2m_cfg_lock);
-
-int v2m_cfg_write(u32 devfn, u32 data)
-{
-	/* Configuration interface broken? */
-	u32 val;
-
-	printk("%s: writing %08x to %08x\n", __func__, data, devfn);
-
-	devfn |= SYS_CFG_START | SYS_CFG_WRITE;
-
-	spin_lock(&v2m_cfg_lock);
-	val = readl(v2m_sysreg_base + V2M_SYS_CFGSTAT);
-	writel(val & ~SYS_CFG_COMPLETE, v2m_sysreg_base + V2M_SYS_CFGSTAT);
-
-	writel(data, v2m_sysreg_base +  V2M_SYS_CFGDATA);
-	writel(devfn, v2m_sysreg_base + V2M_SYS_CFGCTRL);
-
-	do {
-		val = readl(v2m_sysreg_base + V2M_SYS_CFGSTAT);
-	} while (val == 0);
-	spin_unlock(&v2m_cfg_lock);
-
-	return !!(val & SYS_CFG_ERR);
-}
-
-int v2m_cfg_read(u32 devfn, u32 *data)
-{
-	u32 val;
-
-	devfn |= SYS_CFG_START;
-
-	spin_lock(&v2m_cfg_lock);
-	writel(0, v2m_sysreg_base + V2M_SYS_CFGSTAT);
-	writel(devfn, v2m_sysreg_base + V2M_SYS_CFGCTRL);
-
-	mb();
-
-	do {
-		cpu_relax();
-		val = readl(v2m_sysreg_base + V2M_SYS_CFGSTAT);
-	} while (val == 0);
-
-	*data = readl(v2m_sysreg_base + V2M_SYS_CFGDATA);
-	spin_unlock(&v2m_cfg_lock);
-
-	return !!(val & SYS_CFG_ERR);
-}
-
-void __init v2m_flags_set(u32 data)
-{
-	writel(~0, v2m_sysreg_base + V2M_SYS_FLAGSCLR);
-	writel(data, v2m_sysreg_base + V2M_SYS_FLAGSSET);
-}
-
-int v2m_get_master_site(void)
-{
-	u32 misc = readl(v2m_sysreg_base + V2M_SYS_MISC);
-
-	return misc & SYS_MISC_MASTERSITE ? SYS_CFG_SITE_DB2 : SYS_CFG_SITE_DB1;
-}
-
-
 static struct resource v2m_pcie_i2c_resource = {
 	.start	= V2M_SERIAL_BUS_PCI,
 	.end	= V2M_SERIAL_BUS_PCI + SZ_4K - 1,
@@ -237,14 +156,8 @@ static struct platform_device v2m_usb_device = {
 	.dev.platform_data = &v2m_usb_config,
 };
 
-static void v2m_flash_set_vpp(struct platform_device *pdev, int on)
-{
-	writel(on != 0, v2m_sysreg_base + V2M_SYS_FLASH);
-}
-
 static struct physmap_flash_data v2m_flash_data = {
 	.width		= 4,
-	.set_vpp	= v2m_flash_set_vpp,
 };
 
 static struct resource v2m_flash_resources[] = {
@@ -291,14 +204,61 @@ static struct platform_device v2m_cf_device = {
 	.dev.platform_data = &v2m_pata_data,
 };
 
-static unsigned int v2m_mmci_status(struct device *dev)
-{
-	return readl(v2m_sysreg_base + V2M_SYS_MCI) & (1 << 0);
-}
-
 static struct mmci_platform_data v2m_mmci_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
-	.status		= v2m_mmci_status,
+	.gpio_wp	= VEXPRESS_GPIO_MMC_WPROT,
+	.gpio_cd	= VEXPRESS_GPIO_MMC_CARDIN,
+};
+
+static struct resource v2m_sysreg_resources[] = {
+	{
+		.start	= V2M_SYSREGS,
+		.end	= V2M_SYSREGS + 0xfff,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device v2m_sysreg_device = {
+	.name		= "vexpress-sysreg",
+	.id		= -1,
+	.resource	= v2m_sysreg_resources,
+	.num_resources	= ARRAY_SIZE(v2m_sysreg_resources),
+};
+
+static struct platform_device v2m_muxfpga_device = {
+	.name		= "vexpress-muxfpga",
+	.id		= 0,
+	.num_resources	= 1,
+	.resource	= (struct resource []) {
+		VEXPRESS_RES_FUNC(0, 7),
+	}
+};
+
+static struct platform_device v2m_shutdown_device = {
+	.name		= "vexpress-shutdown",
+	.id		= 0,
+	.num_resources	= 1,
+	.resource	= (struct resource []) {
+		VEXPRESS_RES_FUNC(0, 8),
+	}
+};
+
+static struct platform_device v2m_reboot_device = {
+	.name		= "vexpress-reboot",
+	.id		= 0,
+	.num_resources	= 1,
+	.resource	= (struct resource []) {
+		VEXPRESS_RES_FUNC(0, 9),
+	}
+};
+
+static struct platform_device v2m_dvimode_device = {
+	.name		= "vexpress-dvimode",
+	.id		= 0,
+	.num_resources	= 1,
+	.resource	= (struct resource []) {
+		VEXPRESS_RES_FUNC(0, 11),
+	}
 };
 
 static AMBA_APB_DEVICE(aaci,  "mb:aaci",  0, V2M_AACI, IRQ_V2M_AACI, NULL);
@@ -325,123 +285,9 @@ static struct amba_device *v2m_amba_devs[] __initdata = {
 	&rtc_device,
 };
 
-
-static unsigned long v2m_osc_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct v2m_osc *osc = to_v2m_osc(hw);
-
-	return !parent_rate ? osc->rate_default : parent_rate;
-}
-
-static long v2m_osc_round_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *parent_rate)
-{
-	struct v2m_osc *osc = to_v2m_osc(hw);
-
-	if (WARN_ON(rate < osc->rate_min))
-		rate = osc->rate_min;
-
-	if (WARN_ON(rate > osc->rate_max))
-		rate = osc->rate_max;
-
-	return rate;
-}
-
-static int v2m_osc_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct v2m_osc *osc = to_v2m_osc(hw);
-
-	v2m_cfg_write(SYS_CFG_OSC | SYS_CFG_SITE(osc->site) |
-			SYS_CFG_STACK(osc->stack) | osc->osc, rate);
-
-	return 0;
-}
-
-static struct clk_ops v2m_osc_ops = {
-	.recalc_rate = v2m_osc_recalc_rate,
-	.round_rate = v2m_osc_round_rate,
-	.set_rate = v2m_osc_set_rate,
-};
-
-struct clk * __init v2m_osc_register(const char *name, struct v2m_osc *osc)
-{
-	struct clk_init_data init;
-
-	WARN_ON(osc->site > 2);
-	WARN_ON(osc->stack > 15);
-	WARN_ON(osc->osc > 4095);
-
-	init.name = name;
-	init.ops = &v2m_osc_ops;
-	init.flags = CLK_IS_ROOT;
-	init.num_parents = 0;
-
-	osc->hw.init = &init;
-
-	return clk_register(NULL, &osc->hw);
-}
-
-static struct v2m_osc v2m_mb_osc1 = {
-	.site = SYS_CFG_SITE_MB,
-	.osc = 1,
-	.rate_min = 23750000,
-	.rate_max = 63500000,
-	.rate_default = 23750000,
-};
-
-static const char *v2m_ref_clk_periphs[] __initconst = {
-	"mb:wdt",   "1000f000.wdt",  "1c0f0000.wdt",	/* SP805 WDT */
-};
-
-static const char *v2m_osc1_periphs[] __initconst = {
-	"mb:clcd",  "1001f000.clcd", "1c1f0000.clcd",	/* PL111 CLCD */
-};
-
-static const char *v2m_osc2_periphs[] __initconst = {
-	"mb:mmci",  "10005000.mmci", "1c050000.mmci",	/* PL180 MMCI */
-	"mb:kmi0",  "10006000.kmi",  "1c060000.kmi",	/* PL050 KMI0 */
-	"mb:kmi1",  "10007000.kmi",  "1c070000.kmi",	/* PL050 KMI1 */
-	"mb:uart0", "10009000.uart", "1c090000.uart",	/* PL011 UART0 */
-	"mb:uart1", "1000a000.uart", "1c0a0000.uart",	/* PL011 UART1 */
-	"mb:uart2", "1000b000.uart", "1c0b0000.uart",	/* PL011 UART2 */
-	"mb:uart3", "1000c000.uart", "1c0c0000.uart",	/* PL011 UART3 */
-};
-
-static void __init v2m_clk_init(void)
-{
-	struct clk *clk;
-	int i;
-
-	clk = clk_register_fixed_rate(NULL, "dummy_apb_pclk", NULL,
-			CLK_IS_ROOT, 0);
-	WARN_ON(clk_register_clkdev(clk, "apb_pclk", NULL));
-
-	clk = clk_register_fixed_rate(NULL, "mb:ref_clk", NULL,
-			CLK_IS_ROOT, 32768);
-	for (i = 0; i < ARRAY_SIZE(v2m_ref_clk_periphs); i++)
-		WARN_ON(clk_register_clkdev(clk, NULL, v2m_ref_clk_periphs[i]));
-
-	clk = clk_register_fixed_rate(NULL, "mb:sp804_clk", NULL,
-			CLK_IS_ROOT, 1000000);
-	WARN_ON(clk_register_clkdev(clk, "v2m-timer0", "sp804"));
-	WARN_ON(clk_register_clkdev(clk, "v2m-timer1", "sp804"));
-
-	clk = v2m_osc_register("mb:osc1", &v2m_mb_osc1);
-	for (i = 0; i < ARRAY_SIZE(v2m_osc1_periphs); i++)
-		WARN_ON(clk_register_clkdev(clk, NULL, v2m_osc1_periphs[i]));
-
-	clk = clk_register_fixed_rate(NULL, "mb:osc2", NULL,
-			CLK_IS_ROOT, 24000000);
-	for (i = 0; i < ARRAY_SIZE(v2m_osc2_periphs); i++)
-		WARN_ON(clk_register_clkdev(clk, NULL, v2m_osc2_periphs[i]));
-}
-
 static void __init v2m_timer_init(void)
 {
-	v2m_sysctl_init(ioremap(V2M_SYSCTL, SZ_4K));
-	v2m_clk_init();
+	vexpress_clk_init(ioremap(V2M_SYSCTL, SZ_4K));
 	v2m_sp804_init(ioremap(V2M_TIMER01, SZ_4K), IRQ_V2M_TIMER0);
 }
 
@@ -453,19 +299,7 @@ static void __init v2m_init_early(void)
 {
 	if (ct_desc->init_early)
 		ct_desc->init_early();
-	versatile_sched_clock_init(v2m_sysreg_base + V2M_SYS_24MHZ, 24000000);
-}
-
-static void v2m_power_off(void)
-{
-	if (v2m_cfg_write(SYS_CFG_SHUTDOWN | SYS_CFG_SITE(SYS_CFG_SITE_MB), 0))
-		printk(KERN_EMERG "Unable to shutdown\n");
-}
-
-static void v2m_restart(char str, const char *cmd)
-{
-	if (v2m_cfg_write(SYS_CFG_REBOOT | SYS_CFG_SITE(SYS_CFG_SITE_MB), 0))
-		printk(KERN_EMERG "Unable to reboot\n");
+	versatile_sched_clock_init(vexpress_get_24mhz_clock_base(), 24000000);
 }
 
 struct ct_desc *ct_desc;
@@ -482,7 +316,7 @@ static void __init v2m_populate_ct_desc(void)
 	u32 current_tile_id;
 
 	ct_desc = NULL;
-	current_tile_id = readl(v2m_sysreg_base + V2M_SYS_PROCID0)
+	current_tile_id = vexpress_get_procid(VEXPRESS_SITE_MASTER)
 				& V2M_CT_ID_MASK;
 
 	for (i = 0; i < ARRAY_SIZE(ct_descs) && !ct_desc; ++i)
@@ -498,7 +332,7 @@ static void __init v2m_populate_ct_desc(void)
 static void __init v2m_map_io(void)
 {
 	iotable_init(v2m_io_desc, ARRAY_SIZE(v2m_io_desc));
-	v2m_sysreg_base = ioremap(V2M_SYSREGS, SZ_4K);
+	vexpress_sysreg_early_init(ioremap(V2M_SYSREGS, SZ_4K));
 	v2m_populate_ct_desc();
 	ct_desc->map_io();
 }
@@ -515,6 +349,12 @@ static void __init v2m_init(void)
 	regulator_register_fixed(0, v2m_eth_supplies,
 			ARRAY_SIZE(v2m_eth_supplies));
 
+	platform_device_register(&v2m_muxfpga_device);
+	platform_device_register(&v2m_shutdown_device);
+	platform_device_register(&v2m_reboot_device);
+	platform_device_register(&v2m_dvimode_device);
+
+	platform_device_register(&v2m_sysreg_device);
 	platform_device_register(&v2m_pcie_i2c_device);
 	platform_device_register(&v2m_ddc_i2c_device);
 	platform_device_register(&v2m_flash_device);
@@ -525,7 +365,7 @@ static void __init v2m_init(void)
 	for (i = 0; i < ARRAY_SIZE(v2m_amba_devs); i++)
 		amba_device_register(v2m_amba_devs[i], &iomem_resource);
 
-	pm_power_off = v2m_power_off;
+	pm_power_off = vexpress_power_off;
 
 	ct_desc->init_tile();
 }
@@ -539,7 +379,7 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
 	.timer		= &v2m_timer,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= v2m_init,
-	.restart	= v2m_restart,
+	.restart	= vexpress_restart,
 MACHINE_END
 
 static struct map_desc v2m_rs1_io_desc __initdata = {
@@ -580,20 +420,13 @@ void __init v2m_dt_map_io(void)
 
 void __init v2m_dt_init_early(void)
 {
-	struct device_node *node;
 	u32 dt_hbi;
 
-	node = of_find_compatible_node(NULL, NULL, "arm,vexpress-sysreg");
-	v2m_sysreg_base = of_iomap(node, 0);
-	if (WARN_ON(!v2m_sysreg_base))
-		return;
+	vexpress_sysreg_of_early_init();
 
 	/* Confirm board type against DT property, if available */
 	if (of_property_read_u32(allnodes, "arm,hbi", &dt_hbi) == 0) {
-		int site = v2m_get_master_site();
-		u32 id = readl(v2m_sysreg_base + (site == SYS_CFG_SITE_DB2 ?
-				V2M_SYS_PROCID1 : V2M_SYS_PROCID0));
-		u32 hbi = id & SYS_PROCIDx_HBI_MASK;
+		u32 hbi = vexpress_get_hbi(VEXPRESS_SITE_MASTER);
 
 		if (WARN_ON(dt_hbi != hbi))
 			pr_warning("vexpress: DT HBI (%x) is not matching "
@@ -617,10 +450,7 @@ static void __init v2m_dt_timer_init(void)
 	const char *path;
 	int err;
 
-	node = of_find_compatible_node(NULL, NULL, "arm,sp810");
-	v2m_sysctl_init(of_iomap(node, 0));
-
-	v2m_clk_init();
+	vexpress_clk_of_init();
 
 	err = of_property_read_string(of_aliases, "arm,v2m_timer", &path);
 	if (WARN_ON(err))
@@ -631,33 +461,29 @@ static void __init v2m_dt_timer_init(void)
 		twd_local_timer_of_register();
 
 	if (arch_timer_sched_clock_init() != 0)
-		versatile_sched_clock_init(v2m_sysreg_base + V2M_SYS_24MHZ, 24000000);
+		versatile_sched_clock_init(vexpress_get_24mhz_clock_base(),
+				24000000);
 }
 
 static struct sys_timer v2m_dt_timer = {
 	.init = v2m_dt_timer_init,
 };
 
-static struct of_dev_auxdata v2m_dt_auxdata_lookup[] __initdata = {
-	OF_DEV_AUXDATA("arm,vexpress-flash", V2M_NOR0, "physmap-flash",
-			&v2m_flash_data),
-	OF_DEV_AUXDATA("arm,primecell", V2M_MMCI, "mb:mmci", &v2m_mmci_data),
-	/* RS1 memory map */
-	OF_DEV_AUXDATA("arm,vexpress-flash", 0x08000000, "physmap-flash",
-			&v2m_flash_data),
-	OF_DEV_AUXDATA("arm,primecell", 0x1c050000, "mb:mmci", &v2m_mmci_data),
+static const struct of_device_id v2m_dt_bus_match[] __initconst = {
+	{ .compatible = "simple-bus", },
+	{ .compatible = "arm,amba-bus", },
+	{ .compatible = "arm,vexpress,config-bus", },
 	{}
 };
 
 static void __init v2m_dt_init(void)
 {
 	l2x0_of_init(0x00400000, 0xfe0fffff);
-	of_platform_populate(NULL, of_default_bus_match_table,
-			v2m_dt_auxdata_lookup, NULL);
-	pm_power_off = v2m_power_off;
+	of_platform_populate(NULL, v2m_dt_bus_match, NULL, NULL);
+	pm_power_off = vexpress_power_off;
 }
 
-const static char *v2m_dt_match[] __initconst = {
+static const char * const v2m_dt_match[] __initconst = {
 	"arm,vexpress",
 	"xen,xenvm",
 	NULL,
@@ -672,5 +498,5 @@ DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.timer		= &v2m_dt_timer,
 	.init_machine	= v2m_dt_init,
 	.handle_irq	= gic_handle_irq,
-	.restart	= v2m_restart,
+	.restart	= vexpress_restart,
 MACHINE_END
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index 09c81d7a22de..c52215ff4245 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -107,4 +107,15 @@ unsigned __vexpress_get_site(struct device *dev, struct device_node *node);
 void vexpress_sysreg_early_init(void __iomem *base);
 void vexpress_sysreg_of_early_init(void);
 
+void vexpress_power_off(void);
+void vexpress_restart(char str, const char *cmd);
+
+/* Clocks */
+
+struct clk *vexpress_osc_setup(struct device *dev);
+void vexpress_osc_of_setup(struct device_node *node);
+
+void vexpress_clk_init(void __iomem *sp810_base);
+void vexpress_clk_of_init(void);
+
 #endif
-- 
cgit v1.2.3-55-g7522


From ed95779340b50e362245c81b5dec0d11a1debfa8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Nov 2012 09:16:58 -0800
Subject: cgroup: kill cgroup_subsys->__DEPRECATED_clear_css_refs

2ef37d3fe4 ("memcg: Simplify mem_cgroup_force_empty_list error
handling") removed the last user of __DEPRECATED_clear_css_refs.  This
patch removes __DEPRECATED_clear_css_refs and mechanisms to support
it.

* Conditionals dependent on __DEPRECATED_clear_css_refs removed.

* cgroup_clear_css_refs() can no longer fail.  All that needs to be
  done are deactivating refcnts, setting CSS_REMOVED and putting the
  base reference on each css.  Remove cgroup_clear_css_refs() and the
  failure path, and open-code the loops into cgroup_rmdir().

This patch keeps the two for_each_subsys() loops separate while open
coding them.  They can be merged now but there are scheduled changes
which need them to be separate, so keep them separate to reduce the
amount of churn.

local_irq_save/restore() from cgroup_clear_css_refs() are replaced
with local_irq_disable/enable() for simplicity.  This is safe as
cgroup_rmdir() is always called with IRQ enabled.  Note that this IRQ
switching is necessary to ensure that css_tryget() isn't called from
IRQ context on the same CPU while lower context is between CSS
deactivation and setting CSS_REMOVED as css_tryget() would hang
forever in such cases waiting for CSS to be re-activated or
CSS_REMOVED set.  This will go away soon.

v2: cgroup_call_pre_destroy() removal dropped per Michal.  Commit
    message updated to explain local_irq_disable/enable() conversion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  12 -----
 kernel/cgroup.c        | 131 ++++++++++++++-----------------------------------
 2 files changed, 36 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c90eaa803440..02e09c0e98ab 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -86,7 +86,6 @@ struct cgroup_subsys_state {
 enum {
 	CSS_ROOT, /* This CSS is the root of the subsystem */
 	CSS_REMOVED, /* This CSS is dead */
-	CSS_CLEAR_CSS_REFS,		/* @ss->__DEPRECATED_clear_css_refs */
 };
 
 /* Caller must verify that the css is not for root cgroup */
@@ -485,17 +484,6 @@ struct cgroup_subsys {
 	 */
 	bool use_id;
 
-	/*
-	 * If %true, cgroup removal will try to clear css refs by retrying
-	 * ss->pre_destroy() until there's no css ref left.  This behavior
-	 * is strictly for backward compatibility and will be removed as
-	 * soon as the current user (memcg) is updated.
-	 *
-	 * If %false, ss->pre_destroy() can't fail and cgroup removal won't
-	 * wait for css refs to drop to zero before proceeding.
-	 */
-	bool __DEPRECATED_clear_css_refs;
-
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 79818507e444..8c605e2bdd1a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -865,11 +865,8 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 			continue;
 
 		ret = ss->pre_destroy(cgrp);
-		if (ret) {
-			/* ->pre_destroy() failure is being deprecated */
-			WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
+		if (WARN_ON_ONCE(ret))
 			break;
-		}
 	}
 
 	return ret;
@@ -3901,14 +3898,12 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 	cgrp->subsys[ss->subsys_id] = css;
 
 	/*
-	 * If !clear_css_refs, css holds an extra ref to @cgrp->dentry
-	 * which is put on the last css_put().  dput() requires process
-	 * context, which css_put() may be called without.  @css->dput_work
-	 * will be used to invoke dput() asynchronously from css_put().
+	 * css holds an extra ref to @cgrp->dentry which is put on the last
+	 * css_put().  dput() requires process context, which css_put() may
+	 * be called without.  @css->dput_work will be used to invoke
+	 * dput() asynchronously from css_put().
 	 */
 	INIT_WORK(&css->dput_work, css_dput_fn);
-	if (ss->__DEPRECATED_clear_css_refs)
-		set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
 }
 
 /*
@@ -3978,10 +3973,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	if (err < 0)
 		goto err_remove;
 
-	/* If !clear_css_refs, each css holds a ref to the cgroup's dentry */
+	/* each css holds a ref to the cgroup's dentry */
 	for_each_subsys(root, ss)
-		if (!ss->__DEPRECATED_clear_css_refs)
-			dget(dentry);
+		dget(dentry);
 
 	/* The cgroup directory was pre-locked for us */
 	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
@@ -4066,71 +4060,6 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
 	return 0;
 }
 
-/*
- * Atomically mark all (or else none) of the cgroup's CSS objects as
- * CSS_REMOVED. Return true on success, or false if the cgroup has
- * busy subsystems. Call with cgroup_mutex held
- *
- * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or
- * not, cgroup removal behaves differently.
- *
- * If clear is set, css refcnt for the subsystem should be zero before
- * cgroup removal can be committed.  This is implemented by
- * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be
- * called multiple times until all css refcnts reach zero and is allowed to
- * veto removal on any invocation.  This behavior is deprecated and will be
- * removed as soon as the existing user (memcg) is updated.
- *
- * If clear is not set, each css holds an extra reference to the cgroup's
- * dentry and cgroup removal proceeds regardless of css refs.
- * ->pre_destroy() will be called at least once and is not allowed to fail.
- * On the last put of each css, whenever that may be, the extra dentry ref
- * is put so that dentry destruction happens only after all css's are
- * released.
- */
-static int cgroup_clear_css_refs(struct cgroup *cgrp)
-{
-	struct cgroup_subsys *ss;
-	unsigned long flags;
-	bool failed = false;
-
-	local_irq_save(flags);
-
-	/*
-	 * Block new css_tryget() by deactivating refcnt.  If all refcnts
-	 * for subsystems w/ clear_css_refs set were 1 at the moment of
-	 * deactivation, we succeeded.
-	 */
-	for_each_subsys(cgrp->root, ss) {
-		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
-
-		WARN_ON(atomic_read(&css->refcnt) < 0);
-		atomic_add(CSS_DEACT_BIAS, &css->refcnt);
-
-		if (ss->__DEPRECATED_clear_css_refs)
-			failed |= css_refcnt(css) != 1;
-	}
-
-	/*
-	 * If succeeded, set REMOVED and put all the base refs; otherwise,
-	 * restore refcnts to positive values.  Either way, all in-progress
-	 * css_tryget() will be released.
-	 */
-	for_each_subsys(cgrp->root, ss) {
-		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
-
-		if (!failed) {
-			set_bit(CSS_REMOVED, &css->flags);
-			css_put(css);
-		} else {
-			atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
-		}
-	}
-
-	local_irq_restore(flags);
-	return !failed;
-}
-
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cgroup *cgrp = dentry->d_fsdata;
@@ -4138,10 +4067,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	struct cgroup *parent;
 	DEFINE_WAIT(wait);
 	struct cgroup_event *event, *tmp;
+	struct cgroup_subsys *ss;
 	int ret;
 
 	/* the vfs holds both inode->i_mutex already */
-again:
 	mutex_lock(&cgroup_mutex);
 	if (atomic_read(&cgrp->count) != 0) {
 		mutex_unlock(&cgroup_mutex);
@@ -4182,21 +4111,34 @@ again:
 		return -EBUSY;
 	}
 	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
-	if (!cgroup_clear_css_refs(cgrp)) {
-		mutex_unlock(&cgroup_mutex);
-		/*
-		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
-		 * prepare_to_wait(), we need to check this flag.
-		 */
-		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
-			schedule();
-		finish_wait(&cgroup_rmdir_waitq, &wait);
-		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
-		if (signal_pending(current))
-			return -EINTR;
-		goto again;
+
+	local_irq_disable();
+
+	/* block new css_tryget() by deactivating refcnt */
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
+		WARN_ON(atomic_read(&css->refcnt) < 0);
+		atomic_add(CSS_DEACT_BIAS, &css->refcnt);
+	}
+
+	/*
+	 * Set REMOVED.  All in-progress css_tryget() will be released.
+	 * Put all the base refs.  Each css holds an extra reference to the
+	 * cgroup's dentry and cgroup removal proceeds regardless of css
+	 * refs.  On the last put of each css, whenever that may be, the
+	 * extra dentry ref is put so that dentry destruction happens only
+	 * after all css's are released.
+	 */
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
+		set_bit(CSS_REMOVED, &css->flags);
+		css_put(css);
 	}
-	/* NO css_tryget() can success after here. */
+
+	local_irq_enable();
+
 	finish_wait(&cgroup_rmdir_waitq, &wait);
 	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 
@@ -4949,8 +4891,7 @@ void __css_put(struct cgroup_subsys_state *css)
 		cgroup_wakeup_rmdir_waiter(cgrp);
 		break;
 	case 0:
-		if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
-			schedule_work(&css->dput_work);
+		schedule_work(&css->dput_work);
 		break;
 	}
 	rcu_read_unlock();
-- 
cgit v1.2.3-55-g7522


From e93160803ffda2e67d9ff9cacb63bb6868c8398f Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Nov 2012 09:16:58 -0800
Subject: cgroup: kill CSS_REMOVED

CSS_REMOVED is one of the several contortions which were necessary to
support css reference draining on cgroup removal.  All css->refcnts
which need draining should be deactivated and verified to equal zero
atomically w.r.t. css_tryget().  If any one isn't zero, all refcnts
needed to be re-activated and css_tryget() shouldn't fail in the
process.

This was achieved by letting css_tryget() busy-loop until either the
refcnt is reactivated (failed removal attempt) or CSS_REMOVED is set
(committing to removal).

Now that css refcnt draining is no longer used, there's no need for
atomic rollback mechanism.  css_tryget() simply can look at the
reference count and fail if it's deactivated - it's never getting
re-activated.

This patch removes CSS_REMOVED and updates __css_tryget() to fail if
the refcnt is deactivated.  As deactivation and removal are a single
step now, they no longer need to be protected against css_tryget()
happening from irq context.  Remove local_irq_disable/enable() from
cgroup_rmdir().

Note that this removes css_is_removed() whose only user is VM_BUG_ON()
in memcontrol.c.  We can replace it with a check on the refcnt but
given that the only use case is a debug assert, I think it's better to
simply unexport it.

v2: Comment updated and explanation on local_irq_disable/enable()
    added per Michal Hocko.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
---
 include/linux/cgroup.h |  6 ------
 kernel/cgroup.c        | 31 ++++++++++++-------------------
 mm/memcontrol.c        |  7 +++----
 3 files changed, 15 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 02e09c0e98ab..a3098046250b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -85,7 +85,6 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
 	CSS_ROOT, /* This CSS is the root of the subsystem */
-	CSS_REMOVED, /* This CSS is dead */
 };
 
 /* Caller must verify that the css is not for root cgroup */
@@ -108,11 +107,6 @@ static inline void css_get(struct cgroup_subsys_state *css)
 		__css_get(css, 1);
 }
 
-static inline bool css_is_removed(struct cgroup_subsys_state *css)
-{
-	return test_bit(CSS_REMOVED, &css->flags);
-}
-
 /*
  * Call css_tryget() to take a reference on a css if your existing
  * (known-valid) reference isn't already ref-counted. Returns false if
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c605e2bdd1a..c194f9e4fc7b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -170,8 +170,8 @@ struct css_id {
 	 * The css to which this ID points. This pointer is set to valid value
 	 * after cgroup is populated. If cgroup is removed, this will be NULL.
 	 * This pointer is expected to be RCU-safe because destroy()
-	 * is called after synchronize_rcu(). But for safe use, css_is_removed()
-	 * css_tryget() should be used for avoiding race.
+	 * is called after synchronize_rcu(). But for safe use, css_tryget()
+	 * should be used for avoiding race.
 	 */
 	struct cgroup_subsys_state __rcu *css;
 	/*
@@ -4112,8 +4112,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	}
 	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
 
-	local_irq_disable();
-
 	/* block new css_tryget() by deactivating refcnt */
 	for_each_subsys(cgrp->root, ss) {
 		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
@@ -4123,21 +4121,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	}
 
 	/*
-	 * Set REMOVED.  All in-progress css_tryget() will be released.
 	 * Put all the base refs.  Each css holds an extra reference to the
 	 * cgroup's dentry and cgroup removal proceeds regardless of css
 	 * refs.  On the last put of each css, whenever that may be, the
 	 * extra dentry ref is put so that dentry destruction happens only
 	 * after all css's are released.
 	 */
-	for_each_subsys(cgrp->root, ss) {
-		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
-
-		set_bit(CSS_REMOVED, &css->flags);
-		css_put(css);
-	}
-
-	local_irq_enable();
+	for_each_subsys(cgrp->root, ss)
+		css_put(cgrp->subsys[ss->subsys_id]);
 
 	finish_wait(&cgroup_rmdir_waitq, &wait);
 	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
@@ -4861,15 +4852,17 @@ static void check_for_release(struct cgroup *cgrp)
 /* Caller must verify that the css is not for root cgroup */
 bool __css_tryget(struct cgroup_subsys_state *css)
 {
-	do {
-		int v = css_refcnt(css);
+	while (true) {
+		int t, v;
 
-		if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
+		v = css_refcnt(css);
+		t = atomic_cmpxchg(&css->refcnt, v, v + 1);
+		if (likely(t == v))
 			return true;
+		else if (t < 0)
+			return false;
 		cpu_relax();
-	} while (!test_bit(CSS_REMOVED, &css->flags));
-
-	return false;
+	}
 }
 EXPORT_SYMBOL_GPL(__css_tryget);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5a1d584ffed3..37c356646544 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2343,7 +2343,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 again:
 	if (*ptr) { /* css should be a valid one */
 		memcg = *ptr;
-		VM_BUG_ON(css_is_removed(&memcg->css));
 		if (mem_cgroup_is_root(memcg))
 			goto done;
 		if (nr_pages == 1 && consume_stock(memcg))
@@ -2483,9 +2482,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 
 /*
  * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock(). The caller must check css_is_removed() or some if
- * it's concern. (dropping refcnt from swap can be called against removed
- * memcg.)
+ * rcu_read_lock().  The caller is responsible for calling css_tryget if
+ * the mem_cgroup is used for charging. (dropping refcnt from swap can be
+ * called against removed memcg.)
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
-- 
cgit v1.2.3-55-g7522


From b25ed609d0eecf077db607e88ea70bae83b6adb2 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Nov 2012 09:16:59 -0800
Subject: cgroup: remove CGRP_WAIT_ON_RMDIR, cgroup_exclude_rmdir() and
 cgroup_release_and_wakeup_rmdir()

CGRP_WAIT_ON_RMDIR is another kludge which was added to make cgroup
destruction rollback somewhat working.  cgroup_rmdir() used to drain
CSS references and CGRP_WAIT_ON_RMDIR and the associated waitqueue and
helpers were used to allow the task performing rmdir to wait for the
next relevant event.

Unfortunately, the wait is visible to controllers too and the
mechanism got exposed to memcg by 887032670d ("cgroup avoid permanent
sleep at rmdir").

Now that the draining and retries are gone, CGRP_WAIT_ON_RMDIR is
unnecessary.  Remove it and all the mechanisms supporting it.  Note
that memcontrol.c changes are essentially revert of 887032670d
("cgroup avoid permanent sleep at rmdir").

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Balbir Singh <bsingharora@gmail.com>
---
 include/linux/cgroup.h | 21 ---------------------
 kernel/cgroup.c        | 51 --------------------------------------------------
 mm/memcontrol.c        | 24 +-----------------------
 3 files changed, 1 insertion(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a3098046250b..47868a86ba2b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -144,10 +144,6 @@ enum {
 	CGRP_RELEASABLE,
 	/* Control Group requires release notifications to userspace */
 	CGRP_NOTIFY_ON_RELEASE,
-	/*
-	 * A thread in rmdir() is wating for this cgroup.
-	 */
-	CGRP_WAIT_ON_RMDIR,
 	/*
 	 * Clone cgroup values when creating a new child cgroup
 	 */
@@ -411,23 +407,6 @@ int cgroup_task_count(const struct cgroup *cgrp);
 /* Return true if cgrp is a descendant of the task's cgroup */
 int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
 
-/*
- * When the subsys has to access css and may add permanent refcnt to css,
- * it should take care of racy conditions with rmdir(). Following set of
- * functions, is for stop/restart rmdir if necessary.
- * Because these will call css_get/put, "css" should be alive css.
- *
- *  cgroup_exclude_rmdir();
- *  ...do some jobs which may access arbitrary empty cgroup
- *  cgroup_release_and_wakeup_rmdir();
- *
- *  When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
- *  it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
- */
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
-
 /*
  * Control Group taskset, used to pass around set of tasks to cgroup_subsys
  * methods.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66204a6f68f3..c5f6fb28dd0e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -965,33 +965,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 	remove_dir(dentry);
 }
 
-/*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-		wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-	css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-	cgroup_wakeup_rmdir_waiter(css->cgroup);
-	css_put(css);
-}
-
 /*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
@@ -1963,12 +1936,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	}
 
 	synchronize_rcu();
-
-	/*
-	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
-	 * is no longer empty.
-	 */
-	cgroup_wakeup_rmdir_waiter(cgrp);
 out:
 	if (retval) {
 		for_each_subsys(root, ss) {
@@ -2138,7 +2105,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 * step 5: success! and cleanup
 	 */
 	synchronize_rcu();
-	cgroup_wakeup_rmdir_waiter(cgrp);
 	retval = 0;
 out_put_css_set_refs:
 	if (retval) {
@@ -4058,26 +4024,13 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	struct cgroup_event *event, *tmp;
 	struct cgroup_subsys *ss;
 
-	/*
-	 * In general, subsystem has no css->refcnt after pre_destroy(). But
-	 * in racy cases, subsystem may have to get css->refcnt after
-	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
-	 * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
-	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
-	 * and subsystem's reference count handling. Please see css_get/put
-	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
-	 */
-	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
-
 	/* the vfs holds both inode->i_mutex already */
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
 	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
-		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
-	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
 
 	/*
 	 * Block new css_tryget() by deactivating refcnt and mark @cgrp
@@ -4114,9 +4067,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	for_each_subsys(cgrp->root, ss)
 		css_put(cgrp->subsys[ss->subsys_id]);
 
-	finish_wait(&cgroup_rmdir_waitq, &wait);
-	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
-
 	raw_spin_lock(&release_list_lock);
 	if (!list_empty(&cgrp->release_list))
 		list_del_init(&cgrp->release_list);
@@ -4864,7 +4814,6 @@ void __css_put(struct cgroup_subsys_state *css)
 			set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
 		}
-		cgroup_wakeup_rmdir_waiter(cgrp);
 		break;
 	case 0:
 		schedule_work(&css->dput_work);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 37c356646544..930edfaa5187 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2681,13 +2681,6 @@ static int mem_cgroup_move_account(struct page *page,
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
 	mem_cgroup_charge_statistics(to, anon, nr_pages);
-	/*
-	 * We charges against "to" which may not have any tasks. Then, "to"
-	 * can be under rmdir(). But in current implementation, caller of
-	 * this function is just force_empty() and move charge, so it's
-	 * guaranteed that "to" is never removed. So, we don't check rmdir
-	 * status here.
-	 */
 	move_unlock_mem_cgroup(from, &flags);
 	ret = 0;
 unlock:
@@ -2893,7 +2886,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 		return;
 	if (!memcg)
 		return;
-	cgroup_exclude_rmdir(&memcg->css);
 
 	__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
 	/*
@@ -2907,12 +2899,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 		swp_entry_t ent = {.val = page_private(page)};
 		mem_cgroup_uncharge_swap(ent);
 	}
-	/*
-	 * At swapin, we may charge account against cgroup which has no tasks.
-	 * So, rmdir()->pre_destroy() can be called while we do this charge.
-	 * In that case, we need to call pre_destroy() again. check it here.
-	 */
-	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 void mem_cgroup_commit_charge_swapin(struct page *page,
@@ -3360,8 +3346,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 
 	if (!memcg)
 		return;
-	/* blocks rmdir() */
-	cgroup_exclude_rmdir(&memcg->css);
+
 	if (!migration_ok) {
 		used = oldpage;
 		unused = newpage;
@@ -3395,13 +3380,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	 */
 	if (anon)
 		mem_cgroup_uncharge_page(used);
-	/*
-	 * At migration, we may charge account against cgroup which has no
-	 * tasks.
-	 * So, rmdir()->pre_destroy() can be called while we do this charge.
-	 * In that case, we need to call pre_destroy() again. check it here.
-	 */
-	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From bcf6de1b9129531215d26dd9af8331e84973bc52 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Nov 2012 09:16:59 -0800
Subject: cgroup: make ->pre_destroy() return void

All ->pre_destory() implementations return 0 now, which is the only
allowed return value.  Make it return void.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
---
 block/blk-cgroup.c     | 3 +--
 include/linux/cgroup.h | 2 +-
 kernel/cgroup.c        | 2 +-
 mm/hugetlb_cgroup.c    | 4 +---
 mm/memcontrol.c        | 3 +--
 5 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f3b44a65fc7a..a7816f3d0059 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -600,7 +600,7 @@ struct cftype blkcg_files[] = {
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
-static int blkcg_pre_destroy(struct cgroup *cgroup)
+static void blkcg_pre_destroy(struct cgroup *cgroup)
 {
 	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
 
@@ -622,7 +622,6 @@ static int blkcg_pre_destroy(struct cgroup *cgroup)
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-	return 0;
 }
 
 static void blkcg_destroy(struct cgroup *cgroup)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 47868a86ba2b..adb2adc8ec1a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -436,7 +436,7 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
 
 struct cgroup_subsys {
 	struct cgroup_subsys_state *(*create)(struct cgroup *cgrp);
-	int (*pre_destroy)(struct cgroup *cgrp);
+	void (*pre_destroy)(struct cgroup *cgrp);
 	void (*destroy)(struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c5f6fb28dd0e..83cd7d041c62 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4054,7 +4054,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	mutex_unlock(&cgroup_mutex);
 	for_each_subsys(cgrp->root, ss)
 		if (ss->pre_destroy)
-			WARN_ON_ONCE(ss->pre_destroy(cgrp));
+			ss->pre_destroy(cgrp);
 	mutex_lock(&cgroup_mutex);
 
 	/*
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index dc595c6b1f55..0d3a1a317731 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -155,7 +155,7 @@ out:
  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
  * the parent cgroup.
  */
-static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
+static void hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
 {
 	struct hstate *h;
 	struct page *page;
@@ -172,8 +172,6 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
 		}
 		cond_resched();
 	} while (hugetlb_cgroup_have_usage(cgroup));
-
-	return 0;
 }
 
 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6678f991c6c6..a1811ce60e20 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5002,12 +5002,11 @@ free_out:
 	return ERR_PTR(error);
 }
 
-static int mem_cgroup_pre_destroy(struct cgroup *cont)
+static void mem_cgroup_pre_destroy(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
 	mem_cgroup_reparent_charges(memcg);
-	return 0;
 }
 
 static void mem_cgroup_destroy(struct cgroup *cont)
-- 
cgit v1.2.3-55-g7522


From cc95e347cbba3a29c6f8c95ec737624afe489d8e Mon Sep 17 00:00:00 2001
From: Stephen Warren
Date: Wed, 19 Sep 2012 15:51:01 -0600
Subject: ARM: tegra: move tegra-ahb.h out of arch/arm/mach-tegra/

We wish to empty arch/arm/mach-tegra/include/mach/ as much as possible
to enable single zImage. Move tegra-ahb.h to a more central location
(suggested by Arnd, OK'd by Greg KH), and actually make tegra-ahb.c
include the header to ensure client and provider agree on the prototype.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/include/mach/tegra-ahb.h | 19 -------------------
 drivers/amba/tegra-ahb.c                     |  1 +
 drivers/iommu/tegra-smmu.c                   |  3 +--
 include/linux/tegra-ahb.h                    | 19 +++++++++++++++++++
 4 files changed, 21 insertions(+), 21 deletions(-)
 delete mode 100644 arch/arm/mach-tegra/include/mach/tegra-ahb.h
 create mode 100644 include/linux/tegra-ahb.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/include/mach/tegra-ahb.h b/arch/arm/mach-tegra/include/mach/tegra-ahb.h
deleted file mode 100644
index e0f8c84b1d8c..000000000000
--- a/arch/arm/mach-tegra/include/mach/tegra-ahb.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __MACH_TEGRA_AHB_H__
-#define __MACH_TEGRA_AHB_H__
-
-extern int tegra_ahb_enable_smmu(struct device_node *ahb);
-
-#endif	/* __MACH_TEGRA_AHB_H__ */
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c
index 0b6f0b28a487..bd5de08ad6fd 100644
--- a/drivers/amba/tegra-ahb.c
+++ b/drivers/amba/tegra-ahb.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/tegra-ahb.h>
 
 #define DRV_NAME "tegra-ahb"
 
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 3fb95dde20f5..41678639b7e3 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -34,12 +34,11 @@
 #include <linux/of_iommu.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/tegra-ahb.h>
 
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 
-#include <mach/tegra-ahb.h>
-
 enum smmu_hwgrp {
 	HWGRP_AFI,
 	HWGRP_AVPC,
diff --git a/include/linux/tegra-ahb.h b/include/linux/tegra-ahb.h
new file mode 100644
index 000000000000..f1cd075ceee1
--- /dev/null
+++ b/include/linux/tegra-ahb.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_AHB_H__
+#define __LINUX_AHB_H__
+
+extern int tegra_ahb_enable_smmu(struct device_node *ahb);
+
+#endif	/* __LINUX_AHB_H__ */
-- 
cgit v1.2.3-55-g7522


From 4eb3ccf157639a9d9c7829de94017c46c73d9cc4 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Mon, 5 Nov 2012 09:56:00 +0000
Subject: staging:iio: Move the ad7887 driver out of staging

The driver does not expose any custom API to userspace and none of the standard
static code checker tools report any issues, so move it out of staging.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/Kconfig              |  13 ++
 drivers/iio/adc/Makefile             |   1 +
 drivers/iio/adc/ad7887.c             | 378 +++++++++++++++++++++++++++++++++++
 drivers/staging/iio/adc/Kconfig      |  13 --
 drivers/staging/iio/adc/Makefile     |   2 -
 drivers/staging/iio/adc/ad7887.c     | 378 -----------------------------------
 drivers/staging/iio/adc/ad7887.h     |  31 ---
 include/linux/platform_data/ad7887.h |  26 +++
 8 files changed, 418 insertions(+), 424 deletions(-)
 create mode 100644 drivers/iio/adc/ad7887.c
 delete mode 100644 drivers/staging/iio/adc/ad7887.c
 delete mode 100644 drivers/staging/iio/adc/ad7887.h
 create mode 100644 include/linux/platform_data/ad7887.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 492758120338..706386ba02e3 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -45,6 +45,19 @@ config AD7476
 	  To compile this driver as a module, choose M here: the
 	  module will be called ad7476.
 
+config AD7887
+	tristate "Analog Devices AD7887 ADC driver"
+	depends on SPI
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
+	help
+	  Say yes here to build support for Analog Devices
+	  AD7887 SPI analog to digital converter (ADC).
+	  If unsure, say N (but it's safe to say "Y").
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad7887.
+
 config AT91_ADC
 	tristate "Atmel AT91 ADC"
 	depends on ARCH_AT91
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 900995d5e179..034eacb8f7c9 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -6,5 +6,6 @@ obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o
 obj-$(CONFIG_AD7266) += ad7266.o
 obj-$(CONFIG_AD7476) += ad7476.o
 obj-$(CONFIG_AD7791) += ad7791.o
+obj-$(CONFIG_AD7887) += ad7887.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
diff --git a/drivers/iio/adc/ad7887.c b/drivers/iio/adc/ad7887.c
new file mode 100644
index 000000000000..fd62309b4d3d
--- /dev/null
+++ b/drivers/iio/adc/ad7887.c
@@ -0,0 +1,378 @@
+/*
+ * AD7887 SPI ADC driver
+ *
+ * Copyright 2010-2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/spi/spi.h>
+#include <linux/regulator/consumer.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/buffer.h>
+
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+
+#include <linux/platform_data/ad7887.h>
+
+#define AD7887_REF_DIS		(1 << 5) /* on-chip reference disable */
+#define AD7887_DUAL		(1 << 4) /* dual-channel mode */
+#define AD7887_CH_AIN1		(1 << 3) /* convert on channel 1, DUAL=1 */
+#define AD7887_CH_AIN0		(0 << 3) /* convert on channel 0, DUAL=0,1 */
+#define AD7887_PM_MODE1		(0)	 /* CS based shutdown */
+#define AD7887_PM_MODE2		(1)	 /* full on */
+#define AD7887_PM_MODE3		(2)	 /* auto shutdown after conversion */
+#define AD7887_PM_MODE4		(3)	 /* standby mode */
+
+enum ad7887_channels {
+	AD7887_CH0,
+	AD7887_CH0_CH1,
+	AD7887_CH1,
+};
+
+#define RES_MASK(bits)	((1 << (bits)) - 1)
+
+/**
+ * struct ad7887_chip_info - chip specifc information
+ * @int_vref_mv:	the internal reference voltage
+ * @channel:		channel specification
+ */
+struct ad7887_chip_info {
+	u16				int_vref_mv;
+	struct iio_chan_spec		channel[3];
+};
+
+struct ad7887_state {
+	struct spi_device		*spi;
+	const struct ad7887_chip_info	*chip_info;
+	struct regulator		*reg;
+	struct spi_transfer		xfer[4];
+	struct spi_message		msg[3];
+	struct spi_message		*ring_msg;
+	unsigned char			tx_cmd_buf[4];
+
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 * Buffer needs to be large enough to hold two 16 bit samples and a
+	 * 64 bit aligned 64 bit timestamp.
+	 */
+	unsigned char data[ALIGN(4, sizeof(s64)) + sizeof(s64)]
+		____cacheline_aligned;
+};
+
+enum ad7887_supported_device_ids {
+	ID_AD7887
+};
+
+static int ad7887_ring_preenable(struct iio_dev *indio_dev)
+{
+	struct ad7887_state *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = iio_sw_buffer_preenable(indio_dev);
+	if (ret < 0)
+		return ret;
+
+	/* We know this is a single long so can 'cheat' */
+	switch (*indio_dev->active_scan_mask) {
+	case (1 << 0):
+		st->ring_msg = &st->msg[AD7887_CH0];
+		break;
+	case (1 << 1):
+		st->ring_msg = &st->msg[AD7887_CH1];
+		/* Dummy read: push CH1 setting down to hardware */
+		spi_sync(st->spi, st->ring_msg);
+		break;
+	case ((1 << 1) | (1 << 0)):
+		st->ring_msg = &st->msg[AD7887_CH0_CH1];
+		break;
+	}
+
+	return 0;
+}
+
+static int ad7887_ring_postdisable(struct iio_dev *indio_dev)
+{
+	struct ad7887_state *st = iio_priv(indio_dev);
+
+	/* dummy read: restore default CH0 settin */
+	return spi_sync(st->spi, &st->msg[AD7887_CH0]);
+}
+
+/**
+ * ad7887_trigger_handler() bh of trigger launched polling to ring buffer
+ *
+ * Currently there is no option in this driver to disable the saving of
+ * timestamps within the ring.
+ **/
+static irqreturn_t ad7887_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct ad7887_state *st = iio_priv(indio_dev);
+	s64 time_ns;
+	int b_sent;
+
+	b_sent = spi_sync(st->spi, st->ring_msg);
+	if (b_sent)
+		goto done;
+
+	time_ns = iio_get_time_ns();
+
+	if (indio_dev->scan_timestamp)
+		memcpy(st->data + indio_dev->scan_bytes - sizeof(s64),
+		       &time_ns, sizeof(time_ns));
+
+	iio_push_to_buffer(indio_dev->buffer, st->data);
+done:
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+
+static const struct iio_buffer_setup_ops ad7887_ring_setup_ops = {
+	.preenable = &ad7887_ring_preenable,
+	.postenable = &iio_triggered_buffer_postenable,
+	.predisable = &iio_triggered_buffer_predisable,
+	.postdisable = &ad7887_ring_postdisable,
+};
+
+static int ad7887_scan_direct(struct ad7887_state *st, unsigned ch)
+{
+	int ret = spi_sync(st->spi, &st->msg[ch]);
+	if (ret)
+		return ret;
+
+	return (st->data[(ch * 2)] << 8) | st->data[(ch * 2) + 1];
+}
+
+static int ad7887_read_raw(struct iio_dev *indio_dev,
+			   struct iio_chan_spec const *chan,
+			   int *val,
+			   int *val2,
+			   long m)
+{
+	int ret;
+	struct ad7887_state *st = iio_priv(indio_dev);
+
+	switch (m) {
+	case IIO_CHAN_INFO_RAW:
+		mutex_lock(&indio_dev->mlock);
+		if (iio_buffer_enabled(indio_dev))
+			ret = -EBUSY;
+		else
+			ret = ad7887_scan_direct(st, chan->address);
+		mutex_unlock(&indio_dev->mlock);
+
+		if (ret < 0)
+			return ret;
+		*val = ret >> chan->scan_type.shift;
+		*val &= RES_MASK(chan->scan_type.realbits);
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SCALE:
+		if (st->reg) {
+			*val = regulator_get_voltage(st->reg);
+			if (*val < 0)
+				return *val;
+			*val /= 1000;
+		} else {
+			*val = st->chip_info->int_vref_mv;
+		}
+
+		*val2 = chan->scan_type.realbits;
+
+		return IIO_VAL_FRACTIONAL_LOG2;
+	}
+	return -EINVAL;
+}
+
+
+static const struct ad7887_chip_info ad7887_chip_info_tbl[] = {
+	/*
+	 * More devices added in future
+	 */
+	[ID_AD7887] = {
+		.channel[0] = {
+			.type = IIO_VOLTAGE,
+			.indexed = 1,
+			.channel = 1,
+			.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
+			IIO_CHAN_INFO_SCALE_SHARED_BIT,
+			.address = 1,
+			.scan_index = 1,
+			.scan_type = IIO_ST('u', 12, 16, 0),
+		},
+		.channel[1] = {
+			.type = IIO_VOLTAGE,
+			.indexed = 1,
+			.channel = 0,
+			.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
+			IIO_CHAN_INFO_SCALE_SHARED_BIT,
+			.address = 0,
+			.scan_index = 0,
+			.scan_type = IIO_ST('u', 12, 16, 0),
+		},
+		.channel[2] = IIO_CHAN_SOFT_TIMESTAMP(2),
+		.int_vref_mv = 2500,
+	},
+};
+
+static const struct iio_info ad7887_info = {
+	.read_raw = &ad7887_read_raw,
+	.driver_module = THIS_MODULE,
+};
+
+static int __devinit ad7887_probe(struct spi_device *spi)
+{
+	struct ad7887_platform_data *pdata = spi->dev.platform_data;
+	struct ad7887_state *st;
+	struct iio_dev *indio_dev = iio_device_alloc(sizeof(*st));
+	uint8_t mode;
+	int ret;
+
+	if (indio_dev == NULL)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+
+	if (!pdata || !pdata->use_onchip_ref) {
+		st->reg = regulator_get(&spi->dev, "vref");
+		if (IS_ERR(st->reg)) {
+			ret = PTR_ERR(st->reg);
+			goto error_free;
+		}
+
+		ret = regulator_enable(st->reg);
+		if (ret)
+			goto error_put_reg;
+	}
+
+	st->chip_info =
+		&ad7887_chip_info_tbl[spi_get_device_id(spi)->driver_data];
+
+	spi_set_drvdata(spi, indio_dev);
+	st->spi = spi;
+
+	/* Estabilish that the iio_dev is a child of the spi device */
+	indio_dev->dev.parent = &spi->dev;
+	indio_dev->name = spi_get_device_id(spi)->name;
+	indio_dev->info = &ad7887_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+
+	/* Setup default message */
+
+	mode = AD7887_PM_MODE4;
+	if (!pdata || !pdata->use_onchip_ref)
+		mode |= AD7887_REF_DIS;
+	if (pdata && pdata->en_dual)
+		mode |= AD7887_DUAL;
+
+	st->tx_cmd_buf[0] = AD7887_CH_AIN0 | mode;
+
+	st->xfer[0].rx_buf = &st->data[0];
+	st->xfer[0].tx_buf = &st->tx_cmd_buf[0];
+	st->xfer[0].len = 2;
+
+	spi_message_init(&st->msg[AD7887_CH0]);
+	spi_message_add_tail(&st->xfer[0], &st->msg[AD7887_CH0]);
+
+	if (pdata && pdata->en_dual) {
+		st->tx_cmd_buf[2] = AD7887_CH_AIN1 | mode;
+
+		st->xfer[1].rx_buf = &st->data[0];
+		st->xfer[1].tx_buf = &st->tx_cmd_buf[2];
+		st->xfer[1].len = 2;
+
+		st->xfer[2].rx_buf = &st->data[2];
+		st->xfer[2].tx_buf = &st->tx_cmd_buf[0];
+		st->xfer[2].len = 2;
+
+		spi_message_init(&st->msg[AD7887_CH0_CH1]);
+		spi_message_add_tail(&st->xfer[1], &st->msg[AD7887_CH0_CH1]);
+		spi_message_add_tail(&st->xfer[2], &st->msg[AD7887_CH0_CH1]);
+
+		st->xfer[3].rx_buf = &st->data[2];
+		st->xfer[3].tx_buf = &st->tx_cmd_buf[2];
+		st->xfer[3].len = 2;
+
+		spi_message_init(&st->msg[AD7887_CH1]);
+		spi_message_add_tail(&st->xfer[3], &st->msg[AD7887_CH1]);
+
+		indio_dev->channels = st->chip_info->channel;
+		indio_dev->num_channels = 3;
+	} else {
+		indio_dev->channels = &st->chip_info->channel[1];
+		indio_dev->num_channels = 2;
+	}
+
+	ret = iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+			&ad7887_trigger_handler, &ad7887_ring_setup_ops);
+	if (ret)
+		goto error_disable_reg;
+
+	ret = iio_device_register(indio_dev);
+	if (ret)
+		goto error_unregister_ring;
+
+	return 0;
+error_unregister_ring:
+	iio_triggered_buffer_cleanup(indio_dev);
+error_disable_reg:
+	if (st->reg)
+		regulator_disable(st->reg);
+error_put_reg:
+	if (st->reg)
+		regulator_put(st->reg);
+error_free:
+	iio_device_free(indio_dev);
+
+	return ret;
+}
+
+static int __devexit ad7887_remove(struct spi_device *spi)
+{
+	struct iio_dev *indio_dev = spi_get_drvdata(spi);
+	struct ad7887_state *st = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+	iio_triggered_buffer_cleanup(indio_dev);
+	if (st->reg) {
+		regulator_disable(st->reg);
+		regulator_put(st->reg);
+	}
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static const struct spi_device_id ad7887_id[] = {
+	{"ad7887", ID_AD7887},
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad7887_id);
+
+static struct spi_driver ad7887_driver = {
+	.driver = {
+		.name	= "ad7887",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= ad7887_probe,
+	.remove		= __devexit_p(ad7887_remove),
+	.id_table	= ad7887_id,
+};
+module_spi_driver(ad7887_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Analog Devices AD7887 ADC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/Kconfig b/drivers/staging/iio/adc/Kconfig
index 71a515d0a6de..eba64fb64d82 100644
--- a/drivers/staging/iio/adc/Kconfig
+++ b/drivers/staging/iio/adc/Kconfig
@@ -68,19 +68,6 @@ config AD799X_RING_BUFFER
 	  Say yes here to include ring buffer support in the AD799X
 	  ADC driver.
 
-config AD7887
-	tristate "Analog Devices AD7887 ADC driver"
-	depends on SPI
-	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
-	help
-	  Say yes here to build support for Analog Devices
-	  AD7887 SPI analog to digital converter (ADC).
-	  If unsure, say N (but it's safe to say "Y").
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ad7887.
-
 config AD7780
 	tristate "Analog Devices AD7780 and similar ADCs driver"
 	depends on SPI
diff --git a/drivers/staging/iio/adc/Makefile b/drivers/staging/iio/adc/Makefile
index 8036fd14f68a..c56b41ee285b 100644
--- a/drivers/staging/iio/adc/Makefile
+++ b/drivers/staging/iio/adc/Makefile
@@ -17,8 +17,6 @@ ad799x-y := ad799x_core.o
 ad799x-$(CONFIG_AD799X_RING_BUFFER) += ad799x_ring.o
 obj-$(CONFIG_AD799X) += ad799x.o
 
-obj-$(CONFIG_AD7887) += ad7887.o
-
 ad7298-y := ad7298_core.o
 ad7298-$(CONFIG_IIO_BUFFER) += ad7298_ring.o
 obj-$(CONFIG_AD7298) += ad7298.o
diff --git a/drivers/staging/iio/adc/ad7887.c b/drivers/staging/iio/adc/ad7887.c
deleted file mode 100644
index 72cfe191cd83..000000000000
--- a/drivers/staging/iio/adc/ad7887.c
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * AD7887 SPI ADC driver
- *
- * Copyright 2010-2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
-#include <linux/spi/spi.h>
-#include <linux/regulator/consumer.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/sysfs.h>
-#include <linux/iio/buffer.h>
-
-#include <linux/iio/trigger_consumer.h>
-#include <linux/iio/triggered_buffer.h>
-
-#include "ad7887.h"
-
-#define AD7887_REF_DIS		(1 << 5) /* on-chip reference disable */
-#define AD7887_DUAL		(1 << 4) /* dual-channel mode */
-#define AD7887_CH_AIN1		(1 << 3) /* convert on channel 1, DUAL=1 */
-#define AD7887_CH_AIN0		(0 << 3) /* convert on channel 0, DUAL=0,1 */
-#define AD7887_PM_MODE1		(0)	 /* CS based shutdown */
-#define AD7887_PM_MODE2		(1)	 /* full on */
-#define AD7887_PM_MODE3		(2)	 /* auto shutdown after conversion */
-#define AD7887_PM_MODE4		(3)	 /* standby mode */
-
-enum ad7887_channels {
-	AD7887_CH0,
-	AD7887_CH0_CH1,
-	AD7887_CH1,
-};
-
-#define RES_MASK(bits)	((1 << (bits)) - 1)
-
-/**
- * struct ad7887_chip_info - chip specifc information
- * @int_vref_mv:	the internal reference voltage
- * @channel:		channel specification
- */
-struct ad7887_chip_info {
-	u16				int_vref_mv;
-	struct iio_chan_spec		channel[3];
-};
-
-struct ad7887_state {
-	struct spi_device		*spi;
-	const struct ad7887_chip_info	*chip_info;
-	struct regulator		*reg;
-	struct spi_transfer		xfer[4];
-	struct spi_message		msg[3];
-	struct spi_message		*ring_msg;
-	unsigned char			tx_cmd_buf[4];
-
-	/*
-	 * DMA (thus cache coherency maintenance) requires the
-	 * transfer buffers to live in their own cache lines.
-	 * Buffer needs to be large enough to hold two 16 bit samples and a
-	 * 64 bit aligned 64 bit timestamp.
-	 */
-	unsigned char data[ALIGN(4, sizeof(s64)) + sizeof(s64)]
-		____cacheline_aligned;
-};
-
-enum ad7887_supported_device_ids {
-	ID_AD7887
-};
-
-static int ad7887_ring_preenable(struct iio_dev *indio_dev)
-{
-	struct ad7887_state *st = iio_priv(indio_dev);
-	int ret;
-
-	ret = iio_sw_buffer_preenable(indio_dev);
-	if (ret < 0)
-		return ret;
-
-	/* We know this is a single long so can 'cheat' */
-	switch (*indio_dev->active_scan_mask) {
-	case (1 << 0):
-		st->ring_msg = &st->msg[AD7887_CH0];
-		break;
-	case (1 << 1):
-		st->ring_msg = &st->msg[AD7887_CH1];
-		/* Dummy read: push CH1 setting down to hardware */
-		spi_sync(st->spi, st->ring_msg);
-		break;
-	case ((1 << 1) | (1 << 0)):
-		st->ring_msg = &st->msg[AD7887_CH0_CH1];
-		break;
-	}
-
-	return 0;
-}
-
-static int ad7887_ring_postdisable(struct iio_dev *indio_dev)
-{
-	struct ad7887_state *st = iio_priv(indio_dev);
-
-	/* dummy read: restore default CH0 settin */
-	return spi_sync(st->spi, &st->msg[AD7887_CH0]);
-}
-
-/**
- * ad7887_trigger_handler() bh of trigger launched polling to ring buffer
- *
- * Currently there is no option in this driver to disable the saving of
- * timestamps within the ring.
- **/
-static irqreturn_t ad7887_trigger_handler(int irq, void *p)
-{
-	struct iio_poll_func *pf = p;
-	struct iio_dev *indio_dev = pf->indio_dev;
-	struct ad7887_state *st = iio_priv(indio_dev);
-	s64 time_ns;
-	int b_sent;
-
-	b_sent = spi_sync(st->spi, st->ring_msg);
-	if (b_sent)
-		goto done;
-
-	time_ns = iio_get_time_ns();
-
-	if (indio_dev->scan_timestamp)
-		memcpy(st->data + indio_dev->scan_bytes - sizeof(s64),
-		       &time_ns, sizeof(time_ns));
-
-	iio_push_to_buffer(indio_dev->buffer, st->data);
-done:
-	iio_trigger_notify_done(indio_dev->trig);
-
-	return IRQ_HANDLED;
-}
-
-static const struct iio_buffer_setup_ops ad7887_ring_setup_ops = {
-	.preenable = &ad7887_ring_preenable,
-	.postenable = &iio_triggered_buffer_postenable,
-	.predisable = &iio_triggered_buffer_predisable,
-	.postdisable = &ad7887_ring_postdisable,
-};
-
-static int ad7887_scan_direct(struct ad7887_state *st, unsigned ch)
-{
-	int ret = spi_sync(st->spi, &st->msg[ch]);
-	if (ret)
-		return ret;
-
-	return (st->data[(ch * 2)] << 8) | st->data[(ch * 2) + 1];
-}
-
-static int ad7887_read_raw(struct iio_dev *indio_dev,
-			   struct iio_chan_spec const *chan,
-			   int *val,
-			   int *val2,
-			   long m)
-{
-	int ret;
-	struct ad7887_state *st = iio_priv(indio_dev);
-
-	switch (m) {
-	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
-		if (iio_buffer_enabled(indio_dev))
-			ret = -EBUSY;
-		else
-			ret = ad7887_scan_direct(st, chan->address);
-		mutex_unlock(&indio_dev->mlock);
-
-		if (ret < 0)
-			return ret;
-		*val = ret >> chan->scan_type.shift;
-		*val &= RES_MASK(chan->scan_type.realbits);
-		return IIO_VAL_INT;
-	case IIO_CHAN_INFO_SCALE:
-		if (st->reg) {
-			*val = regulator_get_voltage(st->reg);
-			if (*val < 0)
-				return *val;
-			*val /= 1000;
-		} else {
-			*val = st->chip_info->int_vref_mv;
-		}
-
-		*val2 = chan->scan_type.realbits;
-
-		return IIO_VAL_FRACTIONAL_LOG2;
-	}
-	return -EINVAL;
-}
-
-
-static const struct ad7887_chip_info ad7887_chip_info_tbl[] = {
-	/*
-	 * More devices added in future
-	 */
-	[ID_AD7887] = {
-		.channel[0] = {
-			.type = IIO_VOLTAGE,
-			.indexed = 1,
-			.channel = 1,
-			.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
-			IIO_CHAN_INFO_SCALE_SHARED_BIT,
-			.address = 1,
-			.scan_index = 1,
-			.scan_type = IIO_ST('u', 12, 16, 0),
-		},
-		.channel[1] = {
-			.type = IIO_VOLTAGE,
-			.indexed = 1,
-			.channel = 0,
-			.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
-			IIO_CHAN_INFO_SCALE_SHARED_BIT,
-			.address = 0,
-			.scan_index = 0,
-			.scan_type = IIO_ST('u', 12, 16, 0),
-		},
-		.channel[2] = IIO_CHAN_SOFT_TIMESTAMP(2),
-		.int_vref_mv = 2500,
-	},
-};
-
-static const struct iio_info ad7887_info = {
-	.read_raw = &ad7887_read_raw,
-	.driver_module = THIS_MODULE,
-};
-
-static int __devinit ad7887_probe(struct spi_device *spi)
-{
-	struct ad7887_platform_data *pdata = spi->dev.platform_data;
-	struct ad7887_state *st;
-	struct iio_dev *indio_dev = iio_device_alloc(sizeof(*st));
-	uint8_t mode;
-	int ret;
-
-	if (indio_dev == NULL)
-		return -ENOMEM;
-
-	st = iio_priv(indio_dev);
-
-	if (!pdata || !pdata->use_onchip_ref) {
-		st->reg = regulator_get(&spi->dev, "vref");
-		if (IS_ERR(st->reg)) {
-			ret = PTR_ERR(st->reg);
-			goto error_free;
-		}
-
-		ret = regulator_enable(st->reg);
-		if (ret)
-			goto error_put_reg;
-	}
-
-	st->chip_info =
-		&ad7887_chip_info_tbl[spi_get_device_id(spi)->driver_data];
-
-	spi_set_drvdata(spi, indio_dev);
-	st->spi = spi;
-
-	/* Estabilish that the iio_dev is a child of the spi device */
-	indio_dev->dev.parent = &spi->dev;
-	indio_dev->name = spi_get_device_id(spi)->name;
-	indio_dev->info = &ad7887_info;
-	indio_dev->modes = INDIO_DIRECT_MODE;
-
-	/* Setup default message */
-
-	mode = AD7887_PM_MODE4;
-	if (!pdata || !pdata->use_onchip_ref)
-		mode |= AD7887_REF_DIS;
-	if (pdata && pdata->en_dual)
-		mode |= AD7887_DUAL;
-
-	st->tx_cmd_buf[0] = AD7887_CH_AIN0 | mode;
-
-	st->xfer[0].rx_buf = &st->data[0];
-	st->xfer[0].tx_buf = &st->tx_cmd_buf[0];
-	st->xfer[0].len = 2;
-
-	spi_message_init(&st->msg[AD7887_CH0]);
-	spi_message_add_tail(&st->xfer[0], &st->msg[AD7887_CH0]);
-
-	if (pdata && pdata->en_dual) {
-		st->tx_cmd_buf[2] = AD7887_CH_AIN1 | mode;
-
-		st->xfer[1].rx_buf = &st->data[0];
-		st->xfer[1].tx_buf = &st->tx_cmd_buf[2];
-		st->xfer[1].len = 2;
-
-		st->xfer[2].rx_buf = &st->data[2];
-		st->xfer[2].tx_buf = &st->tx_cmd_buf[0];
-		st->xfer[2].len = 2;
-
-		spi_message_init(&st->msg[AD7887_CH0_CH1]);
-		spi_message_add_tail(&st->xfer[1], &st->msg[AD7887_CH0_CH1]);
-		spi_message_add_tail(&st->xfer[2], &st->msg[AD7887_CH0_CH1]);
-
-		st->xfer[3].rx_buf = &st->data[2];
-		st->xfer[3].tx_buf = &st->tx_cmd_buf[2];
-		st->xfer[3].len = 2;
-
-		spi_message_init(&st->msg[AD7887_CH1]);
-		spi_message_add_tail(&st->xfer[3], &st->msg[AD7887_CH1]);
-
-		indio_dev->channels = st->chip_info->channel;
-		indio_dev->num_channels = 3;
-	} else {
-		indio_dev->channels = &st->chip_info->channel[1];
-		indio_dev->num_channels = 2;
-	}
-
-	ret = iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
-			&ad7887_trigger_handler, &ad7887_ring_setup_ops);
-	if (ret)
-		goto error_disable_reg;
-
-	ret = iio_device_register(indio_dev);
-	if (ret)
-		goto error_unregister_ring;
-
-	return 0;
-error_unregister_ring:
-	iio_triggered_buffer_cleanup(indio_dev);
-error_disable_reg:
-	if (st->reg)
-		regulator_disable(st->reg);
-error_put_reg:
-	if (st->reg)
-		regulator_put(st->reg);
-error_free:
-	iio_device_free(indio_dev);
-
-	return ret;
-}
-
-static int __devexit ad7887_remove(struct spi_device *spi)
-{
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-	struct ad7887_state *st = iio_priv(indio_dev);
-
-	iio_device_unregister(indio_dev);
-	iio_triggered_buffer_cleanup(indio_dev);
-	if (st->reg) {
-		regulator_disable(st->reg);
-		regulator_put(st->reg);
-	}
-	iio_device_free(indio_dev);
-
-	return 0;
-}
-
-static const struct spi_device_id ad7887_id[] = {
-	{"ad7887", ID_AD7887},
-	{}
-};
-MODULE_DEVICE_TABLE(spi, ad7887_id);
-
-static struct spi_driver ad7887_driver = {
-	.driver = {
-		.name	= "ad7887",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= ad7887_probe,
-	.remove		= __devexit_p(ad7887_remove),
-	.id_table	= ad7887_id,
-};
-module_spi_driver(ad7887_driver);
-
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
-MODULE_DESCRIPTION("Analog Devices AD7887 ADC");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/ad7887.h b/drivers/staging/iio/adc/ad7887.h
deleted file mode 100644
index 16c2d05e5e05..000000000000
--- a/drivers/staging/iio/adc/ad7887.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * AD7887 SPI ADC driver
- *
- * Copyright 2010 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-#ifndef IIO_ADC_AD7887_H_
-#define IIO_ADC_AD7887_H_
-
-/*
- * TODO: struct ad7887_platform_data needs to go into include/linux/iio
- */
-
-
-/**
- * struct ad7887_platform_data - AD7887 ADC driver platform data
- * @en_dual: Whether to use dual channel mode. If set to true AIN1 becomes the
- *	second input channel, and Vref is internally connected to Vdd. If set to
- *	false the device is used in single channel mode and AIN1/Vref is used as
- *	VREF input.
- * @use_onchip_ref: Whether to use the onchip reference. If set to true the
- *	internal 2.5V reference is used. If set to false a external reference is
- *	used.
- */
-struct ad7887_platform_data {
-	bool en_dual;
-	bool use_onchip_ref;
-};
-
-#endif /* IIO_ADC_AD7887_H_ */
diff --git a/include/linux/platform_data/ad7887.h b/include/linux/platform_data/ad7887.h
new file mode 100644
index 000000000000..1e06eac3174d
--- /dev/null
+++ b/include/linux/platform_data/ad7887.h
@@ -0,0 +1,26 @@
+/*
+ * AD7887 SPI ADC driver
+ *
+ * Copyright 2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+#ifndef IIO_ADC_AD7887_H_
+#define IIO_ADC_AD7887_H_
+
+/**
+ * struct ad7887_platform_data - AD7887 ADC driver platform data
+ * @en_dual: Whether to use dual channel mode. If set to true AIN1 becomes the
+ *	second input channel, and Vref is internally connected to Vdd. If set to
+ *	false the device is used in single channel mode and AIN1/Vref is used as
+ *	VREF input.
+ * @use_onchip_ref: Whether to use the onchip reference. If set to true the
+ *	internal 2.5V reference is used. If set to false a external reference is
+ *	used.
+ */
+struct ad7887_platform_data {
+	bool en_dual;
+	bool use_onchip_ref;
+};
+
+#endif /* IIO_ADC_AD7887_H_ */
-- 
cgit v1.2.3-55-g7522


From d1fb57435c108b8dd66d7f47b4c60c1798dcae4c Mon Sep 17 00:00:00 2001
From: Patil, Rachna
Date: Tue, 16 Oct 2012 12:55:39 +0530
Subject: input: TSC: ti_tscadc: Add Step configuration as platform data

There are 16 programmable Step Configuration
registers which are used by the sequencer.
Program the Steps in order to configure a channel
input to be sampled. If the same step is applied
several times, the coordinate values read are more
accurate.
Hence we provide the user an option of how many steps
should be configured.

For ex: If this value is assigned as 4, This means that
4 steps are applied to read x co-ordinate and 4 steps to read
y co-ordinate. Furtheron the interrupt handler already
holds code to use delta filter and report the best value
out of these values to the input sub-system.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/ti_tscadc.c | 25 +++++++++++++------------
 include/linux/input/ti_tscadc.h       |  6 ++++++
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c
index d198cab61a69..c1bd8e5c132c 100644
--- a/drivers/input/touchscreen/ti_tscadc.c
+++ b/drivers/input/touchscreen/ti_tscadc.c
@@ -41,10 +41,6 @@
 #define REG_CHARGEDELAY		0x060
 #define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
 #define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_STEPCONFIG13	0x0C4
-#define REG_STEPDELAY13		0x0C8
-#define REG_STEPCONFIG14	0x0CC
-#define REG_STEPDELAY14		0x0D0
 #define REG_FIFO0CNT		0xE4
 #define REG_FIFO1THR		0xF4
 #define REG_FIFO0		0x100
@@ -134,6 +130,7 @@ struct tscadc {
 	unsigned int		wires;
 	unsigned int		x_plate_resistance;
 	bool			pen_down;
+	int			steps_to_configure;
 };
 
 static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg)
@@ -150,9 +147,10 @@ static void tscadc_writel(struct tscadc *tsc, unsigned int reg,
 static void tscadc_step_config(struct tscadc *ts_dev)
 {
 	unsigned int	config;
-	int i;
+	int i, total_steps;
 
 	/* Configure the Step registers */
+	total_steps = 2 * ts_dev->steps_to_configure;
 
 	config = STEPCONFIG_MODE_HWSYNC |
 			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
@@ -170,7 +168,7 @@ static void tscadc_step_config(struct tscadc *ts_dev)
 		break;
 	}
 
-	for (i = 1; i < 7; i++) {
+	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
 		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
 		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
 	}
@@ -192,7 +190,7 @@ static void tscadc_step_config(struct tscadc *ts_dev)
 		break;
 	}
 
-	for (i = 7; i < 13; i++) {
+	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
 		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
 		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
 	}
@@ -211,12 +209,14 @@ static void tscadc_step_config(struct tscadc *ts_dev)
 	config = STEPCONFIG_MODE_HWSYNC |
 			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
 			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
-	tscadc_writel(ts_dev, REG_STEPCONFIG13, config);
-	tscadc_writel(ts_dev, REG_STEPDELAY13, STEPCONFIG_OPENDLY);
+	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
+	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
+			STEPCONFIG_OPENDLY);
 
 	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
-	tscadc_writel(ts_dev, REG_STEPCONFIG14, config);
-	tscadc_writel(ts_dev, REG_STEPDELAY14, STEPCONFIG_OPENDLY);
+	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
+	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
+			STEPCONFIG_OPENDLY);
 
 	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
 }
@@ -379,6 +379,7 @@ static int __devinit tscadc_probe(struct platform_device *pdev)
 	ts_dev->irq = irq;
 	ts_dev->wires = pdata->wires;
 	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->steps_to_configure;
 
 	res = request_mem_region(res->start, resource_size(res), pdev->name);
 	if (!res) {
@@ -447,7 +448,7 @@ static int __devinit tscadc_probe(struct platform_device *pdev)
 	tscadc_idle_config(ts_dev);
 	tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO1THRES);
 	tscadc_step_config(ts_dev);
-	tscadc_writel(ts_dev, REG_FIFO1THR, 6);
+	tscadc_writel(ts_dev, REG_FIFO1THR, ts_dev->steps_to_configure);
 
 	ctrl |= CNTRLREG_TSCSSENB;
 	tscadc_writel(ts_dev, REG_CTRL, ctrl);
diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h
index b10a527a92a4..ad442a3eb68b 100644
--- a/include/linux/input/ti_tscadc.h
+++ b/include/linux/input/ti_tscadc.h
@@ -7,11 +7,17 @@
  *			i.e. 4/5/8 wire touchscreen support
  *			on the platform.
  * @x_plate_resistance:	X plate resistance.
+ * @steps_to_configure:	The sequencer supports a total of
+ *			16 programmable steps.
+ *			A step configured to read a single
+ *			co-ordinate value, can be applied
+ *			more number of times for better results.
  */
 
 struct tsc_data {
 	int wires;
 	int x_plate_resistance;
+	int steps_to_configure;
 };
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 55c04de5176ea3eac6fdc469a6a063c5cb91ed7c Mon Sep 17 00:00:00 2001
From: Patil, Rachna
Date: Tue, 16 Oct 2012 12:55:42 +0530
Subject: input: TSC: ti_tscadc: Rename the existing touchscreen driver

Make way for addition of MFD driver.
The existing touchsreen driver is a MFD client.
For better readability we rename the file to
indicate its functionality as only touchscreen.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/Kconfig         |   4 +-
 drivers/input/touchscreen/Makefile        |   2 +-
 drivers/input/touchscreen/ti_am335x_tsc.c | 522 ++++++++++++++++++++++++++++++
 drivers/input/touchscreen/ti_tscadc.c     | 522 ------------------------------
 include/linux/input/ti_am335x_tsc.h       |  23 ++
 include/linux/input/ti_tscadc.h           |  23 --
 6 files changed, 548 insertions(+), 548 deletions(-)
 create mode 100644 drivers/input/touchscreen/ti_am335x_tsc.c
 delete mode 100644 drivers/input/touchscreen/ti_tscadc.c
 create mode 100644 include/linux/input/ti_am335x_tsc.h
 delete mode 100644 include/linux/input/ti_tscadc.h

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index f7668b24c378..d31dc5faeaaf 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -529,7 +529,7 @@ config TOUCHSCREEN_TOUCHWIN
 	  To compile this driver as a module, choose M here: the
 	  module will be called touchwin.
 
-config TOUCHSCREEN_TI_TSCADC
+config TOUCHSCREEN_TI_AM335X_TSC
 	tristate "TI Touchscreen Interface"
 	depends on ARCH_OMAP2PLUS
 	help
@@ -539,7 +539,7 @@ config TOUCHSCREEN_TI_TSCADC
 	  If unsure, say N.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called ti_tscadc.
+	  module will be called ti_am335x_tsc.
 
 config TOUCHSCREEN_ATMEL_TSADCC
 	tristate "Atmel Touchscreen Interface"
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 178eb128d90f..7c4c78ebe49e 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_TOUCHSCREEN_PIXCIR)	+= pixcir_i2c_ts.o
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)	+= s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)	+= st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)		+= stmpe-ts.o
-obj-$(CONFIG_TOUCHSCREEN_TI_TSCADC)	+= ti_tscadc.o
+obj-$(CONFIG_TOUCHSCREEN_TI_AM335X_TSC)	+= ti_am335x_tsc.o
 obj-$(CONFIG_TOUCHSCREEN_TNETV107X)	+= tnetv107x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
new file mode 100644
index 000000000000..462950acb97b
--- /dev/null
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -0,0 +1,522 @@
+/*
+ * TI Touch Screen driver
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/input/ti_am335x_tsc.h>
+#include <linux/delay.h>
+
+#define REG_RAWIRQSTATUS	0x024
+#define REG_IRQSTATUS		0x028
+#define REG_IRQENABLE		0x02C
+#define REG_IRQWAKEUP		0x034
+#define REG_CTRL		0x040
+#define REG_ADCFSM		0x044
+#define REG_CLKDIV		0x04C
+#define REG_SE			0x054
+#define REG_IDLECONFIG		0x058
+#define REG_CHARGECONFIG	0x05C
+#define REG_CHARGEDELAY		0x060
+#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
+#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
+#define REG_FIFO0CNT		0xE4
+#define REG_FIFO0THR		0xE8
+#define REG_FIFO1THR		0xF4
+#define REG_FIFO0		0x100
+#define REG_FIFO1		0x200
+
+/*	Register Bitfields	*/
+#define IRQWKUP_ENB		BIT(0)
+
+/* Step Enable */
+#define STEPENB_MASK		(0x1FFFF << 0)
+#define STEPENB(val)		(val << 0)
+#define STPENB_STEPENB		STEPENB(0x7FFF)
+
+/* IRQ enable */
+#define IRQENB_FIFO0THRES	BIT(2)
+#define IRQENB_FIFO1THRES	BIT(5)
+#define IRQENB_PENUP		BIT(9)
+
+/* Step Configuration */
+#define STEPCONFIG_MODE_MASK	(3 << 0)
+#define STEPCONFIG_MODE(val)	(val << 0)
+#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
+#define STEPCONFIG_AVG_MASK	(7 << 2)
+#define STEPCONFIG_AVG(val)	(val << 2)
+#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
+#define STEPCONFIG_XPP		BIT(5)
+#define STEPCONFIG_XNN		BIT(6)
+#define STEPCONFIG_YPP		BIT(7)
+#define STEPCONFIG_YNN		BIT(8)
+#define STEPCONFIG_XNP		BIT(9)
+#define STEPCONFIG_YPN		BIT(10)
+#define STEPCONFIG_INM_MASK	(0xF << 15)
+#define STEPCONFIG_INM(val)	(val << 15)
+#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
+#define STEPCONFIG_INP_MASK	(0xF << 19)
+#define STEPCONFIG_INP(val)	(val << 19)
+#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
+#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
+#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
+#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
+#define STEPCONFIG_FIFO1	BIT(26)
+
+/* Delay register */
+#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
+#define STEPDELAY_OPEN(val)	(val << 0)
+#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
+
+/* Charge Config */
+#define STEPCHARGE_RFP_MASK	(7 << 12)
+#define STEPCHARGE_RFP(val)	(val << 12)
+#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
+#define STEPCHARGE_INM_MASK	(0xF << 15)
+#define STEPCHARGE_INM(val)	(val << 15)
+#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
+#define STEPCHARGE_INP_MASK	(0xF << 19)
+#define STEPCHARGE_INP(val)	(val << 19)
+#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
+#define STEPCHARGE_RFM_MASK	(3 << 23)
+#define STEPCHARGE_RFM(val)	(val << 23)
+#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
+
+/* Charge delay */
+#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
+#define CHARGEDLY_OPEN(val)	(val << 0)
+#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
+
+/* Control register */
+#define CNTRLREG_TSCSSENB	BIT(0)
+#define CNTRLREG_STEPID		BIT(1)
+#define CNTRLREG_STEPCONFIGWRT	BIT(2)
+#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
+#define CNTRLREG_AFE_CTRL(val)	(val << 5)
+#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
+#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
+#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
+#define CNTRLREG_TSCENB		BIT(7)
+
+#define ADCFSM_STEPID		0x10
+#define SEQ_SETTLE		275
+#define ADC_CLK			3000000
+#define MAX_12BIT		((1 << 12) - 1)
+
+struct titsc {
+	struct input_dev	*input;
+	struct clk		*tsc_ick;
+	void __iomem		*tsc_base;
+	unsigned int		irq;
+	unsigned int		wires;
+	unsigned int		x_plate_resistance;
+	bool			pen_down;
+	int			steps_to_configure;
+};
+
+static unsigned int titsc_readl(struct titsc *ts, unsigned int reg)
+{
+	return readl(ts->tsc_base + reg);
+}
+
+static void titsc_writel(struct titsc *tsc, unsigned int reg,
+					unsigned int val)
+{
+	writel(val, tsc->tsc_base + reg);
+}
+
+static void titsc_step_config(struct titsc *ts_dev)
+{
+	unsigned int	config;
+	int i, total_steps;
+
+	/* Configure the Step registers */
+	total_steps = 2 * ts_dev->steps_to_configure;
+
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
+	switch (ts_dev->wires) {
+	case 4:
+		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
+		break;
+	case 5:
+		config |= STEPCONFIG_YNN |
+				STEPCONFIG_INP_AN4 | STEPCONFIG_XNN |
+				STEPCONFIG_YPP;
+		break;
+	case 8:
+		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
+		break;
+	}
+
+	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
+		titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
+		titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+	}
+
+	config = 0;
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_YNN |
+			STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1;
+	switch (ts_dev->wires) {
+	case 4:
+		config |= STEPCONFIG_YPP;
+		break;
+	case 5:
+		config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 |
+				STEPCONFIG_XNP | STEPCONFIG_YPN;
+		break;
+	case 8:
+		config |= STEPCONFIG_YPP;
+		break;
+	}
+
+	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
+		titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
+		titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+	}
+
+	config = 0;
+	/* Charge step configuration */
+	config = STEPCONFIG_XPP | STEPCONFIG_YNN |
+			STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR |
+			STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1;
+
+	titsc_writel(ts_dev, REG_CHARGECONFIG, config);
+	titsc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY);
+
+	config = 0;
+	/* Configure to calculate pressure */
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
+			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
+	titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
+	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
+			STEPCONFIG_OPENDLY);
+
+	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
+	titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
+	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
+			STEPCONFIG_OPENDLY);
+
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
+}
+
+static void titsc_idle_config(struct titsc *ts_config)
+{
+	unsigned int idleconfig;
+
+	idleconfig = STEPCONFIG_YNN |
+			STEPCONFIG_INM_ADCREFM |
+			STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM;
+	titsc_writel(ts_config, REG_IDLECONFIG, idleconfig);
+}
+
+static void titsc_read_coordinates(struct titsc *ts_dev,
+				    unsigned int *x, unsigned int *y)
+{
+	unsigned int fifocount = titsc_readl(ts_dev, REG_FIFO0CNT);
+	unsigned int prev_val_x = ~0, prev_val_y = ~0;
+	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
+	unsigned int read, diff;
+	unsigned int i;
+
+	/*
+	 * Delta filter is used to remove large variations in sampled
+	 * values from ADC. The filter tries to predict where the next
+	 * coordinate could be. This is done by taking a previous
+	 * coordinate and subtracting it form current one. Further the
+	 * algorithm compares the difference with that of a present value,
+	 * if true the value is reported to the sub system.
+	 */
+	for (i = 0; i < fifocount - 1; i++) {
+		read = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
+		diff = abs(read - prev_val_x);
+		if (diff < prev_diff_x) {
+			prev_diff_x = diff;
+			*x = read;
+		}
+		prev_val_x = read;
+
+		read = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
+		diff = abs(read - prev_val_y);
+		if (diff < prev_diff_y) {
+			prev_diff_y = diff;
+			*y = read;
+		}
+		prev_val_y = read;
+	}
+}
+
+static irqreturn_t titsc_irq(int irq, void *dev)
+{
+	struct titsc *ts_dev = dev;
+	struct input_dev *input_dev = ts_dev->input;
+	unsigned int status, irqclr = 0;
+	unsigned int x = 0, y = 0;
+	unsigned int z1, z2, z;
+	unsigned int fsm;
+
+	status = titsc_readl(ts_dev, REG_IRQSTATUS);
+	if (status & IRQENB_FIFO0THRES) {
+		titsc_read_coordinates(ts_dev, &x, &y);
+
+		z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
+		z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
+
+		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
+			/*
+			 * Calculate pressure using formula
+			 * Resistance(touch) = x plate resistance *
+			 * x postion/4096 * ((z2 / z1) - 1)
+			 */
+			z = z2 - z1;
+			z *= x;
+			z *= ts_dev->x_plate_resistance;
+			z /= z1;
+			z = (z + 2047) >> 12;
+
+			if (z <= MAX_12BIT) {
+				input_report_abs(input_dev, ABS_X, x);
+				input_report_abs(input_dev, ABS_Y, y);
+				input_report_abs(input_dev, ABS_PRESSURE, z);
+				input_report_key(input_dev, BTN_TOUCH, 1);
+				input_sync(input_dev);
+			}
+		}
+		irqclr |= IRQENB_FIFO0THRES;
+	}
+
+	/*
+	 * Time for sequencer to settle, to read
+	 * correct state of the sequencer.
+	 */
+	udelay(SEQ_SETTLE);
+
+	status = titsc_readl(ts_dev, REG_RAWIRQSTATUS);
+	if (status & IRQENB_PENUP) {
+		/* Pen up event */
+		fsm = titsc_readl(ts_dev, REG_ADCFSM);
+		if (fsm == ADCFSM_STEPID) {
+			ts_dev->pen_down = false;
+			input_report_key(input_dev, BTN_TOUCH, 0);
+			input_report_abs(input_dev, ABS_PRESSURE, 0);
+			input_sync(input_dev);
+		} else {
+			ts_dev->pen_down = true;
+		}
+		irqclr |= IRQENB_PENUP;
+	}
+
+	titsc_writel(ts_dev, REG_IRQSTATUS, irqclr);
+
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
+	return IRQ_HANDLED;
+}
+
+/*
+ * The functions for inserting/removing driver as a module.
+ */
+
+static int __devinit titsc_probe(struct platform_device *pdev)
+{
+	const struct tsc_data *pdata = pdev->dev.platform_data;
+	struct resource *res;
+	struct titsc *ts_dev;
+	struct input_dev *input_dev;
+	struct clk *clk;
+	int err;
+	int clk_value, ctrl, irq;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "missing platform data.\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined.\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no irq ID is specified.\n");
+		return -EINVAL;
+	}
+
+	/* Allocate memory for device */
+	ts_dev = kzalloc(sizeof(struct titsc), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!ts_dev || !input_dev) {
+		dev_err(&pdev->dev, "failed to allocate memory.\n");
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	ts_dev->input = input_dev;
+	ts_dev->irq = irq;
+	ts_dev->wires = pdata->wires;
+	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->steps_to_configure;
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to reserve registers.\n");
+		err = -EBUSY;
+		goto err_free_mem;
+	}
+
+	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
+	if (!ts_dev->tsc_base) {
+		dev_err(&pdev->dev, "failed to map registers.\n");
+		err = -ENOMEM;
+		goto err_release_mem_region;
+	}
+
+	err = request_irq(ts_dev->irq, titsc_irq,
+			  0, pdev->dev.driver->name, ts_dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to allocate irq.\n");
+		goto err_unmap_regs;
+	}
+
+	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
+	if (IS_ERR(ts_dev->tsc_ick)) {
+		dev_err(&pdev->dev, "failed to get TSC ick\n");
+		goto err_free_irq;
+	}
+	clk_enable(ts_dev->tsc_ick);
+
+	clk = clk_get(&pdev->dev, "adc_tsc_fck");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get TSC fck\n");
+		err = PTR_ERR(clk);
+		goto err_disable_clk;
+	}
+
+	clk_value = clk_get_rate(clk) / ADC_CLK;
+	clk_put(clk);
+
+	if (clk_value < 7) {
+		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
+		goto err_disable_clk;
+	}
+	/* CLKDIV needs to be configured to the value minus 1 */
+	titsc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
+
+	 /* Enable wake-up of the SoC using touchscreen */
+	titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
+
+	ctrl = CNTRLREG_STEPCONFIGWRT |
+			CNTRLREG_TSCENB |
+			CNTRLREG_STEPID;
+	switch (ts_dev->wires) {
+	case 4:
+		ctrl |= CNTRLREG_4WIRE;
+		break;
+	case 5:
+		ctrl |= CNTRLREG_5WIRE;
+		break;
+	case 8:
+		ctrl |= CNTRLREG_8WIRE;
+		break;
+	}
+	titsc_writel(ts_dev, REG_CTRL, ctrl);
+
+	titsc_idle_config(ts_dev);
+	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
+	titsc_step_config(ts_dev);
+	titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
+
+	ctrl |= CNTRLREG_TSCSSENB;
+	titsc_writel(ts_dev, REG_CTRL, ctrl);
+
+	input_dev->name = "ti-tsc-adc";
+	input_dev->dev.parent = &pdev->dev;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+
+	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
+
+	/* register to the input system */
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_disable_clk;
+
+	platform_set_drvdata(pdev, ts_dev);
+	return 0;
+
+err_disable_clk:
+	clk_disable(ts_dev->tsc_ick);
+	clk_put(ts_dev->tsc_ick);
+err_free_irq:
+	free_irq(ts_dev->irq, ts_dev);
+err_unmap_regs:
+	iounmap(ts_dev->tsc_base);
+err_release_mem_region:
+	release_mem_region(res->start, resource_size(res));
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(ts_dev);
+	return err;
+}
+
+static int __devexit titsc_remove(struct platform_device *pdev)
+{
+	struct titsc *ts_dev = platform_get_drvdata(pdev);
+	struct resource *res;
+
+	free_irq(ts_dev->irq, ts_dev);
+
+	input_unregister_device(ts_dev->input);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iounmap(ts_dev->tsc_base);
+	release_mem_region(res->start, resource_size(res));
+
+	clk_disable(ts_dev->tsc_ick);
+	clk_put(ts_dev->tsc_ick);
+
+	kfree(ts_dev);
+
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static struct platform_driver ti_tsc_driver = {
+	.probe	= titsc_probe,
+	.remove	= __devexit_p(titsc_remove),
+	.driver	= {
+		.name   = "tsc",
+		.owner	= THIS_MODULE,
+	},
+};
+module_platform_driver(ti_tsc_driver);
+
+MODULE_DESCRIPTION("TI touchscreen controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c
deleted file mode 100644
index ec0a442478a8..000000000000
--- a/drivers/input/touchscreen/ti_tscadc.c
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * TI Touch Screen driver
- *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/input.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/input/ti_tscadc.h>
-#include <linux/delay.h>
-
-#define REG_RAWIRQSTATUS	0x024
-#define REG_IRQSTATUS		0x028
-#define REG_IRQENABLE		0x02C
-#define REG_IRQWAKEUP		0x034
-#define REG_CTRL		0x040
-#define REG_ADCFSM		0x044
-#define REG_CLKDIV		0x04C
-#define REG_SE			0x054
-#define REG_IDLECONFIG		0x058
-#define REG_CHARGECONFIG	0x05C
-#define REG_CHARGEDELAY		0x060
-#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
-#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_FIFO0CNT		0xE4
-#define REG_FIFO0THR		0xE8
-#define REG_FIFO1THR		0xF4
-#define REG_FIFO0		0x100
-#define REG_FIFO1		0x200
-
-/*	Register Bitfields	*/
-#define IRQWKUP_ENB		BIT(0)
-
-/* Step Enable */
-#define STEPENB_MASK		(0x1FFFF << 0)
-#define STEPENB(val)		(val << 0)
-#define STPENB_STEPENB		STEPENB(0x7FFF)
-
-/* IRQ enable */
-#define IRQENB_FIFO0THRES	BIT(2)
-#define IRQENB_FIFO1THRES	BIT(5)
-#define IRQENB_PENUP		BIT(9)
-
-/* Step Configuration */
-#define STEPCONFIG_MODE_MASK	(3 << 0)
-#define STEPCONFIG_MODE(val)	(val << 0)
-#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
-#define STEPCONFIG_AVG_MASK	(7 << 2)
-#define STEPCONFIG_AVG(val)	(val << 2)
-#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
-#define STEPCONFIG_XPP		BIT(5)
-#define STEPCONFIG_XNN		BIT(6)
-#define STEPCONFIG_YPP		BIT(7)
-#define STEPCONFIG_YNN		BIT(8)
-#define STEPCONFIG_XNP		BIT(9)
-#define STEPCONFIG_YPN		BIT(10)
-#define STEPCONFIG_INM_MASK	(0xF << 15)
-#define STEPCONFIG_INM(val)	(val << 15)
-#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
-#define STEPCONFIG_INP_MASK	(0xF << 19)
-#define STEPCONFIG_INP(val)	(val << 19)
-#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
-#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
-#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
-#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
-#define STEPCONFIG_FIFO1	BIT(26)
-
-/* Delay register */
-#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
-#define STEPDELAY_OPEN(val)	(val << 0)
-#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
-
-/* Charge Config */
-#define STEPCHARGE_RFP_MASK	(7 << 12)
-#define STEPCHARGE_RFP(val)	(val << 12)
-#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
-#define STEPCHARGE_INM_MASK	(0xF << 15)
-#define STEPCHARGE_INM(val)	(val << 15)
-#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
-#define STEPCHARGE_INP_MASK	(0xF << 19)
-#define STEPCHARGE_INP(val)	(val << 19)
-#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
-#define STEPCHARGE_RFM_MASK	(3 << 23)
-#define STEPCHARGE_RFM(val)	(val << 23)
-#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
-
-/* Charge delay */
-#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
-#define CHARGEDLY_OPEN(val)	(val << 0)
-#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
-
-/* Control register */
-#define CNTRLREG_TSCSSENB	BIT(0)
-#define CNTRLREG_STEPID		BIT(1)
-#define CNTRLREG_STEPCONFIGWRT	BIT(2)
-#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
-#define CNTRLREG_AFE_CTRL(val)	(val << 5)
-#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
-#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
-#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
-#define CNTRLREG_TSCENB		BIT(7)
-
-#define ADCFSM_STEPID		0x10
-#define SEQ_SETTLE		275
-#define ADC_CLK			3000000
-#define MAX_12BIT		((1 << 12) - 1)
-
-struct tscadc {
-	struct input_dev	*input;
-	struct clk		*tsc_ick;
-	void __iomem		*tsc_base;
-	unsigned int		irq;
-	unsigned int		wires;
-	unsigned int		x_plate_resistance;
-	bool			pen_down;
-	int			steps_to_configure;
-};
-
-static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg)
-{
-	return readl(ts->tsc_base + reg);
-}
-
-static void tscadc_writel(struct tscadc *tsc, unsigned int reg,
-					unsigned int val)
-{
-	writel(val, tsc->tsc_base + reg);
-}
-
-static void tscadc_step_config(struct tscadc *ts_dev)
-{
-	unsigned int	config;
-	int i, total_steps;
-
-	/* Configure the Step registers */
-	total_steps = 2 * ts_dev->steps_to_configure;
-
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
-	switch (ts_dev->wires) {
-	case 4:
-		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
-		break;
-	case 5:
-		config |= STEPCONFIG_YNN |
-				STEPCONFIG_INP_AN4 | STEPCONFIG_XNN |
-				STEPCONFIG_YPP;
-		break;
-	case 8:
-		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
-		break;
-	}
-
-	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
-		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
-		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
-	}
-
-	config = 0;
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_AVG_16 | STEPCONFIG_YNN |
-			STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1;
-	switch (ts_dev->wires) {
-	case 4:
-		config |= STEPCONFIG_YPP;
-		break;
-	case 5:
-		config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 |
-				STEPCONFIG_XNP | STEPCONFIG_YPN;
-		break;
-	case 8:
-		config |= STEPCONFIG_YPP;
-		break;
-	}
-
-	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
-		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
-		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
-	}
-
-	config = 0;
-	/* Charge step configuration */
-	config = STEPCONFIG_XPP | STEPCONFIG_YNN |
-			STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR |
-			STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1;
-
-	tscadc_writel(ts_dev, REG_CHARGECONFIG, config);
-	tscadc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY);
-
-	config = 0;
-	/* Configure to calculate pressure */
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
-			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
-	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
-	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
-			STEPCONFIG_OPENDLY);
-
-	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
-	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
-	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
-			STEPCONFIG_OPENDLY);
-
-	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-}
-
-static void tscadc_idle_config(struct tscadc *ts_config)
-{
-	unsigned int idleconfig;
-
-	idleconfig = STEPCONFIG_YNN |
-			STEPCONFIG_INM_ADCREFM |
-			STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM;
-	tscadc_writel(ts_config, REG_IDLECONFIG, idleconfig);
-}
-
-static void tscadc_read_coordinates(struct tscadc *ts_dev,
-				    unsigned int *x, unsigned int *y)
-{
-	unsigned int fifocount = tscadc_readl(ts_dev, REG_FIFO0CNT);
-	unsigned int prev_val_x = ~0, prev_val_y = ~0;
-	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
-	unsigned int read, diff;
-	unsigned int i;
-
-	/*
-	 * Delta filter is used to remove large variations in sampled
-	 * values from ADC. The filter tries to predict where the next
-	 * coordinate could be. This is done by taking a previous
-	 * coordinate and subtracting it form current one. Further the
-	 * algorithm compares the difference with that of a present value,
-	 * if true the value is reported to the sub system.
-	 */
-	for (i = 0; i < fifocount - 1; i++) {
-		read = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		diff = abs(read - prev_val_x);
-		if (diff < prev_diff_x) {
-			prev_diff_x = diff;
-			*x = read;
-		}
-		prev_val_x = read;
-
-		read = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff;
-		diff = abs(read - prev_val_y);
-		if (diff < prev_diff_y) {
-			prev_diff_y = diff;
-			*y = read;
-		}
-		prev_val_y = read;
-	}
-}
-
-static irqreturn_t tscadc_irq(int irq, void *dev)
-{
-	struct tscadc *ts_dev = dev;
-	struct input_dev *input_dev = ts_dev->input;
-	unsigned int status, irqclr = 0;
-	unsigned int x = 0, y = 0;
-	unsigned int z1, z2, z;
-	unsigned int fsm;
-
-	status = tscadc_readl(ts_dev, REG_IRQSTATUS);
-	if (status & IRQENB_FIFO0THRES) {
-		tscadc_read_coordinates(ts_dev, &x, &y);
-
-		z1 = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		z2 = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff;
-
-		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
-			/*
-			 * Calculate pressure using formula
-			 * Resistance(touch) = x plate resistance *
-			 * x postion/4096 * ((z2 / z1) - 1)
-			 */
-			z = z2 - z1;
-			z *= x;
-			z *= ts_dev->x_plate_resistance;
-			z /= z1;
-			z = (z + 2047) >> 12;
-
-			if (z <= MAX_12BIT) {
-				input_report_abs(input_dev, ABS_X, x);
-				input_report_abs(input_dev, ABS_Y, y);
-				input_report_abs(input_dev, ABS_PRESSURE, z);
-				input_report_key(input_dev, BTN_TOUCH, 1);
-				input_sync(input_dev);
-			}
-		}
-		irqclr |= IRQENB_FIFO0THRES;
-	}
-
-	/*
-	 * Time for sequencer to settle, to read
-	 * correct state of the sequencer.
-	 */
-	udelay(SEQ_SETTLE);
-
-	status = tscadc_readl(ts_dev, REG_RAWIRQSTATUS);
-	if (status & IRQENB_PENUP) {
-		/* Pen up event */
-		fsm = tscadc_readl(ts_dev, REG_ADCFSM);
-		if (fsm == ADCFSM_STEPID) {
-			ts_dev->pen_down = false;
-			input_report_key(input_dev, BTN_TOUCH, 0);
-			input_report_abs(input_dev, ABS_PRESSURE, 0);
-			input_sync(input_dev);
-		} else {
-			ts_dev->pen_down = true;
-		}
-		irqclr |= IRQENB_PENUP;
-	}
-
-	tscadc_writel(ts_dev, REG_IRQSTATUS, irqclr);
-
-	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-	return IRQ_HANDLED;
-}
-
-/*
- * The functions for inserting/removing driver as a module.
- */
-
-static int __devinit tscadc_probe(struct platform_device *pdev)
-{
-	const struct tsc_data *pdata = pdev->dev.platform_data;
-	struct resource *res;
-	struct tscadc *ts_dev;
-	struct input_dev *input_dev;
-	struct clk *clk;
-	int err;
-	int clk_value, ctrl, irq;
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform data.\n");
-		return -EINVAL;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no memory resource defined.\n");
-		return -EINVAL;
-	}
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq ID is specified.\n");
-		return -EINVAL;
-	}
-
-	/* Allocate memory for device */
-	ts_dev = kzalloc(sizeof(struct tscadc), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!ts_dev || !input_dev) {
-		dev_err(&pdev->dev, "failed to allocate memory.\n");
-		err = -ENOMEM;
-		goto err_free_mem;
-	}
-
-	ts_dev->input = input_dev;
-	ts_dev->irq = irq;
-	ts_dev->wires = pdata->wires;
-	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
-	ts_dev->steps_to_configure = pdata->steps_to_configure;
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (!res) {
-		dev_err(&pdev->dev, "failed to reserve registers.\n");
-		err = -EBUSY;
-		goto err_free_mem;
-	}
-
-	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
-	if (!ts_dev->tsc_base) {
-		dev_err(&pdev->dev, "failed to map registers.\n");
-		err = -ENOMEM;
-		goto err_release_mem_region;
-	}
-
-	err = request_irq(ts_dev->irq, tscadc_irq,
-			  0, pdev->dev.driver->name, ts_dev);
-	if (err) {
-		dev_err(&pdev->dev, "failed to allocate irq.\n");
-		goto err_unmap_regs;
-	}
-
-	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
-	if (IS_ERR(ts_dev->tsc_ick)) {
-		dev_err(&pdev->dev, "failed to get TSC ick\n");
-		goto err_free_irq;
-	}
-	clk_enable(ts_dev->tsc_ick);
-
-	clk = clk_get(&pdev->dev, "adc_tsc_fck");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get TSC fck\n");
-		err = PTR_ERR(clk);
-		goto err_disable_clk;
-	}
-
-	clk_value = clk_get_rate(clk) / ADC_CLK;
-	clk_put(clk);
-
-	if (clk_value < 7) {
-		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
-		goto err_disable_clk;
-	}
-	/* CLKDIV needs to be configured to the value minus 1 */
-	tscadc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
-
-	 /* Enable wake-up of the SoC using touchscreen */
-	tscadc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
-
-	ctrl = CNTRLREG_STEPCONFIGWRT |
-			CNTRLREG_TSCENB |
-			CNTRLREG_STEPID;
-	switch (ts_dev->wires) {
-	case 4:
-		ctrl |= CNTRLREG_4WIRE;
-		break;
-	case 5:
-		ctrl |= CNTRLREG_5WIRE;
-		break;
-	case 8:
-		ctrl |= CNTRLREG_8WIRE;
-		break;
-	}
-	tscadc_writel(ts_dev, REG_CTRL, ctrl);
-
-	tscadc_idle_config(ts_dev);
-	tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
-	tscadc_step_config(ts_dev);
-	tscadc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
-
-	ctrl |= CNTRLREG_TSCSSENB;
-	tscadc_writel(ts_dev, REG_CTRL, ctrl);
-
-	input_dev->name = "ti-tsc-adc";
-	input_dev->dev.parent = &pdev->dev;
-
-	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
-	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
-
-	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
-	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
-	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
-
-	/* register to the input system */
-	err = input_register_device(input_dev);
-	if (err)
-		goto err_disable_clk;
-
-	platform_set_drvdata(pdev, ts_dev);
-	return 0;
-
-err_disable_clk:
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
-err_free_irq:
-	free_irq(ts_dev->irq, ts_dev);
-err_unmap_regs:
-	iounmap(ts_dev->tsc_base);
-err_release_mem_region:
-	release_mem_region(res->start, resource_size(res));
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(ts_dev);
-	return err;
-}
-
-static int __devexit tscadc_remove(struct platform_device *pdev)
-{
-	struct tscadc *ts_dev = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	free_irq(ts_dev->irq, ts_dev);
-
-	input_unregister_device(ts_dev->input);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iounmap(ts_dev->tsc_base);
-	release_mem_region(res->start, resource_size(res));
-
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
-
-	kfree(ts_dev);
-
-	platform_set_drvdata(pdev, NULL);
-	return 0;
-}
-
-static struct platform_driver ti_tsc_driver = {
-	.probe	= tscadc_probe,
-	.remove	= __devexit_p(tscadc_remove),
-	.driver	= {
-		.name   = "tsc",
-		.owner	= THIS_MODULE,
-	},
-};
-module_platform_driver(ti_tsc_driver);
-
-MODULE_DESCRIPTION("TI touchscreen controller driver");
-MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/input/ti_am335x_tsc.h b/include/linux/input/ti_am335x_tsc.h
new file mode 100644
index 000000000000..49269a2aa329
--- /dev/null
+++ b/include/linux/input/ti_am335x_tsc.h
@@ -0,0 +1,23 @@
+#ifndef __LINUX_TI_AM335X_TSC_H
+#define __LINUX_TI_AM335X_TSC_H
+
+/**
+ * struct tsc_data	Touchscreen wire configuration
+ * @wires:		Wires refer to application modes
+ *			i.e. 4/5/8 wire touchscreen support
+ *			on the platform.
+ * @x_plate_resistance:	X plate resistance.
+ * @steps_to_configure:	The sequencer supports a total of
+ *			16 programmable steps.
+ *			A step configured to read a single
+ *			co-ordinate value, can be applied
+ *			more number of times for better results.
+ */
+
+struct tsc_data {
+	int wires;
+	int x_plate_resistance;
+	int steps_to_configure;
+};
+
+#endif
diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h
deleted file mode 100644
index ad442a3eb68b..000000000000
--- a/include/linux/input/ti_tscadc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __LINUX_TI_TSCADC_H
-#define __LINUX_TI_TSCADC_H
-
-/**
- * struct tsc_data	Touchscreen wire configuration
- * @wires:		Wires refer to application modes
- *			i.e. 4/5/8 wire touchscreen support
- *			on the platform.
- * @x_plate_resistance:	X plate resistance.
- * @steps_to_configure:	The sequencer supports a total of
- *			16 programmable steps.
- *			A step configured to read a single
- *			co-ordinate value, can be applied
- *			more number of times for better results.
- */
-
-struct tsc_data {
-	int wires;
-	int x_plate_resistance;
-	int steps_to_configure;
-};
-
-#endif
-- 
cgit v1.2.3-55-g7522


From 01636eb970a029897b06fb96026941429212ddd9 Mon Sep 17 00:00:00 2001
From: Patil, Rachna
Date: Tue, 16 Oct 2012 12:55:43 +0530
Subject: mfd: ti_tscadc: Add support for TI's TSC/ADC MFDevice

Add the mfd core driver which supports touchscreen
and ADC.
With this patch we are only adding infrastructure to
support the MFD clients.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig                  |  11 ++
 drivers/mfd/Makefile                 |   1 +
 drivers/mfd/ti_am335x_tscadc.c       | 250 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/ti_am335x_tscadc.h | 137 +++++++++++++++++++
 4 files changed, 399 insertions(+)
 create mode 100644 drivers/mfd/ti_am335x_tscadc.c
 create mode 100644 include/linux/mfd/ti_am335x_tscadc.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index acab3ef8a310..9bba7f78ff36 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -94,6 +94,17 @@ config MFD_TI_SSP
 	  To compile this driver as a module, choose M here: the
 	  module will be called ti-ssp.
 
+config MFD_TI_AM335X_TSCADC
+	tristate "TI ADC / Touch Screen chip support"
+	select MFD_CORE
+	select REGMAP
+	select REGMAP_MMIO
+	help
+	  If you say yes here you get support for Texas Instruments series
+	  of Touch Screen /ADC chips.
+	  To compile this driver as a module, choose M here: the
+	  module will be called ti_am335x_tscadc.
+
 config HTC_EGPIO
 	bool "HTC EGPIO support"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d8ccb630ddb0..442c17e40741 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
 obj-$(CONFIG_MFD_DAVINCI_VOICECODEC)	+= davinci_voicecodec.o
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
+obj-$(CONFIG_MFD_TI_AM335X_TSCADC)	+= ti_am335x_tscadc.o
 
 obj-$(CONFIG_MFD_STA2X11)	+= sta2x11-mfd.o
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
new file mode 100644
index 000000000000..14df67bc390f
--- /dev/null
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -0,0 +1,250 @@
+/*
+ * TI Touch Screen / ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/regmap.h>
+#include <linux/mfd/core.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+
+static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
+{
+	unsigned int val;
+
+	regmap_read(tsadc->regmap_tscadc, reg, &val);
+	return val;
+}
+
+static void tscadc_writel(struct ti_tscadc_dev *tsadc, unsigned int reg,
+					unsigned int val)
+{
+	regmap_write(tsadc->regmap_tscadc, reg, val);
+}
+
+static const struct regmap_config tscadc_regmap_config = {
+	.name = "ti_tscadc",
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+};
+
+static void tscadc_idle_config(struct ti_tscadc_dev *config)
+{
+	unsigned int idleconfig;
+
+	idleconfig = STEPCONFIG_YNN | STEPCONFIG_INM_ADCREFM |
+			STEPCONFIG_INP_ADCREFM | STEPCONFIG_YPN;
+
+	tscadc_writel(config, REG_IDLECONFIG, idleconfig);
+}
+
+static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev	*tscadc;
+	struct resource		*res;
+	struct clk		*clk;
+	struct mfd_tscadc_board	*pdata = pdev->dev.platform_data;
+	int			irq;
+	int			err, ctrl;
+	int			clk_value, clock_rate;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined.\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no irq ID is specified.\n");
+		return -EINVAL;
+	}
+
+	/* Allocate memory for device */
+	tscadc = devm_kzalloc(&pdev->dev,
+			sizeof(struct ti_tscadc_dev), GFP_KERNEL);
+	if (!tscadc) {
+		dev_err(&pdev->dev, "failed to allocate memory.\n");
+		return -ENOMEM;
+	}
+	tscadc->dev = &pdev->dev;
+	tscadc->irq = irq;
+
+	res = devm_request_mem_region(&pdev->dev,
+			res->start, resource_size(res), pdev->name);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to reserve registers.\n");
+		err = -EBUSY;
+		goto err;
+	}
+
+	tscadc->tscadc_base = devm_ioremap(&pdev->dev,
+			res->start, resource_size(res));
+	if (!tscadc->tscadc_base) {
+		dev_err(&pdev->dev, "failed to map registers.\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	tscadc->regmap_tscadc = devm_regmap_init_mmio(&pdev->dev,
+			tscadc->tscadc_base, &tscadc_regmap_config);
+	if (IS_ERR(tscadc->regmap_tscadc)) {
+		dev_err(&pdev->dev, "regmap init failed\n");
+		err = PTR_ERR(tscadc->regmap_tscadc);
+		goto err;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	/*
+	 * The TSC_ADC_Subsystem has 2 clock domains
+	 * OCP_CLK and ADC_CLK.
+	 * The ADC clock is expected to run at target of 3MHz,
+	 * and expected to capture 12-bit data at a rate of 200 KSPS.
+	 * The TSC_ADC_SS controller design assumes the OCP clock is
+	 * at least 6x faster than the ADC clock.
+	 */
+	clk = clk_get(&pdev->dev, "adc_tsc_fck");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get TSC fck\n");
+		err = PTR_ERR(clk);
+		goto err_disable_clk;
+	}
+	clock_rate = clk_get_rate(clk);
+	clk_put(clk);
+	clk_value = clock_rate / ADC_CLK;
+	if (clk_value < MAX_CLK_DIV) {
+		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
+		err = -EINVAL;
+		goto err_disable_clk;
+	}
+	/* TSCADC_CLKDIV needs to be configured to the value minus 1 */
+	clk_value = clk_value - 1;
+	tscadc_writel(tscadc, REG_CLKDIV, clk_value);
+
+	/* Set the control register bits */
+	ctrl = CNTRLREG_STEPCONFIGWRT |
+			CNTRLREG_TSCENB |
+			CNTRLREG_STEPID |
+			CNTRLREG_4WIRE;
+	tscadc_writel(tscadc, REG_CTRL, ctrl);
+
+	/* Set register bits for Idle Config Mode */
+	tscadc_idle_config(tscadc);
+
+	/* Enable the TSC module enable bit */
+	ctrl = tscadc_readl(tscadc, REG_CTRL);
+	ctrl |= CNTRLREG_TSCSSENB;
+	tscadc_writel(tscadc, REG_CTRL, ctrl);
+
+	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
+			TSCADC_CELLS, NULL, 0, NULL);
+	if (err < 0)
+		goto err_disable_clk;
+
+	device_init_wakeup(&pdev->dev, true);
+	platform_set_drvdata(pdev, tscadc);
+
+	return 0;
+
+err_disable_clk:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+err:
+	return err;
+}
+
+static int __devexit ti_tscadc_remove(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev	*tscadc = platform_get_drvdata(pdev);
+
+	tscadc_writel(tscadc, REG_SE, 0x00);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	mfd_remove_devices(tscadc->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tscadc_suspend(struct device *dev)
+{
+	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
+
+	tscadc_writel(tscadc_dev, REG_SE, 0x00);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tscadc_resume(struct device *dev)
+{
+	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
+	unsigned int restore, ctrl;
+
+	pm_runtime_get_sync(dev);
+
+	/* context restore */
+	ctrl = CNTRLREG_STEPCONFIGWRT | CNTRLREG_TSCENB |
+			CNTRLREG_STEPID | CNTRLREG_4WIRE;
+	tscadc_writel(tscadc_dev, REG_CTRL, ctrl);
+	tscadc_idle_config(tscadc_dev);
+	tscadc_writel(tscadc_dev, REG_SE, STPENB_STEPENB);
+	restore = tscadc_readl(tscadc_dev, REG_CTRL);
+	tscadc_writel(tscadc_dev, REG_CTRL,
+			(restore | CNTRLREG_TSCSSENB));
+
+	return 0;
+}
+
+static const struct dev_pm_ops tscadc_pm_ops = {
+	.suspend = tscadc_suspend,
+	.resume = tscadc_resume,
+};
+#define TSCADC_PM_OPS (&tscadc_pm_ops)
+#else
+#define TSCADC_PM_OPS NULL
+#endif
+
+static struct platform_driver ti_tscadc_driver = {
+	.driver = {
+		.name   = "ti_tscadc",
+		.owner	= THIS_MODULE,
+		.pm	= TSCADC_PM_OPS,
+	},
+	.probe	= ti_tscadc_probe,
+	.remove	= __devexit_p(ti_tscadc_remove),
+
+};
+
+module_platform_driver(ti_tscadc_driver);
+
+MODULE_DESCRIPTION("TI touchscreen / ADC MFD controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
new file mode 100644
index 000000000000..b7232b157061
--- /dev/null
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -0,0 +1,137 @@
+#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H
+#define __LINUX_TI_AM335X_TSCADC_MFD_H
+
+/*
+ * TI Touch Screen / ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mfd/core.h>
+
+#define REG_RAWIRQSTATUS	0x024
+#define REG_IRQSTATUS		0x028
+#define REG_IRQENABLE		0x02C
+#define REG_IRQCLR		0x030
+#define REG_IRQWAKEUP		0x034
+#define REG_CTRL		0x040
+#define REG_ADCFSM		0x044
+#define REG_CLKDIV		0x04C
+#define REG_SE			0x054
+#define REG_IDLECONFIG		0x058
+#define REG_CHARGECONFIG	0x05C
+#define REG_CHARGEDELAY		0x060
+#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
+#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
+#define REG_FIFO0CNT		0xE4
+#define REG_FIFO0THR		0xE8
+#define REG_FIFO1CNT		0xF0
+#define REG_FIFO1THR		0xF4
+#define REG_FIFO0		0x100
+#define REG_FIFO1		0x200
+
+/*	Register Bitfields	*/
+/* IRQ wakeup enable */
+#define IRQWKUP_ENB		BIT(0)
+
+/* Step Enable */
+#define STEPENB_MASK		(0x1FFFF << 0)
+#define STEPENB(val)		((val) << 0)
+#define STPENB_STEPENB		STEPENB(0x1FFFF)
+
+/* IRQ enable */
+#define IRQENB_HW_PEN		BIT(0)
+#define IRQENB_FIFO0THRES	BIT(2)
+#define IRQENB_FIFO1THRES	BIT(5)
+#define IRQENB_PENUP		BIT(9)
+
+/* Step Configuration */
+#define STEPCONFIG_MODE_MASK	(3 << 0)
+#define STEPCONFIG_MODE(val)	((val) << 0)
+#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
+#define STEPCONFIG_AVG_MASK	(7 << 2)
+#define STEPCONFIG_AVG(val)	((val) << 2)
+#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
+#define STEPCONFIG_XPP		BIT(5)
+#define STEPCONFIG_XNN		BIT(6)
+#define STEPCONFIG_YPP		BIT(7)
+#define STEPCONFIG_YNN		BIT(8)
+#define STEPCONFIG_XNP		BIT(9)
+#define STEPCONFIG_YPN		BIT(10)
+#define STEPCONFIG_INM_MASK	(0xF << 15)
+#define STEPCONFIG_INM(val)	((val) << 15)
+#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
+#define STEPCONFIG_INP_MASK	(0xF << 19)
+#define STEPCONFIG_INP(val)	((val) << 19)
+#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
+#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
+#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
+#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
+#define STEPCONFIG_FIFO1	BIT(26)
+
+/* Delay register */
+#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
+#define STEPDELAY_OPEN(val)	((val) << 0)
+#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
+#define STEPDELAY_SAMPLE_MASK	(0xFF << 24)
+#define STEPDELAY_SAMPLE(val)	((val) << 24)
+#define STEPCONFIG_SAMPLEDLY	STEPDELAY_SAMPLE(0)
+
+/* Charge Config */
+#define STEPCHARGE_RFP_MASK	(7 << 12)
+#define STEPCHARGE_RFP(val)	((val) << 12)
+#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
+#define STEPCHARGE_INM_MASK	(0xF << 15)
+#define STEPCHARGE_INM(val)	((val) << 15)
+#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
+#define STEPCHARGE_INP_MASK	(0xF << 19)
+#define STEPCHARGE_INP(val)	((val) << 19)
+#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
+#define STEPCHARGE_RFM_MASK	(3 << 23)
+#define STEPCHARGE_RFM(val)	((val) << 23)
+#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
+
+/* Charge delay */
+#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
+#define CHARGEDLY_OPEN(val)	((val) << 0)
+#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
+
+/* Control register */
+#define CNTRLREG_TSCSSENB	BIT(0)
+#define CNTRLREG_STEPID		BIT(1)
+#define CNTRLREG_STEPCONFIGWRT	BIT(2)
+#define CNTRLREG_POWERDOWN	BIT(4)
+#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
+#define CNTRLREG_AFE_CTRL(val)	((val) << 5)
+#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
+#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
+#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
+#define CNTRLREG_TSCENB		BIT(7)
+
+#define ADC_CLK			3000000
+#define	MAX_CLK_DIV		7
+
+#define TSCADC_CELLS		0
+
+struct mfd_tscadc_board {
+	struct tsc_data *tsc_init;
+};
+
+struct ti_tscadc_dev {
+	struct device *dev;
+	struct regmap *regmap_tscadc;
+	void __iomem *tscadc_base;
+	int irq;
+	struct mfd_cell cells[TSCADC_CELLS];
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 2b99bafab19145a72e2c557326fc4662a864a162 Mon Sep 17 00:00:00 2001
From: Patil, Rachna
Date: Tue, 16 Oct 2012 12:55:44 +0530
Subject: input: TSC: ti_tsc: Convert TSC into a MFDevice

This patch converts touchscreen into a MFD client.
All the register definitions, clock initialization,
etc has been moved to MFD core driver.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/Kconfig         |   2 +-
 drivers/input/touchscreen/ti_am335x_tsc.c | 318 +++++++++---------------------
 drivers/mfd/ti_am335x_tscadc.c            |  11 ++
 include/linux/mfd/ti_am335x_tscadc.h      |  10 +-
 4 files changed, 118 insertions(+), 223 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index d31dc5faeaaf..0c45caddd41c 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -531,7 +531,7 @@ config TOUCHSCREEN_TOUCHWIN
 
 config TOUCHSCREEN_TI_AM335X_TSC
 	tristate "TI Touchscreen Interface"
-	depends on ARCH_OMAP2PLUS
+	depends on MFD_TI_AM335X_TSCADC
 	help
 	  Say Y here if you have 4/5/8 wire touchscreen controller
 	  to be connected to the ADC controller on your TI AM335x SoC.
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index 462950acb97b..7a18a8a15228 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -27,106 +27,15 @@
 #include <linux/input/ti_am335x_tsc.h>
 #include <linux/delay.h>
 
-#define REG_RAWIRQSTATUS	0x024
-#define REG_IRQSTATUS		0x028
-#define REG_IRQENABLE		0x02C
-#define REG_IRQWAKEUP		0x034
-#define REG_CTRL		0x040
-#define REG_ADCFSM		0x044
-#define REG_CLKDIV		0x04C
-#define REG_SE			0x054
-#define REG_IDLECONFIG		0x058
-#define REG_CHARGECONFIG	0x05C
-#define REG_CHARGEDELAY		0x060
-#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
-#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_FIFO0CNT		0xE4
-#define REG_FIFO0THR		0xE8
-#define REG_FIFO1THR		0xF4
-#define REG_FIFO0		0x100
-#define REG_FIFO1		0x200
-
-/*	Register Bitfields	*/
-#define IRQWKUP_ENB		BIT(0)
-
-/* Step Enable */
-#define STEPENB_MASK		(0x1FFFF << 0)
-#define STEPENB(val)		(val << 0)
-#define STPENB_STEPENB		STEPENB(0x7FFF)
-
-/* IRQ enable */
-#define IRQENB_FIFO0THRES	BIT(2)
-#define IRQENB_FIFO1THRES	BIT(5)
-#define IRQENB_PENUP		BIT(9)
-
-/* Step Configuration */
-#define STEPCONFIG_MODE_MASK	(3 << 0)
-#define STEPCONFIG_MODE(val)	(val << 0)
-#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
-#define STEPCONFIG_AVG_MASK	(7 << 2)
-#define STEPCONFIG_AVG(val)	(val << 2)
-#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
-#define STEPCONFIG_XPP		BIT(5)
-#define STEPCONFIG_XNN		BIT(6)
-#define STEPCONFIG_YPP		BIT(7)
-#define STEPCONFIG_YNN		BIT(8)
-#define STEPCONFIG_XNP		BIT(9)
-#define STEPCONFIG_YPN		BIT(10)
-#define STEPCONFIG_INM_MASK	(0xF << 15)
-#define STEPCONFIG_INM(val)	(val << 15)
-#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
-#define STEPCONFIG_INP_MASK	(0xF << 19)
-#define STEPCONFIG_INP(val)	(val << 19)
-#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
-#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
-#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
-#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
-#define STEPCONFIG_FIFO1	BIT(26)
-
-/* Delay register */
-#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
-#define STEPDELAY_OPEN(val)	(val << 0)
-#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
-
-/* Charge Config */
-#define STEPCHARGE_RFP_MASK	(7 << 12)
-#define STEPCHARGE_RFP(val)	(val << 12)
-#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
-#define STEPCHARGE_INM_MASK	(0xF << 15)
-#define STEPCHARGE_INM(val)	(val << 15)
-#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
-#define STEPCHARGE_INP_MASK	(0xF << 19)
-#define STEPCHARGE_INP(val)	(val << 19)
-#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
-#define STEPCHARGE_RFM_MASK	(3 << 23)
-#define STEPCHARGE_RFM(val)	(val << 23)
-#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
-
-/* Charge delay */
-#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
-#define CHARGEDLY_OPEN(val)	(val << 0)
-#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
-
-/* Control register */
-#define CNTRLREG_TSCSSENB	BIT(0)
-#define CNTRLREG_STEPID		BIT(1)
-#define CNTRLREG_STEPCONFIGWRT	BIT(2)
-#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
-#define CNTRLREG_AFE_CTRL(val)	(val << 5)
-#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
-#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
-#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
-#define CNTRLREG_TSCENB		BIT(7)
+#include <linux/mfd/ti_am335x_tscadc.h>
 
 #define ADCFSM_STEPID		0x10
 #define SEQ_SETTLE		275
-#define ADC_CLK			3000000
 #define MAX_12BIT		((1 << 12) - 1)
 
 struct titsc {
 	struct input_dev	*input;
-	struct clk		*tsc_ick;
-	void __iomem		*tsc_base;
+	struct ti_tscadc_dev	*mfd_tscadc;
 	unsigned int		irq;
 	unsigned int		wires;
 	unsigned int		x_plate_resistance;
@@ -136,13 +45,13 @@ struct titsc {
 
 static unsigned int titsc_readl(struct titsc *ts, unsigned int reg)
 {
-	return readl(ts->tsc_base + reg);
+	return readl(ts->mfd_tscadc->tscadc_base + reg);
 }
 
 static void titsc_writel(struct titsc *tsc, unsigned int reg,
 					unsigned int val)
 {
-	writel(val, tsc->tsc_base + reg);
+	writel(val, tsc->mfd_tscadc->tscadc_base + reg);
 }
 
 static void titsc_step_config(struct titsc *ts_dev)
@@ -219,17 +128,7 @@ static void titsc_step_config(struct titsc *ts_dev)
 	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
 			STEPCONFIG_OPENDLY);
 
-	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-}
-
-static void titsc_idle_config(struct titsc *ts_config)
-{
-	unsigned int idleconfig;
-
-	idleconfig = STEPCONFIG_YNN |
-			STEPCONFIG_INM_ADCREFM |
-			STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM;
-	titsc_writel(ts_config, REG_IDLECONFIG, idleconfig);
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC);
 }
 
 static void titsc_read_coordinates(struct titsc *ts_dev,
@@ -239,7 +138,7 @@ static void titsc_read_coordinates(struct titsc *ts_dev,
 	unsigned int prev_val_x = ~0, prev_val_y = ~0;
 	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
 	unsigned int read, diff;
-	unsigned int i;
+	unsigned int i, channel;
 
 	/*
 	 * Delta filter is used to remove large variations in sampled
@@ -250,21 +149,32 @@ static void titsc_read_coordinates(struct titsc *ts_dev,
 	 * if true the value is reported to the sub system.
 	 */
 	for (i = 0; i < fifocount - 1; i++) {
-		read = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		diff = abs(read - prev_val_x);
-		if (diff < prev_diff_x) {
-			prev_diff_x = diff;
-			*x = read;
+		read = titsc_readl(ts_dev, REG_FIFO0);
+		channel = read & 0xf0000;
+		channel = channel >> 0x10;
+		if ((channel >= 0) && (channel < ts_dev->steps_to_configure)) {
+			read &= 0xfff;
+			diff = abs(read - prev_val_x);
+			if (diff < prev_diff_x) {
+				prev_diff_x = diff;
+				*x = read;
+			}
+			prev_val_x = read;
 		}
-		prev_val_x = read;
 
-		read = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
-		diff = abs(read - prev_val_y);
-		if (diff < prev_diff_y) {
-			prev_diff_y = diff;
-			*y = read;
+		read = titsc_readl(ts_dev, REG_FIFO1);
+		channel = read & 0xf0000;
+		channel = channel >> 0x10;
+		if ((channel >= ts_dev->steps_to_configure) &&
+			(channel < (2 * ts_dev->steps_to_configure - 1))) {
+			read &= 0xfff;
+			diff = abs(read - prev_val_y);
+			if (diff < prev_diff_y) {
+				prev_diff_y = diff;
+				*y = read;
+			}
+			prev_val_y = read;
 		}
-		prev_val_y = read;
 	}
 }
 
@@ -276,6 +186,8 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 	unsigned int x = 0, y = 0;
 	unsigned int z1, z2, z;
 	unsigned int fsm;
+	unsigned int fifo1count, fifo0count;
+	int i;
 
 	status = titsc_readl(ts_dev, REG_IRQSTATUS);
 	if (status & IRQENB_FIFO0THRES) {
@@ -284,6 +196,14 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 		z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
 		z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
 
+		fifo1count = titsc_readl(ts_dev, REG_FIFO1CNT);
+		for (i = 0; i < fifo1count; i++)
+			titsc_readl(ts_dev, REG_FIFO1);
+
+		fifo0count = titsc_readl(ts_dev, REG_FIFO0CNT);
+		for (i = 0; i < fifo0count; i++)
+			titsc_readl(ts_dev, REG_FIFO0);
+
 		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
 			/*
 			 * Calculate pressure using formula
@@ -330,7 +250,7 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 
 	titsc_writel(ts_dev, REG_IRQSTATUS, irqclr);
 
-	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC);
 	return IRQ_HANDLED;
 }
 
@@ -340,28 +260,16 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 
 static int __devinit titsc_probe(struct platform_device *pdev)
 {
-	const struct tsc_data *pdata = pdev->dev.platform_data;
-	struct resource *res;
 	struct titsc *ts_dev;
 	struct input_dev *input_dev;
-	struct clk *clk;
+	struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data;
+	struct mfd_tscadc_board	*pdata;
 	int err;
-	int clk_value, ctrl, irq;
 
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform data.\n");
-		return -EINVAL;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no memory resource defined.\n");
-		return -EINVAL;
-	}
+	pdata = tscadc_dev->dev->platform_data;
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq ID is specified.\n");
+	if (!pdata) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
 		return -EINVAL;
 	}
 
@@ -374,85 +282,26 @@ static int __devinit titsc_probe(struct platform_device *pdev)
 		goto err_free_mem;
 	}
 
+	tscadc_dev->tsc = ts_dev;
+	ts_dev->mfd_tscadc = tscadc_dev;
 	ts_dev->input = input_dev;
-	ts_dev->irq = irq;
-	ts_dev->wires = pdata->wires;
-	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
-	ts_dev->steps_to_configure = pdata->steps_to_configure;
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (!res) {
-		dev_err(&pdev->dev, "failed to reserve registers.\n");
-		err = -EBUSY;
-		goto err_free_mem;
-	}
-
-	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
-	if (!ts_dev->tsc_base) {
-		dev_err(&pdev->dev, "failed to map registers.\n");
-		err = -ENOMEM;
-		goto err_release_mem_region;
-	}
+	ts_dev->irq = tscadc_dev->irq;
+	ts_dev->wires = pdata->tsc_init->wires;
+	ts_dev->x_plate_resistance = pdata->tsc_init->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->tsc_init->steps_to_configure;
 
 	err = request_irq(ts_dev->irq, titsc_irq,
 			  0, pdev->dev.driver->name, ts_dev);
 	if (err) {
 		dev_err(&pdev->dev, "failed to allocate irq.\n");
-		goto err_unmap_regs;
-	}
-
-	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
-	if (IS_ERR(ts_dev->tsc_ick)) {
-		dev_err(&pdev->dev, "failed to get TSC ick\n");
-		goto err_free_irq;
-	}
-	clk_enable(ts_dev->tsc_ick);
-
-	clk = clk_get(&pdev->dev, "adc_tsc_fck");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get TSC fck\n");
-		err = PTR_ERR(clk);
-		goto err_disable_clk;
-	}
-
-	clk_value = clk_get_rate(clk) / ADC_CLK;
-	clk_put(clk);
-
-	if (clk_value < 7) {
-		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
-		goto err_disable_clk;
-	}
-	/* CLKDIV needs to be configured to the value minus 1 */
-	titsc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
-
-	 /* Enable wake-up of the SoC using touchscreen */
-	titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
-
-	ctrl = CNTRLREG_STEPCONFIGWRT |
-			CNTRLREG_TSCENB |
-			CNTRLREG_STEPID;
-	switch (ts_dev->wires) {
-	case 4:
-		ctrl |= CNTRLREG_4WIRE;
-		break;
-	case 5:
-		ctrl |= CNTRLREG_5WIRE;
-		break;
-	case 8:
-		ctrl |= CNTRLREG_8WIRE;
-		break;
+		goto err_free_mem;
 	}
-	titsc_writel(ts_dev, REG_CTRL, ctrl);
 
-	titsc_idle_config(ts_dev);
 	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
 	titsc_step_config(ts_dev);
 	titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
 
-	ctrl |= CNTRLREG_TSCSSENB;
-	titsc_writel(ts_dev, REG_CTRL, ctrl);
-
-	input_dev->name = "ti-tsc-adc";
+	input_dev->name = "ti-tsc";
 	input_dev->dev.parent = &pdev->dev;
 
 	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
@@ -465,20 +314,13 @@ static int __devinit titsc_probe(struct platform_device *pdev)
 	/* register to the input system */
 	err = input_register_device(input_dev);
 	if (err)
-		goto err_disable_clk;
+		goto err_free_irq;
 
 	platform_set_drvdata(pdev, ts_dev);
 	return 0;
 
-err_disable_clk:
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
 err_free_irq:
 	free_irq(ts_dev->irq, ts_dev);
-err_unmap_regs:
-	iounmap(ts_dev->tsc_base);
-err_release_mem_region:
-	release_mem_region(res->start, resource_size(res));
 err_free_mem:
 	input_free_device(input_dev);
 	kfree(ts_dev);
@@ -487,32 +329,66 @@ err_free_mem:
 
 static int __devexit titsc_remove(struct platform_device *pdev)
 {
-	struct titsc *ts_dev = platform_get_drvdata(pdev);
-	struct resource *res;
+	struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
 
 	free_irq(ts_dev->irq, ts_dev);
 
 	input_unregister_device(ts_dev->input);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iounmap(ts_dev->tsc_base);
-	release_mem_region(res->start, resource_size(res));
+	platform_set_drvdata(pdev, NULL);
+	kfree(ts_dev);
+	return 0;
+}
 
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
+#ifdef CONFIG_PM
+static int titsc_suspend(struct device *dev)
+{
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
+	unsigned int idle;
+
+	if (device_may_wakeup(tscadc_dev->dev)) {
+		idle = titsc_readl(ts_dev, REG_IRQENABLE);
+		titsc_writel(ts_dev, REG_IRQENABLE,
+				(idle | IRQENB_HW_PEN));
+		titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
+	}
+	return 0;
+}
 
-	kfree(ts_dev);
+static int titsc_resume(struct device *dev)
+{
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
 
-	platform_set_drvdata(pdev, NULL);
+	if (device_may_wakeup(tscadc_dev->dev)) {
+		titsc_writel(ts_dev, REG_IRQWAKEUP,
+				0x00);
+		titsc_writel(ts_dev, REG_IRQCLR, IRQENB_HW_PEN);
+	}
+	titsc_step_config(ts_dev);
+	titsc_writel(ts_dev, REG_FIFO0THR,
+			ts_dev->steps_to_configure);
 	return 0;
 }
 
+static const struct dev_pm_ops titsc_pm_ops = {
+	.suspend = titsc_suspend,
+	.resume  = titsc_resume,
+};
+#define TITSC_PM_OPS (&titsc_pm_ops)
+#else
+#define TITSC_PM_OPS NULL
+#endif
+
 static struct platform_driver ti_tsc_driver = {
 	.probe	= titsc_probe,
 	.remove	= __devexit_p(titsc_remove),
 	.driver	= {
 		.name   = "tsc",
 		.owner	= THIS_MODULE,
+		.pm	= TITSC_PM_OPS,
 	},
 };
 module_platform_driver(ti_tsc_driver);
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index 14df67bc390f..d812be4b61df 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -24,6 +24,7 @@
 #include <linux/pm_runtime.h>
 
 #include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/input/ti_am335x_tsc.h>
 
 static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
 {
@@ -62,15 +63,19 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	struct resource		*res;
 	struct clk		*clk;
 	struct mfd_tscadc_board	*pdata = pdev->dev.platform_data;
+	struct mfd_cell		*cell;
 	int			irq;
 	int			err, ctrl;
 	int			clk_value, clock_rate;
+	int			tsc_wires;
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "Could not find platform data\n");
 		return -EINVAL;
 	}
 
+	tsc_wires = pdata->tsc_init->wires;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "no memory resource defined.\n");
@@ -161,6 +166,12 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	ctrl |= CNTRLREG_TSCSSENB;
 	tscadc_writel(tscadc, REG_CTRL, ctrl);
 
+	/* TSC Cell */
+	cell = &tscadc->cells[TSC_CELL];
+	cell->name = "tsc";
+	cell->platform_data = tscadc;
+	cell->pdata_size = sizeof(*tscadc);
+
 	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
 			TSCADC_CELLS, NULL, 0, NULL);
 	if (err < 0)
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index b7232b157061..fc18b2ef753f 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -47,6 +47,7 @@
 #define STEPENB_MASK		(0x1FFFF << 0)
 #define STEPENB(val)		((val) << 0)
 #define STPENB_STEPENB		STEPENB(0x1FFFF)
+#define STPENB_STEPENB_TC	STEPENB(0x1FFF)
 
 /* IRQ enable */
 #define IRQENB_HW_PEN		BIT(0)
@@ -120,7 +121,11 @@
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
 
-#define TSCADC_CELLS		0
+#define TSCADC_CELLS		1
+
+enum tscadc_cells {
+	TSC_CELL,
+};
 
 struct mfd_tscadc_board {
 	struct tsc_data *tsc_init;
@@ -132,6 +137,9 @@ struct ti_tscadc_dev {
 	void __iomem *tscadc_base;
 	int irq;
 	struct mfd_cell cells[TSCADC_CELLS];
+
+	/* tsc device */
+	struct titsc *tsc;
 };
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 5e53a69b44e893227b046a7bc74db3cb40d7f39b Mon Sep 17 00:00:00 2001
From: Patil, Rachna
Date: Tue, 16 Oct 2012 12:55:45 +0530
Subject: IIO : ADC: tiadc: Add support of TI's ADC driver

This patch adds support for TI's ADC driver.
This is a multifunctional device.
Analog input lines are provided on which
voltage measurements can be carried out.
You can have upto 8 input lines.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/iio/adc/Kconfig                     |   7 +
 drivers/iio/adc/Makefile                    |   1 +
 drivers/iio/adc/ti_am335x_adc.c             | 260 ++++++++++++++++++++++++++++
 drivers/mfd/ti_am335x_tscadc.c              |  18 +-
 include/linux/mfd/ti_am335x_tscadc.h        |   9 +-
 include/linux/platform_data/ti_am335x_adc.h |  14 ++
 6 files changed, 307 insertions(+), 2 deletions(-)
 create mode 100644 drivers/iio/adc/ti_am335x_adc.c
 create mode 100644 include/linux/platform_data/ti_am335x_adc.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 492758120338..1401ed1af39f 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -60,4 +60,11 @@ config LP8788_ADC
 	help
 	  Say yes here to build support for TI LP8788 ADC.
 
+config TI_AM335X_ADC
+	tristate "TI's ADC driver"
+	depends on MFD_TI_AM335X_TSCADC
+	help
+	  Say yes here to build support for Texas Instruments ADC
+	  driver which is also a MFD client.
+
 endmenu
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 900995d5e179..4410a90fc84b 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_AD7476) += ad7476.o
 obj-$(CONFIG_AD7791) += ad7791.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
+obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
new file mode 100644
index 000000000000..02a43c87a8a3
--- /dev/null
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -0,0 +1,260 @@
+/*
+ * TI ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/iio/iio.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/platform_data/ti_am335x_adc.h>
+
+struct tiadc_device {
+	struct ti_tscadc_dev *mfd_tscadc;
+	int channels;
+};
+
+static unsigned int tiadc_readl(struct tiadc_device *adc, unsigned int reg)
+{
+	return readl(adc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void tiadc_writel(struct tiadc_device *adc, unsigned int reg,
+					unsigned int val)
+{
+	writel(val, adc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void tiadc_step_config(struct tiadc_device *adc_dev)
+{
+	unsigned int stepconfig;
+	int i, channels = 0, steps;
+
+	/*
+	 * There are 16 configurable steps and 8 analog input
+	 * lines available which are shared between Touchscreen and ADC.
+	 *
+	 * Steps backwards i.e. from 16 towards 0 are used by ADC
+	 * depending on number of input lines needed.
+	 * Channel would represent which analog input
+	 * needs to be given to ADC to digitalize data.
+	 */
+
+	steps = TOTAL_STEPS - adc_dev->channels;
+	channels = TOTAL_CHANNELS - adc_dev->channels;
+
+	stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1;
+
+	for (i = (steps + 1); i <= TOTAL_STEPS; i++) {
+		tiadc_writel(adc_dev, REG_STEPCONFIG(i),
+				stepconfig | STEPCONFIG_INP(channels));
+		tiadc_writel(adc_dev, REG_STEPDELAY(i),
+				STEPCONFIG_OPENDLY);
+		channels++;
+	}
+	tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB);
+}
+
+static int tiadc_channel_init(struct iio_dev *indio_dev, int channels)
+{
+	struct iio_chan_spec *chan_array;
+	int i;
+
+	indio_dev->num_channels = channels;
+	chan_array = kcalloc(indio_dev->num_channels,
+			sizeof(struct iio_chan_spec), GFP_KERNEL);
+
+	if (chan_array == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < (indio_dev->num_channels); i++) {
+		struct iio_chan_spec *chan = chan_array + i;
+		chan->type = IIO_VOLTAGE;
+		chan->indexed = 1;
+		chan->channel = i;
+		chan->info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT;
+	}
+
+	indio_dev->channels = chan_array;
+
+	return indio_dev->num_channels;
+}
+
+static void tiadc_channels_remove(struct iio_dev *indio_dev)
+{
+	kfree(indio_dev->channels);
+}
+
+static int tiadc_read_raw(struct iio_dev *indio_dev,
+		struct iio_chan_spec const *chan,
+		int *val, int *val2, long mask)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int i;
+	unsigned int fifo1count, readx1;
+
+	/*
+	 * When the sub-system is first enabled,
+	 * the sequencer will always start with the
+	 * lowest step (1) and continue until step (16).
+	 * For ex: If we have enabled 4 ADC channels and
+	 * currently use only 1 out of them, the
+	 * sequencer still configures all the 4 steps,
+	 * leading to 3 unwanted data.
+	 * Hence we need to flush out this data.
+	 */
+
+	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+	for (i = 0; i < fifo1count; i++) {
+		readx1 = tiadc_readl(adc_dev, REG_FIFO1);
+		if (i == chan->channel)
+			*val = readx1 & 0xfff;
+	}
+	tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB);
+
+	return IIO_VAL_INT;
+}
+
+static const struct iio_info tiadc_info = {
+	.read_raw = &tiadc_read_raw,
+};
+
+static int __devinit tiadc_probe(struct platform_device *pdev)
+{
+	struct iio_dev		*indio_dev;
+	struct tiadc_device	*adc_dev;
+	struct ti_tscadc_dev	*tscadc_dev = pdev->dev.platform_data;
+	struct mfd_tscadc_board	*pdata;
+	int			err;
+
+	pdata = tscadc_dev->dev->platform_data;
+	if (!pdata || !pdata->adc_init) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	indio_dev = iio_device_alloc(sizeof(struct tiadc_device));
+	if (indio_dev == NULL) {
+		dev_err(&pdev->dev, "failed to allocate iio device\n");
+		err = -ENOMEM;
+		goto err_ret;
+	}
+	adc_dev = iio_priv(indio_dev);
+
+	adc_dev->mfd_tscadc = tscadc_dev;
+	adc_dev->channels = pdata->adc_init->adc_channels;
+
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->name = dev_name(&pdev->dev);
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &tiadc_info;
+
+	tiadc_step_config(adc_dev);
+
+	err = tiadc_channel_init(indio_dev, adc_dev->channels);
+	if (err < 0)
+		goto err_free_device;
+
+	err = iio_device_register(indio_dev);
+	if (err)
+		goto err_free_channels;
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	return 0;
+
+err_free_channels:
+	tiadc_channels_remove(indio_dev);
+err_free_device:
+	iio_device_free(indio_dev);
+err_ret:
+	return err;
+}
+
+static int __devexit tiadc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	tiadc_channels_remove(indio_dev);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tiadc_suspend(struct device *dev)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	unsigned int idle;
+
+	if (!device_may_wakeup(tscadc_dev->dev)) {
+		idle = tiadc_readl(adc_dev, REG_CTRL);
+		idle &= ~(CNTRLREG_TSCSSENB);
+		tiadc_writel(adc_dev, REG_CTRL, (idle |
+				CNTRLREG_POWERDOWN));
+	}
+
+	return 0;
+}
+
+static int tiadc_resume(struct device *dev)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	unsigned int restore;
+
+	/* Make sure ADC is powered up */
+	restore = tiadc_readl(adc_dev, REG_CTRL);
+	restore &= ~(CNTRLREG_POWERDOWN);
+	tiadc_writel(adc_dev, REG_CTRL, restore);
+
+	tiadc_step_config(adc_dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops tiadc_pm_ops = {
+	.suspend = tiadc_suspend,
+	.resume = tiadc_resume,
+};
+#define TIADC_PM_OPS (&tiadc_pm_ops)
+#else
+#define TIADC_PM_OPS NULL
+#endif
+
+static struct platform_driver tiadc_driver = {
+	.driver = {
+		.name   = "tiadc",
+		.owner	= THIS_MODULE,
+		.pm	= TIADC_PM_OPS,
+	},
+	.probe	= tiadc_probe,
+	.remove	= __devexit_p(tiadc_remove),
+};
+
+module_platform_driver(tiadc_driver);
+
+MODULE_DESCRIPTION("TI ADC controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index d812be4b61df..e947dd8bbcc3 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -25,6 +25,7 @@
 
 #include <linux/mfd/ti_am335x_tscadc.h>
 #include <linux/input/ti_am335x_tsc.h>
+#include <linux/platform_data/ti_am335x_adc.h>
 
 static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
 {
@@ -67,14 +68,23 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	int			irq;
 	int			err, ctrl;
 	int			clk_value, clock_rate;
-	int			tsc_wires;
+	int			tsc_wires, adc_channels = 0, total_channels;
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "Could not find platform data\n");
 		return -EINVAL;
 	}
 
+	if (pdata->adc_init)
+		adc_channels = pdata->adc_init->adc_channels;
+
 	tsc_wires = pdata->tsc_init->wires;
+	total_channels = tsc_wires + adc_channels;
+
+	if (total_channels > 8) {
+		dev_err(&pdev->dev, "Number of i/p channels more than 8\n");
+		return -EINVAL;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
@@ -172,6 +182,12 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	cell->platform_data = tscadc;
 	cell->pdata_size = sizeof(*tscadc);
 
+	/* ADC Cell */
+	cell = &tscadc->cells[ADC_CELL];
+	cell->name = "tiadc";
+	cell->platform_data = tscadc;
+	cell->pdata_size = sizeof(*tscadc);
+
 	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
 			TSCADC_CELLS, NULL, 0, NULL);
 	if (err < 0)
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index fc18b2ef753f..c79ad5d2f271 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -120,15 +120,19 @@
 
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
+#define TOTAL_STEPS		16
+#define TOTAL_CHANNELS		8
 
-#define TSCADC_CELLS		1
+#define TSCADC_CELLS		2
 
 enum tscadc_cells {
 	TSC_CELL,
+	ADC_CELL,
 };
 
 struct mfd_tscadc_board {
 	struct tsc_data *tsc_init;
+	struct adc_data *adc_init;
 };
 
 struct ti_tscadc_dev {
@@ -140,6 +144,9 @@ struct ti_tscadc_dev {
 
 	/* tsc device */
 	struct titsc *tsc;
+
+	/* adc device */
+	struct adc_device *adc;
 };
 
 #endif
diff --git a/include/linux/platform_data/ti_am335x_adc.h b/include/linux/platform_data/ti_am335x_adc.h
new file mode 100644
index 000000000000..e41d5834cb84
--- /dev/null
+++ b/include/linux/platform_data/ti_am335x_adc.h
@@ -0,0 +1,14 @@
+#ifndef __LINUX_TI_AM335X_ADC_H
+#define __LINUX_TI_AM335X_ADC_H
+
+/**
+ * struct adc_data	ADC Input information
+ * @adc_channels:	Number of analog inputs
+ *			available for ADC.
+ */
+
+struct adc_data {
+	unsigned int adc_channels;
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 636036d29a20154b897571e1d5949faac98bfb7c Mon Sep 17 00:00:00 2001
From: Bo Shen
Date: Tue, 6 Nov 2012 13:57:51 +0800
Subject: ARM: at91: atmel-ssc: add platform device id table

Add platform device id to check whether the SSC controller support
pdc or dam for data transfer

If match "at91rm9200_ssc", which support pdc for data transfer
If match "at91sam9g45_ssc", which support dma for data transfer

Signed-off-by: Bo Shen <voice.shen@atmel.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 arch/arm/mach-at91/at91rm9200.c          |  6 +++---
 arch/arm/mach-at91/at91rm9200_devices.c  |  6 +++---
 arch/arm/mach-at91/at91sam9260.c         |  2 +-
 arch/arm/mach-at91/at91sam9260_devices.c |  2 +-
 arch/arm/mach-at91/at91sam9261.c         |  6 +++---
 arch/arm/mach-at91/at91sam9261_devices.c |  6 +++---
 arch/arm/mach-at91/at91sam9263.c         |  4 ++--
 arch/arm/mach-at91/at91sam9263_devices.c |  4 ++--
 arch/arm/mach-at91/at91sam9g45.c         |  4 ++--
 arch/arm/mach-at91/at91sam9g45_devices.c |  4 ++--
 arch/arm/mach-at91/at91sam9rl.c          |  4 ++--
 arch/arm/mach-at91/at91sam9rl_devices.c  |  4 ++--
 drivers/misc/atmel-ssc.c                 | 23 +++++++++++++++++++++++
 include/linux/atmel-ssc.h                |  5 +++++
 14 files changed, 54 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c
index 5269825194a8..400d1a34683c 100644
--- a/arch/arm/mach-at91/at91rm9200.c
+++ b/arch/arm/mach-at91/at91rm9200.c
@@ -184,9 +184,9 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.1", &tc3_clk),
 	CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.1", &tc4_clk),
 	CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.1", &tc5_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.0", &ssc0_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.1", &ssc1_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.2", &ssc2_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91rm9200.0", &twi_clk),
 	/* fake hclk clock */
 	CLKDEV_CON_DEV_ID("hclk", "at91_ohci", &ohci_clk),
diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c
index 1e122bcd7845..f40925a552a1 100644
--- a/arch/arm/mach-at91/at91rm9200_devices.c
+++ b/arch/arm/mach-at91/at91rm9200_devices.c
@@ -752,7 +752,7 @@ static struct resource ssc0_resources[] = {
 };
 
 static struct platform_device at91rm9200_ssc0_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 0,
 	.dev	= {
 		.dma_mask		= &ssc0_dmamask,
@@ -794,7 +794,7 @@ static struct resource ssc1_resources[] = {
 };
 
 static struct platform_device at91rm9200_ssc1_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 1,
 	.dev	= {
 		.dma_mask		= &ssc1_dmamask,
@@ -836,7 +836,7 @@ static struct resource ssc2_resources[] = {
 };
 
 static struct platform_device at91rm9200_ssc2_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 2,
 	.dev	= {
 		.dma_mask		= &ssc2_dmamask,
diff --git a/arch/arm/mach-at91/at91sam9260.c b/arch/arm/mach-at91/at91sam9260.c
index f8202615f4a8..d14ab6aea250 100644
--- a/arch/arm/mach-at91/at91sam9260.c
+++ b/arch/arm/mach-at91/at91sam9260.c
@@ -210,7 +210,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.1", &tc3_clk),
 	CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.1", &tc4_clk),
 	CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.1", &tc5_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.0", &ssc_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9260.0", &twi_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g20.0", &twi_clk),
 	/* more usart lookup table for DT entries */
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index 46edfaf275f7..df7bebf07f10 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -742,7 +742,7 @@ static struct resource ssc_resources[] = {
 };
 
 static struct platform_device at91sam9260_ssc_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 0,
 	.dev	= {
 		.dma_mask		= &ssc_dmamask,
diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c
index 04295c04b3e0..c7b605796dd4 100644
--- a/arch/arm/mach-at91/at91sam9261.c
+++ b/arch/arm/mach-at91/at91sam9261.c
@@ -174,9 +174,9 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk),
 	CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk),
 	CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.0", &ssc0_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.1", &ssc1_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.2", &ssc2_clk),
 	CLKDEV_CON_DEV_ID("hclk", "at91_ohci", &hck0),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9261.0", &twi_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g10.0", &twi_clk),
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index b9487696b7be..d8afd2130b79 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -706,7 +706,7 @@ static struct resource ssc0_resources[] = {
 };
 
 static struct platform_device at91sam9261_ssc0_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 0,
 	.dev	= {
 		.dma_mask		= &ssc0_dmamask,
@@ -748,7 +748,7 @@ static struct resource ssc1_resources[] = {
 };
 
 static struct platform_device at91sam9261_ssc1_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 1,
 	.dev	= {
 		.dma_mask		= &ssc1_dmamask,
@@ -790,7 +790,7 @@ static struct resource ssc2_resources[] = {
 };
 
 static struct platform_device at91sam9261_ssc2_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 2,
 	.dev	= {
 		.dma_mask		= &ssc2_dmamask,
diff --git a/arch/arm/mach-at91/at91sam9263.c b/arch/arm/mach-at91/at91sam9263.c
index d6f9c23927c4..b87be18f08ae 100644
--- a/arch/arm/mach-at91/at91sam9263.c
+++ b/arch/arm/mach-at91/at91sam9263.c
@@ -186,8 +186,8 @@ static struct clk *periph_clocks[] __initdata = {
 static struct clk_lookup periph_clocks_lookups[] = {
 	/* One additional fake clock for macb_hclk */
 	CLKDEV_CON_ID("hclk", &macb_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.0", &ssc0_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.1", &ssc1_clk),
 	CLKDEV_CON_DEV_ID("mci_clk", "atmel_mci.0", &mmc0_clk),
 	CLKDEV_CON_DEV_ID("mci_clk", "atmel_mci.1", &mmc1_clk),
 	CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.0", &spi0_clk),
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index cb85da2eccea..025c9704bfe6 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -1199,7 +1199,7 @@ static struct resource ssc0_resources[] = {
 };
 
 static struct platform_device at91sam9263_ssc0_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 0,
 	.dev	= {
 		.dma_mask		= &ssc0_dmamask,
@@ -1241,7 +1241,7 @@ static struct resource ssc1_resources[] = {
 };
 
 static struct platform_device at91sam9263_ssc1_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 1,
 	.dev	= {
 		.dma_mask		= &ssc1_dmamask,
diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c
index 84af1b506d92..f4f96a61e4b8 100644
--- a/arch/arm/mach-at91/at91sam9g45.c
+++ b/arch/arm/mach-at91/at91sam9g45.c
@@ -239,8 +239,8 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.1", &tcb0_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g10.0", &twi0_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g10.1", &twi1_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91sam9g45_ssc.0", &ssc0_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91sam9g45_ssc.1", &ssc1_clk),
 	CLKDEV_CON_DEV_ID(NULL, "atmel-trng", &trng_clk),
 	CLKDEV_CON_DEV_ID(NULL, "atmel_sha", &aestdessha_clk),
 	CLKDEV_CON_DEV_ID(NULL, "atmel_tdes", &aestdessha_clk),
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index b1596072dcc2..27e3bf64a8bc 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -1459,7 +1459,7 @@ static struct resource ssc0_resources[] = {
 };
 
 static struct platform_device at91sam9g45_ssc0_device = {
-	.name	= "ssc",
+	.name	= "at91sam9g45_ssc",
 	.id	= 0,
 	.dev	= {
 		.dma_mask		= &ssc0_dmamask,
@@ -1501,7 +1501,7 @@ static struct resource ssc1_resources[] = {
 };
 
 static struct platform_device at91sam9g45_ssc1_device = {
-	.name	= "ssc",
+	.name	= "at91sam9g45_ssc",
 	.id	= 1,
 	.dev	= {
 		.dma_mask		= &ssc1_dmamask,
diff --git a/arch/arm/mach-at91/at91sam9rl.c b/arch/arm/mach-at91/at91sam9rl.c
index 72e908412222..4110b5480d49 100644
--- a/arch/arm/mach-at91/at91sam9rl.c
+++ b/arch/arm/mach-at91/at91sam9rl.c
@@ -184,8 +184,8 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk),
 	CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk),
 	CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
-	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.0", &ssc0_clk),
+	CLKDEV_CON_DEV_ID("pclk", "at91rm9200_ssc.1", &ssc1_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g20.0", &twi0_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g20.1", &twi1_clk),
 	CLKDEV_CON_ID("pioA", &pioA_clk),
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index 5047bdc92adf..b656110e8afe 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -832,7 +832,7 @@ static struct resource ssc0_resources[] = {
 };
 
 static struct platform_device at91sam9rl_ssc0_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 0,
 	.dev	= {
 		.dma_mask		= &ssc0_dmamask,
@@ -874,7 +874,7 @@ static struct resource ssc1_resources[] = {
 };
 
 static struct platform_device at91sam9rl_ssc1_device = {
-	.name	= "ssc",
+	.name	= "at91rm9200_ssc",
 	.id	= 1,
 	.dev	= {
 		.dma_mask		= &ssc1_dmamask,
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index ac00f83ea86b..f40abd8a6695 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -68,6 +68,26 @@ void ssc_free(struct ssc_device *ssc)
 }
 EXPORT_SYMBOL(ssc_free);
 
+static struct atmel_ssc_platform_data at91rm9200_config = {
+	.use_dma = 0,
+};
+
+static struct atmel_ssc_platform_data at91sam9g45_config = {
+	.use_dma = 1,
+};
+
+static const struct platform_device_id atmel_ssc_devtypes[] = {
+	{
+		.name = "at91rm9200_ssc",
+		.driver_data = (unsigned long) &at91rm9200_config,
+	}, {
+		.name = "at91sam9g45_ssc",
+		.driver_data = (unsigned long) &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+
 static int ssc_probe(struct platform_device *pdev)
 {
 	struct resource *regs;
@@ -80,6 +100,8 @@ static int ssc_probe(struct platform_device *pdev)
 	}
 
 	ssc->pdev = pdev;
+	ssc->pdata = (struct atmel_ssc_platform_data *)
+			platform_get_device_id(pdev)->driver_data;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!regs) {
@@ -139,6 +161,7 @@ static struct platform_driver ssc_driver = {
 		.name		= "ssc",
 		.owner		= THIS_MODULE,
 	},
+	.id_table	= atmel_ssc_devtypes,
 	.probe		= ssc_probe,
 	.remove		= __devexit_p(ssc_remove),
 };
diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h
index 4eb31752e2b7..1ca0e3292bc9 100644
--- a/include/linux/atmel-ssc.h
+++ b/include/linux/atmel-ssc.h
@@ -5,10 +5,15 @@
 #include <linux/list.h>
 #include <linux/io.h>
 
+struct atmel_ssc_platform_data {
+	int			use_dma;
+};
+
 struct ssc_device {
 	struct list_head	list;
 	void __iomem		*regs;
 	struct platform_device	*pdev;
+	struct atmel_ssc_platform_data *pdata;
 	struct clk		*clk;
 	int			user;
 	int			irq;
-- 
cgit v1.2.3-55-g7522


From bcd2360c1ff9fff69eb45bedc5fba7240c6da875 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD
Date: Tue, 30 Oct 2012 05:12:23 +0800
Subject: arm: at91: move platfarm_data to include/linux/platform_data/atmel.h

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/mach-at91/include/mach/board.h     | 55 -----------------------
 arch/avr32/mach-at32ap/include/mach/board.h |  7 ---
 drivers/ata/pata_at91.c                     |  2 +-
 drivers/input/touchscreen/atmel_tsadcc.c    |  2 +-
 drivers/mmc/host/atmel-mci.c                |  2 +-
 drivers/net/can/at91_can.c                  |  3 +-
 drivers/net/ethernet/cadence/at91_ether.c   |  2 +-
 drivers/pcmcia/at91_cf.c                    |  2 +-
 drivers/rtc/rtc-at91sam9.c                  |  2 +-
 drivers/spi/spi-atmel.c                     |  2 +-
 drivers/tty/serial/atmel_serial.c           |  2 +-
 drivers/usb/gadget/at91_udc.c               |  2 +-
 drivers/usb/gadget/atmel_usba_udc.c         |  2 +-
 drivers/usb/host/ohci-at91.c                |  2 +-
 drivers/video/atmel_lcdfb.c                 |  2 +-
 include/linux/platform_data/atmel.h         | 67 +++++++++++++++++++++++++++++
 16 files changed, 80 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h
index c55a4364ffb4..662451d85fc9 100644
--- a/arch/arm/mach-at91/include/mach/board.h
+++ b/arch/arm/mach-at91/include/mach/board.h
@@ -31,42 +31,15 @@
 #ifndef __ASM_ARCH_BOARD_H
 #define __ASM_ARCH_BOARD_H
 
-#include <linux/mtd/partitions.h>
-#include <linux/device.h>
-#include <linux/i2c.h>
-#include <linux/leds.h>
-#include <linux/spi/spi.h>
-#include <linux/usb/atmel_usba_udc.h>
-#include <linux/atmel-mci.h>
-#include <sound/atmel-ac97c.h>
-#include <linux/serial.h>
-#include <linux/platform_data/macb.h>
 #include <linux/platform_data/atmel.h>
 
  /* USB Device */
-struct at91_udc_data {
-	int	vbus_pin;		/* high == host powering us */
-	u8	vbus_active_low;	/* vbus polarity */
-	u8	vbus_polled;		/* Use polling, not interrupt */
-	int	pullup_pin;		/* active == D+ pulled up */
-	u8	pullup_active_low;	/* true == pullup_pin is active low */
-};
 extern void __init at91_add_device_udc(struct at91_udc_data *data);
 
  /* USB High Speed Device */
 extern void __init at91_add_device_usba(struct usba_platform_data *data);
 
  /* Compact Flash */
-struct at91_cf_data {
-	int	irq_pin;		/* I/O IRQ */
-	int	det_pin;		/* Card detect */
-	int	vcc_pin;		/* power switching */
-	int	rst_pin;		/* card reset */
-	u8	chipselect;		/* EBI Chip Select number */
-	u8	flags;
-#define AT91_CF_TRUE_IDE	0x01
-#define AT91_IDE_SWAP_A0_A2	0x02
-};
 extern void __init at91_add_device_cf(struct at91_cf_data *data);
 
  /* MMC / SD */
@@ -86,16 +59,6 @@ extern void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *d
 extern void __init at91_add_device_eth(struct macb_platform_data *data);
 
  /* USB Host */
-#define AT91_MAX_USBH_PORTS	3
-struct at91_usbh_data {
-	int		vbus_pin[AT91_MAX_USBH_PORTS];	/* port power-control pin */
-	int             overcurrent_pin[AT91_MAX_USBH_PORTS];
-	u8		ports;				/* number of ports on root hub */
-	u8              overcurrent_supported;
-	u8              vbus_pin_active_low[AT91_MAX_USBH_PORTS];
-	u8              overcurrent_status[AT91_MAX_USBH_PORTS];
-	u8              overcurrent_changed[AT91_MAX_USBH_PORTS];
-};
 extern void __init at91_add_device_usbh(struct at91_usbh_data *data);
 extern void __init at91_add_device_usbh_ohci(struct at91_usbh_data *data);
 extern void __init at91_add_device_usbh_ehci(struct at91_usbh_data *data);
@@ -124,13 +87,6 @@ extern void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pin
 
 extern struct platform_device *atmel_default_console_device;
 
-struct atmel_uart_data {
-	int			num;		/* port num */
-	short			use_dma_tx;	/* use transmit DMA? */
-	short			use_dma_rx;	/* use receive DMA? */
-	void __iomem		*regs;		/* virt. base address, if any */
-	struct serial_rs485	rs485;		/* rs485 settings */
-};
 extern void __init at91_add_device_serial(void);
 
 /*
@@ -173,24 +129,13 @@ extern void __init at91_add_device_isi(struct isi_platform_data *data,
 		bool use_pck_as_mck);
 
  /* Touchscreen Controller */
-struct at91_tsadcc_data {
-	unsigned int    adc_clock;
-	u8		pendet_debounce;
-	u8		ts_sample_hold_time;
-};
 extern void __init at91_add_device_tsadcc(struct at91_tsadcc_data *data);
 
 /* CAN */
-struct at91_can_data {
-	void (*transceiver_switch)(int on);
-};
 extern void __init at91_add_device_can(struct at91_can_data *data);
 
  /* LEDs */
 extern void __init at91_gpio_leds(struct gpio_led *leds, int nr);
 extern void __init at91_pwm_leds(struct gpio_led *leds, int nr);
 
-/* FIXME: this needs a better location, but gets stuff building again */
-extern int at91_suspend_entering_slow_clock(void);
-
 #endif
diff --git a/arch/avr32/mach-at32ap/include/mach/board.h b/arch/avr32/mach-at32ap/include/mach/board.h
index 70742ec997f8..dca93450cb0e 100644
--- a/arch/avr32/mach-at32ap/include/mach/board.h
+++ b/arch/avr32/mach-at32ap/include/mach/board.h
@@ -34,13 +34,6 @@ extern struct platform_device *atmel_default_console_device;
 #define	ATMEL_USART_CTS		0x02
 #define	ATMEL_USART_CLK		0x04
 
-struct atmel_uart_data {
-	int		num;		/* port num */
-	short		use_dma_tx;	/* use transmit DMA? */
-	short		use_dma_rx;	/* use receive DMA? */
-	void __iomem	*regs;		/* virtual base address, if any */
-	struct serial_rs485	rs485;		/* rs485 settings */
-};
 void at32_map_usart(unsigned int hw_id, unsigned int line, int flags);
 struct platform_device *at32_add_device_usart(unsigned int id);
 
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
index 53d3770a0b1b..2a96bb2c53ee 100644
--- a/drivers/ata/pata_at91.c
+++ b/drivers/ata/pata_at91.c
@@ -27,9 +27,9 @@
 #include <linux/libata.h>
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
+#include <linux/platform_data/atmel.h>
 
 #include <mach/at91sam9_smc.h>
-#include <mach/board.h>
 #include <asm/gpio.h>
 
 #define DRV_NAME		"pata_at91"
diff --git a/drivers/input/touchscreen/atmel_tsadcc.c b/drivers/input/touchscreen/atmel_tsadcc.c
index 201b2d2ec1b3..ea392ee138ed 100644
--- a/drivers/input/touchscreen/atmel_tsadcc.c
+++ b/drivers/input/touchscreen/atmel_tsadcc.c
@@ -22,7 +22,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <mach/board.h>
+#include <linux/platform_data/atmel.h>
 #include <mach/cpu.h>
 
 /* Register definitions based on AT91SAM9RL64 preliminary draft datasheet */
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index ddf096e3803f..868998925826 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/types.h>
+#include <linux/platform_data/atmel.h>
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/sdio.h>
@@ -40,7 +41,6 @@
 #include <asm/unaligned.h>
 
 #include <mach/cpu.h>
-#include <mach/board.h>
 
 #include "atmel-mci-regs.h"
 
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index fcff73a73b1d..994b6acd65f4 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -33,12 +33,11 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/platform_data/atmel.h>
 
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 
-#include <mach/board.h>
-
 #define AT91_MB_MASK(i)		((1 << (i)) - 1)
 
 /* Common registers */
diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
index 4e980a7886fb..35fc6edbacf8 100644
--- a/drivers/net/ethernet/cadence/at91_ether.c
+++ b/drivers/net/ethernet/cadence/at91_ether.c
@@ -31,6 +31,7 @@
 #include <linux/clk.h>
 #include <linux/gfp.h>
 #include <linux/phy.h>
+#include <linux/platform_data/atmel.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -38,7 +39,6 @@
 
 #include <mach/at91rm9200_emac.h>
 #include <asm/gpio.h>
-#include <mach/board.h>
 
 #include "at91_ether.h"
 
diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index 9694c1e783a5..01463c781847 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/gpio.h>
+#include <linux/platform_data/atmel.h>
 
 #include <pcmcia/ss.h>
 
@@ -24,7 +25,6 @@
 #include <asm/io.h>
 #include <asm/sizes.h>
 
-#include <mach/board.h>
 #include <mach/at91rm9200_mc.h>
 #include <mach/at91_ramc.h>
 
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 2dfe7a2fb998..e981798e9a9b 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -19,8 +19,8 @@
 #include <linux/interrupt.h>
 #include <linux/ioctl.h>
 #include <linux/slab.h>
+#include <linux/platform_data/atmel.h>
 
-#include <mach/board.h>
 #include <mach/at91_rtt.h>
 #include <mach/cpu.h>
 
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 16d6a839c7fa..61fb0ec26f06 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -19,9 +19,9 @@
 #include <linux/interrupt.h>
 #include <linux/spi/spi.h>
 #include <linux/slab.h>
+#include <linux/platform_data/atmel.h>
 
 #include <asm/io.h>
-#include <mach/board.h>
 #include <asm/gpio.h>
 #include <mach/cpu.h>
 
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 3d7e1ee2fa57..5660ec2618a3 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -39,12 +39,12 @@
 #include <linux/atmel_pdc.h>
 #include <linux/atmel_serial.h>
 #include <linux/uaccess.h>
+#include <linux/platform_data/atmel.h>
 
 #include <asm/io.h>
 #include <asm/ioctls.h>
 
 #include <asm/mach/serial_at91.h>
-#include <mach/board.h>
 
 #ifdef CONFIG_ARM
 #include <mach/cpu.h>
diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 89d90b5fb787..a7b042b781d5 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -31,6 +31,7 @@
 #include <linux/usb/gadget.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
+#include <linux/platform_data/atmel.h>
 
 #include <asm/byteorder.h>
 #include <mach/hardware.h>
@@ -38,7 +39,6 @@
 #include <asm/irq.h>
 #include <asm/gpio.h>
 
-#include <mach/board.h>
 #include <mach/cpu.h>
 #include <mach/at91sam9261_matrix.h>
 #include <mach/at91_matrix.h>
diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index 9a9bced813ed..a7aed84d98c9 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -21,9 +21,9 @@
 #include <linux/usb/gadget.h>
 #include <linux/usb/atmel_usba_udc.h>
 #include <linux/delay.h>
+#include <linux/platform_data/atmel.h>
 
 #include <asm/gpio.h>
-#include <mach/board.h>
 
 #include "atmel_usba_udc.h"
 
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index 0bf72f943b00..8e62f81ae1ed 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -16,11 +16,11 @@
 #include <linux/platform_device.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/platform_data/atmel.h>
 
 #include <mach/hardware.h>
 #include <asm/gpio.h>
 
-#include <mach/board.h>
 #include <mach/cpu.h>
 
 #ifndef CONFIG_ARCH_AT91
diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c
index 94cac9f9919f..12cf5f31ee8f 100644
--- a/drivers/video/atmel_lcdfb.c
+++ b/drivers/video/atmel_lcdfb.c
@@ -19,8 +19,8 @@
 #include <linux/backlight.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/platform_data/atmel.h>
 
-#include <mach/board.h>
 #include <mach/cpu.h>
 #include <asm/gpio.h>
 
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index b0f2c56a8ea2..dbd6d53cc270 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -8,6 +8,49 @@
 #define __ATMEL_H__
 
 #include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/spi/spi.h>
+#include <linux/usb/atmel_usba_udc.h>
+#include <linux/atmel-mci.h>
+#include <sound/atmel-ac97c.h>
+#include <linux/serial.h>
+#include <linux/platform_data/macb.h>
+
+ /* USB Device */
+struct at91_udc_data {
+	int	vbus_pin;		/* high == host powering us */
+	u8	vbus_active_low;	/* vbus polarity */
+	u8	vbus_polled;		/* Use polling, not interrupt */
+	int	pullup_pin;		/* active == D+ pulled up */
+	u8	pullup_active_low;	/* true == pullup_pin is active low */
+};
+
+ /* Compact Flash */
+struct at91_cf_data {
+	int	irq_pin;		/* I/O IRQ */
+	int	det_pin;		/* Card detect */
+	int	vcc_pin;		/* power switching */
+	int	rst_pin;		/* card reset */
+	u8	chipselect;		/* EBI Chip Select number */
+	u8	flags;
+#define AT91_CF_TRUE_IDE	0x01
+#define AT91_IDE_SWAP_A0_A2	0x02
+};
+
+ /* USB Host */
+#define AT91_MAX_USBH_PORTS	3
+struct at91_usbh_data {
+	int		vbus_pin[AT91_MAX_USBH_PORTS];	/* port power-control pin */
+	int             overcurrent_pin[AT91_MAX_USBH_PORTS];
+	u8		ports;				/* number of ports on root hub */
+	u8              overcurrent_supported;
+	u8              vbus_pin_active_low[AT91_MAX_USBH_PORTS];
+	u8              overcurrent_status[AT91_MAX_USBH_PORTS];
+	u8              overcurrent_changed[AT91_MAX_USBH_PORTS];
+};
 
  /* NAND / SmartMedia */
 struct atmel_nand_data {
@@ -24,4 +67,28 @@ struct atmel_nand_data {
 	unsigned int	num_parts;
 };
 
+ /* Serial */
+struct atmel_uart_data {
+	int			num;		/* port num */
+	short			use_dma_tx;	/* use transmit DMA? */
+	short			use_dma_rx;	/* use receive DMA? */
+	void __iomem		*regs;		/* virt. base address, if any */
+	struct serial_rs485	rs485;		/* rs485 settings */
+};
+
+ /* Touchscreen Controller */
+struct at91_tsadcc_data {
+	unsigned int    adc_clock;
+	u8		pendet_debounce;
+	u8		ts_sample_hold_time;
+};
+
+/* CAN */
+struct at91_can_data {
+	void (*transceiver_switch)(int on);
+};
+
+/* FIXME: this needs a better location, but gets stuff building again */
+extern int at91_suspend_entering_slow_clock(void);
+
 #endif /* __ATMEL_H__ */
-- 
cgit v1.2.3-55-g7522


From 0547fad2dd7ccaaf3c914ba49073fa37e4e241eb Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Tue, 6 Nov 2012 04:53:04 +0000
Subject: usbnet: introduce usbnet_{read|write}_cmd_nopm

This patch introduces the below two helpers to prepare for solving
the usbnet runtime PM problem, which may cause some network utilities
(ifconfig, ethtool,...) touch a suspended device.

	usbnet_read_cmd_nopm()
	usbnet_write_cmd_nopm()

The above two helpers should be called by usbnet resume/suspend
callback to avoid deadlock.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 62 ++++++++++++++++++++++++++++++++++++++++++----
 include/linux/usb/usbnet.h |  4 +++
 2 files changed, 61 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 447d20d22b7c..0ac1ff3571cb 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1611,8 +1611,8 @@ void usbnet_device_suggests_idle(struct usbnet *dev)
 EXPORT_SYMBOL(usbnet_device_suggests_idle);
 
 /*-------------------------------------------------------------------------*/
-int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
-		    u16 value, u16 index, void *data, u16 size)
+static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+			     u16 value, u16 index, void *data, u16 size)
 {
 	void *buf = NULL;
 	int err = -ENOMEM;
@@ -1636,10 +1636,10 @@ int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 out:
 	return err;
 }
-EXPORT_SYMBOL_GPL(usbnet_read_cmd);
 
-int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
-		     u16 value, u16 index, const void *data, u16 size)
+static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+			      u16 value, u16 index, const void *data,
+			      u16 size)
 {
 	void *buf = NULL;
 	int err = -ENOMEM;
@@ -1662,8 +1662,56 @@ int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 out:
 	return err;
 }
+
+/*
+ * The function can't be called inside suspend/resume callback,
+ * otherwise deadlock will be caused.
+ */
+int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, void *data, u16 size)
+{
+	return __usbnet_read_cmd(dev, cmd, reqtype, value, index,
+				 data, size);
+}
+EXPORT_SYMBOL_GPL(usbnet_read_cmd);
+
+/*
+ * The function can't be called inside suspend/resume callback,
+ * otherwise deadlock will be caused.
+ */
+int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+		     u16 value, u16 index, const void *data, u16 size)
+{
+	return __usbnet_write_cmd(dev, cmd, reqtype, value, index,
+				  data, size);
+}
 EXPORT_SYMBOL_GPL(usbnet_write_cmd);
 
+/*
+ * The function can be called inside suspend/resume callback safely
+ * and should only be called by suspend/resume callback generally.
+ */
+int usbnet_read_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype,
+			  u16 value, u16 index, void *data, u16 size)
+{
+	return __usbnet_read_cmd(dev, cmd, reqtype, value, index,
+				 data, size);
+}
+EXPORT_SYMBOL_GPL(usbnet_read_cmd_nopm);
+
+/*
+ * The function can be called inside suspend/resume callback safely
+ * and should only be called by suspend/resume callback generally.
+ */
+int usbnet_write_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype,
+			  u16 value, u16 index, const void *data,
+			  u16 size)
+{
+	return __usbnet_write_cmd(dev, cmd, reqtype, value, index,
+				  data, size);
+}
+EXPORT_SYMBOL_GPL(usbnet_write_cmd_nopm);
+
 static void usbnet_async_cmd_cb(struct urb *urb)
 {
 	struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context;
@@ -1677,6 +1725,10 @@ static void usbnet_async_cmd_cb(struct urb *urb)
 	usb_free_urb(urb);
 }
 
+/*
+ * The caller must make sure that device can't be put into suspend
+ * state until the control URB completes.
+ */
 int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
 			   u16 value, u16 index, const void *data, u16 size)
 {
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 4410e4166c6c..9bbeabf66c54 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -167,6 +167,10 @@ extern int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 		    u16 value, u16 index, void *data, u16 size);
 extern int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 		    u16 value, u16 index, const void *data, u16 size);
+extern int usbnet_read_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, void *data, u16 size);
+extern int usbnet_write_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype,
+		    u16 value, u16 index, const void *data, u16 size);
 extern int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
 		    u16 value, u16 index, const void *data, u16 size);
 
-- 
cgit v1.2.3-55-g7522


From 642c92da36ae0bed3c31fdd408411ab95f4e326b Mon Sep 17 00:00:00 2001
From: Taku Izumi
Date: Tue, 30 Oct 2012 15:26:18 +0900
Subject: PCI: Don't pass pci_dev to pci_ext_cfg_avail()

pci_ext_cfg_avail() doesn't use the "struct pci_dev *" passed to
it, and there's no requirement that a host bridge even be represented
by a pci_dev.  This drops the pci_ext_cfg_avail() parameter.

[bhelgaas: changelog]
Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>---
 arch/x86/pci/common.c   | 2 +-
 drivers/acpi/pci_root.c | 2 +-
 drivers/pci/pci.c       | 5 ++---
 include/linux/pci.h     | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 720e973fc34a..52dbf1aeeb63 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -626,7 +626,7 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
-int pci_ext_cfg_avail(struct pci_dev *dev)
+int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
 		return 1;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 66f3ae74d130..50f329d7ccff 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -564,7 +564,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 		acpi_pci_bridge_scan(child);
 
 	/* Indicate support for various _OSC capabilities. */
-	if (pci_ext_cfg_avail(root->bus->self))
+	if (pci_ext_cfg_avail())
 		flags |= OSC_EXT_PCI_CONFIG_SUPPORT;
 	if (pcie_aspm_support_enabled())
 		flags |= OSC_ACTIVE_STATE_PWR_SUPPORT |
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 54858838f098..01b68bfa2321 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3833,14 +3833,13 @@ static void __devinit pci_no_domains(void)
 }
 
 /**
- * pci_ext_cfg_enabled - can we access extended PCI config space?
- * @dev: The PCI device of the root bridge.
+ * pci_ext_cfg_avail - can we access extended PCI config space?
  *
  * Returns 1 if we can access PCI extended config space (offsets
  * greater than 0xff). This is the default implementation. Architecture
  * implementations can override this.
  */
-int __weak pci_ext_cfg_avail(struct pci_dev *dev)
+int __weak pci_ext_cfg_avail(void)
 {
 	return 1;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 786094254d57..9253af697ca4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1604,7 +1604,7 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
-int pci_ext_cfg_avail(struct pci_dev *dev);
+int pci_ext_cfg_avail(void);
 
 void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
 
-- 
cgit v1.2.3-55-g7522


From 216b6cbdcbd86b1db0754d58886b466ae31f5a63 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Wed, 29 Aug 2012 10:10:10 -0400
Subject: exportfs: add FILEID_INVALID to indicate invalid fid_type

This commit adds FILEID_INVALID = 0xff in fid_type to
indicate invalid fid_type

It avoids using magic number 255

Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Vivek Trivedi <vtrivedi018@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/exportfs/expfs.c      | 4 ++--
 fs/fhandle.c             | 2 +-
 fs/nfsd/nfsfh.c          | 4 ++--
 include/linux/exportfs.h | 5 +++++
 4 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 29ab099e3e08..f1f1c59c2966 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -322,10 +322,10 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
 
 	if (parent && (len < 4)) {
 		*max_len = 4;
-		return 255;
+		return FILEID_INVALID;
 	} else if (len < 2) {
 		*max_len = 2;
-		return 255;
+		return FILEID_INVALID;
 	}
 
 	len = 2;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index f775bfdd6e4a..26f12b95702a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -52,7 +52,7 @@ static long do_sys_name_to_handle(struct path *path,
 	handle_bytes = handle_dwords * sizeof(u32);
 	handle->handle_bytes = handle_bytes;
 	if ((handle->handle_bytes > f_handle.handle_bytes) ||
-	    (retval == 255) || (retval == -ENOSPC)) {
+	    (retval == FILEID_INVALID) || (retval == -ENOSPC)) {
 		/* As per old exportfs_encode_fh documentation
 		 * we could return ENOSPC to indicate overflow
 		 * But file system returned 255 always. So handle
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 032af381b3aa..814afaa4458a 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 
 		if (inode)
 			_fh_update(fhp, exp, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255) {
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
 			fh_put(fhp);
 			return nfserr_opnotsupp;
 		}
@@ -603,7 +603,7 @@ fh_update(struct svc_fh *fhp)
 			goto out;
 
 		_fh_update(fhp, fhp->fh_export, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255)
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
 			return nfserr_opnotsupp;
 	}
 out:
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7ee275..0e1452546300 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -83,6 +83,11 @@ enum fid_type {
 	 * 64 bit parent inode number.
 	 */
 	FILEID_NILFS_WITH_PARENT = 0x62,
+
+	/*
+	 * Filesystems must not use 0xff file ID.
+	 */
+	FILEID_INVALID = 0xff,
 };
 
 struct fid {
-- 
cgit v1.2.3-55-g7522


From 9d626eccb1de90a310f3fb9bc5e8803706be1a95 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto
Date: Tue, 30 Oct 2012 20:06:55 -0700
Subject: sh: clkfwk: add sh_clk_fsidiv_register()

This patch adds sh_clk_fsidiv_register() to share FSI-DIV clock code

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 drivers/sh/clk/cpg.c   | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sh_clk.h |  9 ++++++
 2 files changed, 95 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c
index 07e9fb4f8041..b3dc44146ca0 100644
--- a/drivers/sh/clk/cpg.c
+++ b/drivers/sh/clk/cpg.c
@@ -361,3 +361,89 @@ int __init sh_clk_div4_reparent_register(struct clk *clks, int nr,
 	return sh_clk_div_register_ops(clks, nr, table,
 				       &sh_clk_div4_reparent_clk_ops);
 }
+
+/* FSI-DIV */
+static unsigned long fsidiv_recalc(struct clk *clk)
+{
+	u32 value;
+
+	value = __raw_readl(clk->mapping->base);
+
+	value >>= 16;
+	if (value < 2)
+		return clk->parent->rate;
+
+	return clk->parent->rate / value;
+}
+
+static long fsidiv_round_rate(struct clk *clk, unsigned long rate)
+{
+	return clk_rate_div_range_round(clk, 1, 0xffff, rate);
+}
+
+static void fsidiv_disable(struct clk *clk)
+{
+	__raw_writel(0, clk->mapping->base);
+}
+
+static int fsidiv_enable(struct clk *clk)
+{
+	u32 value;
+
+	value  = __raw_readl(clk->mapping->base) >> 16;
+	if (value < 2)
+		return 0;
+
+	__raw_writel((value << 16) | 0x3, clk->mapping->base);
+
+	return 0;
+}
+
+static int fsidiv_set_rate(struct clk *clk, unsigned long rate)
+{
+	u32 val;
+	int idx;
+
+	idx = (clk->parent->rate / rate) & 0xffff;
+	if (idx < 2)
+		__raw_writel(0, clk->mapping->base);
+	else
+		__raw_writel(idx << 16, clk->mapping->base);
+
+	return 0;
+}
+
+static struct sh_clk_ops fsidiv_clk_ops = {
+	.recalc		= fsidiv_recalc,
+	.round_rate	= fsidiv_round_rate,
+	.set_rate	= fsidiv_set_rate,
+	.enable		= fsidiv_enable,
+	.disable	= fsidiv_disable,
+};
+
+int __init sh_clk_fsidiv_register(struct clk *clks, int nr)
+{
+	struct clk_mapping *map;
+	int i;
+
+	for (i = 0; i < nr; i++) {
+
+		map = kzalloc(sizeof(struct clk_mapping), GFP_KERNEL);
+		if (!map) {
+			pr_err("%s: unable to alloc memory\n", __func__);
+			return -ENOMEM;
+		}
+
+		/* clks[i].enable_reg came from SH_CLK_FSIDIV() */
+		map->phys		= (phys_addr_t)clks[i].enable_reg;
+		map->len		= 8;
+
+		clks[i].enable_reg	= 0; /* remove .enable_reg */
+		clks[i].ops		= &fsidiv_clk_ops;
+		clks[i].mapping		= map;
+
+		clk_register(&clks[i]);
+	}
+
+	return 0;
+}
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index 50910913b268..60c72395ec6b 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -199,4 +199,13 @@ int sh_clk_div6_reparent_register(struct clk *clks, int nr);
 #define CLKDEV_DEV_ID(_id, _clk) { .dev_id = _id, .clk = _clk }
 #define CLKDEV_ICK_ID(_cid, _did, _clk) { .con_id = _cid, .dev_id = _did, .clk = _clk }
 
+/* .enable_reg will be updated to .mapping on sh_clk_fsidiv_register() */
+#define SH_CLK_FSIDIV(_reg, _parent)		\
+{						\
+	.enable_reg = (void __iomem *)_reg,	\
+	.parent		= _parent,		\
+}
+
+int sh_clk_fsidiv_register(struct clk *clks, int nr);
+
 #endif /* __SH_CLOCK_H */
-- 
cgit v1.2.3-55-g7522


From ada8a8a13b13a2749818524a7949935f68d2b3eb Mon Sep 17 00:00:00 2001
From: Wei WANG
Date: Mon, 29 Oct 2012 13:49:33 +0800
Subject: mfd: Add realtek pcie card reader driver

Realtek PCI-E card reader driver adapts requests from upper-level
sdmmc/memstick layer to the real physical card reader.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |    9 +
 drivers/mfd/Makefile            |    3 +
 drivers/mfd/rtl8411.c           |  251 ++++++++
 drivers/mfd/rts5209.c           |  223 +++++++
 drivers/mfd/rts5229.c           |  205 +++++++
 drivers/mfd/rtsx_pcr.c          | 1251 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/rtsx_pcr.h          |   32 +
 include/linux/mfd/rtsx_common.h |   48 ++
 include/linux/mfd/rtsx_pci.h    |  794 +++++++++++++++++++++++++
 9 files changed, 2816 insertions(+)
 create mode 100644 drivers/mfd/rtl8411.c
 create mode 100644 drivers/mfd/rts5209.c
 create mode 100644 drivers/mfd/rts5229.c
 create mode 100644 drivers/mfd/rtsx_pcr.c
 create mode 100644 drivers/mfd/rtsx_pcr.h
 create mode 100644 include/linux/mfd/rtsx_common.h
 create mode 100644 include/linux/mfd/rtsx_pci.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9bba7f78ff36..d0cb4d4bc2cc 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -63,6 +63,15 @@ config MFD_SM501_GPIO
 	 lines on the SM501. The platform data is used to supply the
 	 base number for the first GPIO line to register.
 
+config MFD_RTSX_PCI
+	tristate "Support for Realtek PCI-E card reader"
+	depends on PCI
+	help
+	  This supports for Realtek PCI-Express card reader including rts5209,
+	  rts5229, rtl8411, etc. Realtek card reader supports access to many
+	  types of memory cards, such as Memory Stick, Memory Stick Pro,
+	  Secure Digital and MultiMediaCard.
+
 config MFD_ASIC3
 	bool "Support for Compaq ASIC3"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 442c17e40741..a4093a44304a 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -9,6 +9,9 @@ obj-$(CONFIG_MFD_88PM805)	+= 88pm805.o 88pm80x.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
 
+rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o
+obj-$(CONFIG_MFD_RTSX_PCI)	+= rtsx_pci.o
+
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
new file mode 100644
index 000000000000..89f046ca9e41
--- /dev/null
+++ b/drivers/mfd/rtl8411.c
@@ -0,0 +1,251 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, SYS_VER, &val);
+	return val & 0x0F;
+}
+
+static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CD_PAD_CTL,
+			CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE);
+}
+
+static int rtl8411_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rtl8411_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_ON);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON);
+}
+
+static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_OFF);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+}
+
+static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
+{
+	unsigned int card_exist;
+
+	card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+	card_exist &= CARD_EXIST;
+	if (!card_exist) {
+		/* Enable card CD */
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, CD_ENABLE);
+		/* Enable card interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00);
+		return 0;
+	}
+
+	if (hweight32(card_exist) > 1) {
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+		msleep(100);
+
+		card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+		if (card_exist & MS_EXIST)
+			card_exist = MS_EXIST;
+		else if (card_exist & SD_EXIST)
+			card_exist = SD_EXIST;
+		else
+			card_exist = 0;
+
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_OFF);
+
+		dev_dbg(&(pcr->pci->dev),
+				"After CD deglitch, card_exist = 0x%x\n",
+				card_exist);
+	}
+
+	if (card_exist & MS_EXIST) {
+		/* Disable SD interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, MS_CD_EN_ONLY);
+	} else if (card_exist & SD_EXIST) {
+		/* Disable MS interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, SD_CD_EN_ONLY);
+	}
+
+	return card_exist;
+}
+
+static const struct pcr_ops rtl8411_pcr_ops = {
+	.extra_init_hw = rtl8411_extra_init_hw,
+	.optimize_phy = NULL,
+	.turn_on_led = rtl8411_turn_on_led,
+	.turn_off_led = rtl8411_turn_off_led,
+	.enable_auto_blink = rtl8411_enable_auto_blink,
+	.disable_auto_blink = rtl8411_disable_auto_blink,
+	.card_power_on = rtl8411_card_power_on,
+	.card_power_off = rtl8411_card_power_off,
+	.cd_deglitch = rtl8411_cd_deglitch,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+void rtl8411_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rtl8411_pcr_ops;
+
+	pcr->ic_version = rtl8411_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rtl8411_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rtl8411_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rtl8411_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rtl8411_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
new file mode 100644
index 000000000000..283a4f148084
--- /dev/null
+++ b/drivers/mfd/rts5209.c
@@ -0,0 +1,223 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	val = rtsx_pci_readb(pcr, 0x1C);
+	return val & 0x0F;
+}
+
+static void rts5209_init_vendor_cfg(struct rtsx_pcr *pcr)
+{
+	u32 val;
+
+	rtsx_pci_read_config_dword(pcr, 0x724, &val);
+	dev_dbg(&(pcr->pci->dev), "Cfg 0x724: 0x%x\n", val);
+
+	if (!(val & 0x80)) {
+		if (val & 0x08)
+			pcr->ms_pmos = false;
+		else
+			pcr->ms_pmos = true;
+	}
+}
+
+static int rts5209_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Turn off LED */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03);
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5209_optimize_phy(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966);
+}
+
+static int rts5209_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rts5209_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+	u8 pwr_mask, partial_pwr_on, pwr_on;
+
+	pwr_mask = SD_POWER_MASK;
+	partial_pwr_on = SD_PARTIAL_POWER_ON;
+	pwr_on = SD_POWER_ON;
+
+	if (pcr->ms_pmos && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		partial_pwr_on = MS_PARTIAL_POWER_ON;
+		pwr_on = MS_POWER_ON;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask, partial_pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x04);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	u8 pwr_mask, pwr_off;
+
+	pwr_mask = SD_POWER_MASK;
+	pwr_off = SD_POWER_OFF;
+
+	if (pcr->ms_pmos && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		pwr_off = MS_POWER_OFF;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5209_pcr_ops = {
+	.extra_init_hw = rts5209_extra_init_hw,
+	.optimize_phy = rts5209_optimize_phy,
+	.turn_on_led = rts5209_turn_on_led,
+	.turn_off_led = rts5209_turn_off_led,
+	.enable_auto_blink = rts5209_enable_auto_blink,
+	.disable_auto_blink = rts5209_disable_auto_blink,
+	.card_power_on = rts5209_card_power_on,
+	.card_power_off = rts5209_card_power_off,
+	.cd_deglitch = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5209_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 |
+		EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5209_pcr_ops;
+
+	rts5209_init_vendor_cfg(pcr);
+
+	pcr->ic_version = rts5209_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
new file mode 100644
index 000000000000..b9dbab266fda
--- /dev/null
+++ b/drivers/mfd/rts5229.c
@@ -0,0 +1,205 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static int rts5229_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5229_optimize_phy(struct rtsx_pcr *pcr)
+{
+	/* Optimize RX sensitivity */
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42);
+}
+
+static int rts5229_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5229_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK | PMOS_STRG_MASK,
+			SD_POWER_OFF | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5229_pcr_ops = {
+	.extra_init_hw = rts5229_extra_init_hw,
+	.optimize_phy = rts5229_optimize_phy,
+	.turn_on_led = rts5229_turn_on_led,
+	.turn_off_led = rts5229_turn_off_led,
+	.enable_auto_blink = rts5229_enable_auto_blink,
+	.disable_auto_blink = rts5229_disable_auto_blink,
+	.card_power_on = rts5229_card_power_on,
+	.card_power_off = rts5229_card_power_off,
+	.cd_deglitch = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5229_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5229_pcr_ops;
+
+	pcr->ic_version = rts5229_get_ic_version(pcr);
+	if (pcr->ic_version == IC_VER_C) {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2;
+	} else {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1;
+	}
+	pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
new file mode 100644
index 000000000000..56d4377c62c2
--- /dev/null
+++ b/drivers/mfd/rtsx_pcr.c
@@ -0,0 +1,1251 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rtsx_pci.h>
+#include <asm/unaligned.h>
+
+#include "rtsx_pcr.h"
+
+static bool msi_en = true;
+module_param(msi_en, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(msi_en, "Enable MSI");
+
+static DEFINE_IDR(rtsx_pci_idr);
+static DEFINE_SPINLOCK(rtsx_pci_lock);
+
+static struct mfd_cell rtsx_pcr_cells[] = {
+	[RTSX_SD_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_SDMMC,
+	},
+	[RTSX_MS_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_MS,
+	},
+};
+
+static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = {
+	{ PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, rtsx_pci_ids);
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr)
+{
+	/* If pci device removed, don't queue idle work any more */
+	if (pcr->remove_pci)
+		return;
+
+	if (pcr->state != PDEV_STAT_RUN) {
+		pcr->state = PDEV_STAT_RUN;
+		if (pcr->ops->enable_auto_blink)
+			pcr->ops->enable_auto_blink(pcr);
+	}
+
+	mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_start_run);
+
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data)
+{
+	int i;
+	u32 val = HAIMR_WRITE_START;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0) {
+			if (data != (u8)val)
+				return -EIO;
+			return 0;
+		}
+	}
+
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_register);
+
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data)
+{
+	u32 val = HAIMR_READ_START;
+	int i;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0)
+			break;
+	}
+
+	if (i >= MAX_RW_REG_CNT)
+		return -ETIMEDOUT;
+
+	if (data)
+		*data = (u8)(val & 0xFF);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_register);
+
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val)
+{
+	int err, i, finished = 0;
+	u8 tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8));
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register);
+
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val)
+{
+	int err, i, finished = 0;
+	u16 data;
+	u8 *ptr, tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0);
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	ptr = rtsx_pci_get_cmd_data(pcr);
+	data = ((u16)ptr[1] << 8) | ptr[0];
+
+	if (val)
+		*val = data;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register);
+
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA);
+
+	rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80);
+	rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd);
+
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data)
+{
+	unsigned long flags;
+	u32 val = 0;
+	u32 *ptr = (u32 *)(pcr->host_cmds_ptr);
+
+	val |= (u32)(cmd_type & 0x03) << 30;
+	val |= (u32)(reg_addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	ptr += pcr->ci;
+	if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) {
+		put_unaligned_le32(val, ptr);
+		ptr++;
+		pcr->ci++;
+	}
+	spin_unlock_irqrestore(&pcr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd);
+
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr)
+{
+	u32 val = 1 << 31;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait);
+
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout)
+{
+	struct completion trans_done;
+	u32 val = 1 << 31;
+	long timeleft;
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	/* set up data structures for the wakeup system */
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	/* Wait for TRANS_OK_INT */
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n",
+				__func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto finish_send_cmd;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_RESULT_OK)
+		err = 0;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+finish_send_cmd:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd);
+
+static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
+		dma_addr_t addr, unsigned int len, int end)
+{
+	u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
+	u64 val;
+	u8 option = SG_VALID | SG_TRANS_DATA;
+
+	dev_dbg(&(pcr->pci->dev), "DMA addr: 0x%x, Len: 0x%x\n",
+			(unsigned int)addr, len);
+
+	if (end)
+		option |= SG_END;
+	val = ((u64)addr << 32) | ((u64)len << 12) | option;
+
+	put_unaligned_le64(val, ptr);
+	ptr++;
+	pcr->sgi++;
+}
+
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout)
+{
+	struct completion trans_done;
+	u8 dir;
+	int err = 0, i, count;
+	long timeleft;
+	unsigned long flags;
+	struct scatterlist *sg;
+	enum dma_data_direction dma_dir;
+	u32 val;
+	dma_addr_t addr;
+	unsigned int len;
+
+	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+
+	/* don't transfer data during abort processing */
+	if (pcr->remove_pci)
+		return -EINVAL;
+
+	if ((sglist == NULL) || (num_sg <= 0))
+		return -EINVAL;
+
+	if (read) {
+		dir = DEVICE_TO_HOST;
+		dma_dir = DMA_FROM_DEVICE;
+	} else {
+		dir = HOST_TO_DEVICE;
+		dma_dir = DMA_TO_DEVICE;
+	}
+
+	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
+	if (count < 1) {
+		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
+		return -EINVAL;
+	}
+	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
+
+	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
+	pcr->sgi = 0;
+	for_each_sg(sglist, sg, count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1);
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+	rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n",
+				__func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto out;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+out:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+
+		memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256);
+		ptr += 256;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf);
+
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf);
+
+static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+
+	while (*tbl & 0xFFFF0000) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+				(u16)(*tbl >> 16), 0xFF, (u8)(*tbl));
+		tbl++;
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_enable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_enable_tbl;
+	else
+		return -EINVAL;
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable);
+
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_disable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_disable_tbl;
+	else
+		return -EINVAL;
+
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable);
+
+static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr)
+{
+	pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN;
+
+	if (pcr->num_slots > 1)
+		pcr->bier |= MS_INT_EN;
+
+	/* Enable Bus Interrupt */
+	rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier);
+
+	dev_dbg(&(pcr->pci->dev), "RTSX_BIER: 0x%08x\n", pcr->bier);
+}
+
+static inline u8 double_ssc_depth(u8 depth)
+{
+	return ((depth > 1) ? (depth - 1) : depth);
+}
+
+static u8 revise_ssc_depth(u8 ssc_depth, u8 div)
+{
+	if (div > CLK_DIV_1) {
+		if (ssc_depth > (div - 1))
+			ssc_depth -= (div - 1);
+		else
+			ssc_depth = SSC_DEPTH_4M;
+	}
+
+	return ssc_depth;
+}
+
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk)
+{
+	int err, clk;
+	u8 N, min_N, max_N, clk_divider;
+	u8 mcu_cnt, div, max_div;
+	u8 depth[] = {
+		[RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M,
+		[RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M,
+		[RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M,
+		[RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K,
+		[RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K,
+	};
+
+	if (initial_mode) {
+		/* We use 250k(around) here, in initial stage */
+		clk_divider = SD_CLK_DIVIDE_128;
+		card_clock = 30000000;
+	} else {
+		clk_divider = SD_CLK_DIVIDE_0;
+	}
+	err = rtsx_pci_write_register(pcr, SD_CFG1,
+			SD_CLK_DIVIDE_MASK, clk_divider);
+	if (err < 0)
+		return err;
+
+	card_clock /= 1000000;
+	dev_dbg(&(pcr->pci->dev), "Switch card clock to %dMHz\n", card_clock);
+
+	min_N = 80;
+	max_N = 208;
+	max_div = CLK_DIV_8;
+
+	clk = card_clock;
+	if (!initial_mode && double_clk)
+		clk = card_clock * 2;
+	dev_dbg(&(pcr->pci->dev),
+			"Internal SSC clock: %dMHz (cur_clock = %d)\n",
+			clk, pcr->cur_clock);
+
+	if (clk == pcr->cur_clock)
+		return 0;
+
+	N = (u8)(clk - 2);
+	if ((clk <= 2) || (N > max_N))
+		return -EINVAL;
+
+	mcu_cnt = (u8)(125/clk + 3);
+	if (mcu_cnt > 15)
+		mcu_cnt = 15;
+
+	/* Make sure that the SSC clock div_n is equal or greater than min_N */
+	div = CLK_DIV_1;
+	while ((N < min_N) && (div < max_div)) {
+		N = (N + 2) * 2 - 2;
+		div++;
+	}
+	dev_dbg(&(pcr->pci->dev), "N = %d, div = %d\n", N, div);
+
+	ssc_depth = depth[ssc_depth];
+	if (double_clk)
+		ssc_depth = double_ssc_depth(ssc_depth);
+
+	ssc_depth = revise_ssc_depth(ssc_depth, div);
+	dev_dbg(&(pcr->pci->dev), "ssc_depth = %d\n", ssc_depth);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL,
+			CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV,
+			0xFF, (div << 4) | mcu_cnt);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2,
+			SSC_DEPTH_MASK, ssc_depth);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, N);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB);
+	if (vpclk) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 2000);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC clock stable */
+	udelay(10);
+	err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+	if (err < 0)
+		return err;
+
+	pcr->cur_clock = clk;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock);
+
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_on)
+		return pcr->ops->card_power_on(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on);
+
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_off)
+		return pcr->ops->card_power_off(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
+
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr)
+{
+	unsigned int val;
+
+	val = rtsx_pci_readl(pcr, RTSX_BIPR);
+	if (pcr->ops->cd_deglitch)
+		val = pcr->ops->cd_deglitch(pcr);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_exist);
+
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr)
+{
+	struct completion finish;
+
+	pcr->finish_me = &finish;
+	init_completion(&finish);
+
+	if (pcr->done)
+		complete(pcr->done);
+
+	if (!pcr->remove_pci)
+		rtsx_pci_stop_cmd(pcr);
+
+	wait_for_completion_interruptible_timeout(&finish,
+			msecs_to_jiffies(2));
+	pcr->finish_me = NULL;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer);
+
+static void rtsx_pci_card_detect(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct rtsx_pcr *pcr;
+	unsigned long flags;
+	unsigned int card_detect = 0;
+	u32 irq_status;
+
+	dwork = to_delayed_work(work);
+	pcr = container_of(dwork, struct rtsx_pcr, carddet_work);
+
+	dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__);
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	irq_status = rtsx_pci_readl(pcr, RTSX_BIPR);
+	dev_dbg(&(pcr->pci->dev), "irq_status: 0x%08x\n", irq_status);
+
+	if (pcr->card_inserted || pcr->card_removed) {
+		dev_dbg(&(pcr->pci->dev),
+				"card_inserted: 0x%x, card_removed: 0x%x\n",
+				pcr->card_inserted, pcr->card_removed);
+
+		if (pcr->ops->cd_deglitch)
+			pcr->card_inserted = pcr->ops->cd_deglitch(pcr);
+
+		card_detect = pcr->card_inserted | pcr->card_removed;
+		pcr->card_inserted = 0;
+		pcr->card_removed = 0;
+	}
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if (card_detect & SD_EXIST)
+		pcr->slots[RTSX_SD_CARD].card_event(
+				pcr->slots[RTSX_SD_CARD].p_dev);
+	if (card_detect & MS_EXIST)
+		pcr->slots[RTSX_MS_CARD].card_event(
+				pcr->slots[RTSX_MS_CARD].p_dev);
+}
+
+static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
+{
+	struct rtsx_pcr *pcr = dev_id;
+	u32 int_reg;
+
+	if (!pcr)
+		return IRQ_NONE;
+
+	spin_lock(&pcr->lock);
+
+	int_reg = rtsx_pci_readl(pcr, RTSX_BIPR);
+	/* Clear interrupt flag */
+	rtsx_pci_writel(pcr, RTSX_BIPR, int_reg);
+	if ((int_reg & pcr->bier) == 0) {
+		spin_unlock(&pcr->lock);
+		return IRQ_NONE;
+	}
+	if (int_reg == 0xFFFFFFFF) {
+		spin_unlock(&pcr->lock);
+		return IRQ_HANDLED;
+	}
+
+	int_reg &= (pcr->bier | 0x7FFFFF);
+
+	if (int_reg & SD_INT) {
+		if (int_reg & SD_EXIST) {
+			pcr->card_inserted |= SD_EXIST;
+		} else {
+			pcr->card_removed |= SD_EXIST;
+			pcr->card_inserted &= ~SD_EXIST;
+		}
+	}
+
+	if (int_reg & MS_INT) {
+		if (int_reg & MS_EXIST) {
+			pcr->card_inserted |= MS_EXIST;
+		} else {
+			pcr->card_removed |= MS_EXIST;
+			pcr->card_inserted &= ~MS_EXIST;
+		}
+	}
+
+	if (pcr->card_inserted || pcr->card_removed)
+		schedule_delayed_work(&pcr->carddet_work,
+				msecs_to_jiffies(200));
+
+	if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) {
+		if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) {
+			pcr->trans_result = TRANS_RESULT_FAIL;
+			if (pcr->done)
+				complete(pcr->done);
+		} else if (int_reg & TRANS_OK_INT) {
+			pcr->trans_result = TRANS_RESULT_OK;
+			if (pcr->done)
+				complete(pcr->done);
+		}
+	}
+
+	spin_unlock(&pcr->lock);
+	return IRQ_HANDLED;
+}
+
+static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
+{
+	dev_info(&(pcr->pci->dev), "%s: pcr->msi_en = %d, pci->irq = %d\n",
+			__func__, pcr->msi_en, pcr->pci->irq);
+
+	if (request_irq(pcr->pci->irq, rtsx_pci_isr,
+			pcr->msi_en ? 0 : IRQF_SHARED,
+			DRV_NAME_RTSX_PCI, pcr)) {
+		dev_err(&(pcr->pci->dev),
+			"rtsx_sdmmc: unable to grab IRQ %d, disabling device\n",
+			pcr->pci->irq);
+		return -1;
+	}
+
+	pcr->irq = pcr->pci->irq;
+	pci_intx(pcr->pci, !pcr->msi_en);
+
+	return 0;
+}
+
+static void rtsx_pci_idle_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);
+
+	dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pcr->state = PDEV_STAT_IDLE;
+
+	if (pcr->ops->disable_auto_blink)
+		pcr->ops->disable_auto_blink(pcr);
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	mutex_unlock(&pcr->pcr_mutex);
+}
+
+static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	rtsx_pci_enable_bus_int(pcr);
+
+	/* Power on SSC */
+	err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC power stable */
+	udelay(200);
+
+	if (pcr->ops->optimize_phy) {
+		err = pcr->ops->optimize_phy(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Set mcu_cnt to 7 to ensure data can be sampled properly */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00);
+	/* Disable card clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Reset delink mode */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0);
+	/* Card driving select */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0x07, DRIVER_TYPE_D);
+	/* Enable SSC Clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1,
+			0xFF, SSC_8X_EN | SSC_SEL_4M);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12);
+	/* Disable cd_pwr_save */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10);
+	/* Clear Link Ready Interrupt */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0,
+			LINK_RDY_INT, LINK_RDY_INT);
+	/* Enlarge the estimation window of PERST# glitch
+	 * to reduce the chance of invalid card interrupt
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80);
+	/* Update RC oscillator to 400k
+	 * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1
+	 *                1: 2M  0: 400k
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00);
+	/* Set interrupt write clear
+	 * bit 1: U_elbi_if_rd_clr_en
+	 *	1: Enable ELBI interrupt[31:22] & [7:0] flag read clear
+	 *	0: ELBI interrupt flag[31:22] & [7:0] only can be write clear
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0);
+	/* Force CLKREQ# PIN to drive 0 to request clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* Enable clk_request_n to enable clock power management */
+	rtsx_pci_write_config_byte(pcr, 0x81, 1);
+	/* Enter L1 when host tx idle */
+	rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B);
+
+	if (pcr->ops->extra_init_hw) {
+		err = pcr->ops->extra_init_hw(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	spin_lock_init(&pcr->lock);
+	mutex_init(&pcr->pcr_mutex);
+
+	switch (PCI_PID(pcr)) {
+	default:
+	case 0x5209:
+		rts5209_init_params(pcr);
+		break;
+
+	case 0x5229:
+		rts5229_init_params(pcr);
+		break;
+
+	case 0x5289:
+		rtl8411_init_params(pcr);
+		break;
+	}
+
+	dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n",
+			PCI_PID(pcr), pcr->ic_version);
+
+	pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot),
+			GFP_KERNEL);
+	if (!pcr->slots)
+		return -ENOMEM;
+
+	pcr->state = PDEV_STAT_IDLE;
+	err = rtsx_pci_init_hw(pcr);
+	if (err < 0) {
+		kfree(pcr->slots);
+		return err;
+	}
+
+	return 0;
+}
+
+static int __devinit rtsx_pci_probe(struct pci_dev *pcidev,
+				    const struct pci_device_id *id)
+{
+	struct rtsx_pcr *pcr;
+	struct pcr_handle *handle;
+	u32 base, len;
+	int ret, i;
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device,
+		(int)pcidev->revision);
+
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		return ret;
+
+	ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI);
+	if (ret)
+		goto disable;
+
+	pcr = kzalloc(sizeof(*pcr), GFP_KERNEL);
+	if (!pcr) {
+		ret = -ENOMEM;
+		goto release_pci;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle) {
+		ret = -ENOMEM;
+		goto free_pcr;
+	}
+	handle->pcr = pcr;
+
+	if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto free_handle;
+	}
+
+	spin_lock(&rtsx_pci_lock);
+	ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+	if (ret)
+		goto free_handle;
+
+	pcr->pci = pcidev;
+	dev_set_drvdata(&pcidev->dev, handle);
+
+	len = pci_resource_len(pcidev, 0);
+	base = pci_resource_start(pcidev, 0);
+	pcr->remap_addr = ioremap_nocache(base, len);
+	if (!pcr->remap_addr) {
+		ret = -ENOMEM;
+		goto free_host;
+	}
+
+	pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev),
+			RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr),
+			GFP_KERNEL);
+	if (pcr->rtsx_resv_buf == NULL) {
+		ret = -ENXIO;
+		goto unmap;
+	}
+	pcr->host_cmds_ptr = pcr->rtsx_resv_buf;
+	pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr;
+	pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN;
+	pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN;
+
+	pcr->card_inserted = 0;
+	pcr->card_removed = 0;
+	INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
+	INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);
+
+	pcr->msi_en = msi_en;
+	if (pcr->msi_en) {
+		ret = pci_enable_msi(pcidev);
+		if (ret < 0)
+			pcr->msi_en = false;
+	}
+
+	ret = rtsx_pci_acquire_irq(pcr);
+	if (ret < 0)
+		goto free_dma;
+
+	pci_set_master(pcidev);
+	synchronize_irq(pcr->irq);
+
+	ret = rtsx_pci_init_chip(pcr);
+	if (ret < 0)
+		goto disable_irq;
+
+	for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) {
+		rtsx_pcr_cells[i].platform_data = handle;
+		rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
+	}
+	ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
+			ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
+	if (ret < 0)
+		goto disable_irq;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+	return 0;
+
+disable_irq:
+	free_irq(pcr->irq, (void *)pcr);
+free_dma:
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+unmap:
+	iounmap(pcr->remap_addr);
+free_host:
+	dev_set_drvdata(&pcidev->dev, NULL);
+free_handle:
+	kfree(handle);
+free_pcr:
+	kfree(pcr);
+release_pci:
+	pci_release_regions(pcidev);
+disable:
+	pci_disable_device(pcidev);
+
+	return ret;
+}
+
+static void __devexit rtsx_pci_remove(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle = pci_get_drvdata(pcidev);
+	struct rtsx_pcr *pcr = handle->pcr;
+
+	pcr->remove_pci = true;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mfd_remove_devices(&pcidev->dev);
+
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+	free_irq(pcr->irq, (void *)pcr);
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
+	iounmap(pcr->remap_addr);
+
+	dev_set_drvdata(&pcidev->dev, NULL);
+	pci_release_regions(pcidev);
+	pci_disable_device(pcidev);
+
+	spin_lock(&rtsx_pci_lock);
+	idr_remove(&rtsx_pci_idr, pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+
+	kfree(pcr->slots);
+	kfree(pcr);
+	kfree(handle);
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device);
+}
+
+#ifdef CONFIG_PM
+
+static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	rtsx_pci_writel(pcr, RTSX_BIER, 0);
+	pcr->bier = 0;
+
+	rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08);
+	rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x02);
+
+	pci_save_state(pcidev);
+	pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0);
+	pci_disable_device(pcidev);
+	pci_set_power_state(pcidev, pci_choose_state(pcidev, state));
+
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+static int rtsx_pci_resume(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pci_set_power_state(pcidev, PCI_D0);
+	pci_restore_state(pcidev);
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		goto out;
+	pci_set_master(pcidev);
+
+	ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
+	if (ret)
+		goto out;
+
+	ret = rtsx_pci_init_hw(pcr);
+	if (ret)
+		goto out;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+out:
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+#else /* CONFIG_PM */
+
+#define rtsx_pci_suspend NULL
+#define rtsx_pci_resume NULL
+
+#endif /* CONFIG_PM */
+
+static struct pci_driver rtsx_pci_driver = {
+	.name = DRV_NAME_RTSX_PCI,
+	.id_table = rtsx_pci_ids,
+	.probe = rtsx_pci_probe,
+	.remove = __devexit_p(rtsx_pci_remove),
+	.suspend = rtsx_pci_suspend,
+	.resume = rtsx_pci_resume,
+};
+module_pci_driver(rtsx_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wei WANG <wei_wang@realsil.com.cn>");
+MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver");
diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h
new file mode 100644
index 000000000000..12462c1df1a9
--- /dev/null
+++ b/drivers/mfd/rtsx_pcr.h
@@ -0,0 +1,32 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_PCR_H
+#define __RTSX_PCR_H
+
+#include <linux/mfd/rtsx_pci.h>
+
+void rts5209_init_params(struct rtsx_pcr *pcr);
+void rts5229_init_params(struct rtsx_pcr *pcr);
+void rtl8411_init_params(struct rtsx_pcr *pcr);
+
+#endif
diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h
new file mode 100644
index 000000000000..a8d393e3066b
--- /dev/null
+++ b/include/linux/mfd/rtsx_common.h
@@ -0,0 +1,48 @@
+/* Driver for Realtek driver-based card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_COMMON_H
+#define __RTSX_COMMON_H
+
+#define DRV_NAME_RTSX_PCI		"rtsx_pci"
+#define DRV_NAME_RTSX_PCI_SDMMC		"rtsx_pci_sdmmc"
+#define DRV_NAME_RTSX_PCI_MS		"rtsx_pci_ms"
+
+#define RTSX_REG_PAIR(addr, val)	(((u32)(addr) << 16) | (u8)(val))
+
+#define RTSX_SSC_DEPTH_4M		0x01
+#define RTSX_SSC_DEPTH_2M		0x02
+#define RTSX_SSC_DEPTH_1M		0x03
+#define RTSX_SSC_DEPTH_500K		0x04
+#define RTSX_SSC_DEPTH_250K		0x05
+
+#define RTSX_SD_CARD			0
+#define RTSX_MS_CARD			1
+
+struct platform_device;
+
+struct rtsx_slot {
+	struct platform_device	*p_dev;
+	void			(*card_event)(struct platform_device *p_dev);
+};
+
+#endif
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
new file mode 100644
index 000000000000..060b721fcbfb
--- /dev/null
+++ b/include/linux/mfd/rtsx_pci.h
@@ -0,0 +1,794 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_PCI_H
+#define __RTSX_PCI_H
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "rtsx_common.h"
+
+#define MAX_RW_REG_CNT			1024
+
+/* PCI Operation Register Address */
+#define RTSX_HCBAR			0x00
+#define RTSX_HCBCTLR			0x04
+#define RTSX_HDBAR			0x08
+#define RTSX_HDBCTLR			0x0C
+#define RTSX_HAIMR			0x10
+#define RTSX_BIPR			0x14
+#define RTSX_BIER			0x18
+
+/* Host command buffer control register */
+#define STOP_CMD			(0x01 << 28)
+
+/* Host data buffer control register */
+#define SDMA_MODE			0x00
+#define ADMA_MODE			(0x02 << 26)
+#define STOP_DMA			(0x01 << 28)
+#define TRIG_DMA			(0x01 << 31)
+
+/* Host access internal memory register */
+#define HAIMR_TRANS_START		(0x01 << 31)
+#define HAIMR_READ			0x00
+#define HAIMR_WRITE			(0x01 << 30)
+#define HAIMR_READ_START		(HAIMR_TRANS_START | HAIMR_READ)
+#define HAIMR_WRITE_START		(HAIMR_TRANS_START | HAIMR_WRITE)
+#define HAIMR_TRANS_END			(HAIMR_TRANS_START)
+
+/* Bus interrupt pending register */
+#define CMD_DONE_INT			(1 << 31)
+#define DATA_DONE_INT			(1 << 30)
+#define TRANS_OK_INT			(1 << 29)
+#define TRANS_FAIL_INT			(1 << 28)
+#define XD_INT				(1 << 27)
+#define MS_INT				(1 << 26)
+#define SD_INT				(1 << 25)
+#define GPIO0_INT			(1 << 24)
+#define OC_INT				(1 << 23)
+#define SD_WRITE_PROTECT		(1 << 19)
+#define XD_EXIST			(1 << 18)
+#define MS_EXIST			(1 << 17)
+#define SD_EXIST			(1 << 16)
+#define DELINK_INT			GPIO0_INT
+#define MS_OC_INT			(1 << 23)
+#define SD_OC_INT			(1 << 22)
+
+#define CARD_INT		(XD_INT | MS_INT | SD_INT)
+#define NEED_COMPLETE_INT	(DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT)
+#define RTSX_INT		(CMD_DONE_INT | NEED_COMPLETE_INT | \
+					CARD_INT | GPIO0_INT | OC_INT)
+
+#define CARD_EXIST		(XD_EXIST | MS_EXIST | SD_EXIST)
+
+/* Bus interrupt enable register */
+#define CMD_DONE_INT_EN		(1 << 31)
+#define DATA_DONE_INT_EN	(1 << 30)
+#define TRANS_OK_INT_EN		(1 << 29)
+#define TRANS_FAIL_INT_EN	(1 << 28)
+#define XD_INT_EN		(1 << 27)
+#define MS_INT_EN		(1 << 26)
+#define SD_INT_EN		(1 << 25)
+#define GPIO0_INT_EN		(1 << 24)
+#define OC_INT_EN		(1 << 23)
+#define DELINK_INT_EN		GPIO0_INT_EN
+#define MS_OC_INT_EN		(1 << 23)
+#define SD_OC_INT_EN		(1 << 22)
+
+#define READ_REG_CMD		0
+#define WRITE_REG_CMD		1
+#define CHECK_REG_CMD		2
+
+/*
+ * macros for easy use
+ */
+#define rtsx_pci_writel(pcr, reg, value) \
+	iowrite32(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readl(pcr, reg) \
+	ioread32((pcr)->remap_addr + reg)
+#define rtsx_pci_writew(pcr, reg, value) \
+	iowrite16(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readw(pcr, reg) \
+	ioread16((pcr)->remap_addr + reg)
+#define rtsx_pci_writeb(pcr, reg, value) \
+	iowrite8(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readb(pcr, reg) \
+	ioread8((pcr)->remap_addr + reg)
+
+#define rtsx_pci_read_config_byte(pcr, where, val) \
+	pci_read_config_byte((pcr)->pci, where, val)
+
+#define rtsx_pci_write_config_byte(pcr, where, val) \
+	pci_write_config_byte((pcr)->pci, where, val)
+
+#define rtsx_pci_read_config_dword(pcr, where, val) \
+	pci_read_config_dword((pcr)->pci, where, val)
+
+#define rtsx_pci_write_config_dword(pcr, where, val) \
+	pci_write_config_dword((pcr)->pci, where, val)
+
+#define STATE_TRANS_NONE	0
+#define STATE_TRANS_CMD		1
+#define STATE_TRANS_BUF		2
+#define STATE_TRANS_SG		3
+
+#define TRANS_NOT_READY		0
+#define TRANS_RESULT_OK		1
+#define TRANS_RESULT_FAIL	2
+#define TRANS_NO_DEVICE		3
+
+#define RTSX_RESV_BUF_LEN	4096
+#define HOST_CMDS_BUF_LEN	1024
+#define HOST_SG_TBL_BUF_LEN	(RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN)
+#define HOST_SG_TBL_ITEMS	(HOST_SG_TBL_BUF_LEN / 8)
+#define MAX_SG_ITEM_LEN		0x80000
+
+#define HOST_TO_DEVICE		0
+#define DEVICE_TO_HOST		1
+
+#define MAX_PHASE		31
+#define RX_TUNING_CNT		3
+
+/* SG descriptor */
+#define SG_INT			0x04
+#define SG_END			0x02
+#define SG_VALID		0x01
+
+#define SG_NO_OP		0x00
+#define SG_TRANS_DATA		(0x02 << 4)
+#define SG_LINK_DESC		(0x03 << 4)
+
+/* SD bank voltage */
+#define SD_IO_3V3		0
+#define SD_IO_1V8		1
+
+
+/* Card Clock Enable Register */
+#define SD_CLK_EN			0x04
+#define MS_CLK_EN			0x08
+
+/* Card Select Register */
+#define SD_MOD_SEL			2
+#define MS_MOD_SEL			3
+
+/* Card Output Enable Register */
+#define SD_OUTPUT_EN			0x04
+#define MS_OUTPUT_EN			0x08
+
+/* CARD_SHARE_MODE */
+#define CARD_SHARE_MASK			0x0F
+#define CARD_SHARE_MULTI_LUN		0x00
+#define	CARD_SHARE_NORMAL		0x00
+#define	CARD_SHARE_48_SD		0x04
+#define	CARD_SHARE_48_MS		0x08
+/* CARD_SHARE_MODE for barossa */
+#define CARD_SHARE_BAROSSA_SD		0x01
+#define CARD_SHARE_BAROSSA_MS		0x02
+
+/* SD30_DRIVE_SEL */
+#define DRIVER_TYPE_A			0x05
+#define DRIVER_TYPE_B			0x03
+#define DRIVER_TYPE_C			0x02
+#define DRIVER_TYPE_D			0x01
+
+/* FPDCTL */
+#define SSC_POWER_DOWN			0x01
+#define SD_OC_POWER_DOWN		0x02
+#define ALL_POWER_DOWN			0x07
+#define OC_POWER_DOWN			0x06
+
+/* CLK_CTL */
+#define CHANGE_CLK			0x01
+
+/* LDO_CTL */
+#define BPP_LDO_POWB			0x03
+#define BPP_LDO_ON			0x00
+#define BPP_LDO_SUSPEND			0x02
+#define BPP_LDO_OFF			0x03
+
+/* CD_PAD_CTL */
+#define CD_DISABLE_MASK			0x07
+#define MS_CD_DISABLE			0x04
+#define SD_CD_DISABLE			0x02
+#define XD_CD_DISABLE			0x01
+#define CD_DISABLE			0x07
+#define CD_ENABLE			0x00
+#define MS_CD_EN_ONLY			0x03
+#define SD_CD_EN_ONLY			0x05
+#define XD_CD_EN_ONLY			0x06
+#define FORCE_CD_LOW_MASK		0x38
+#define FORCE_CD_XD_LOW			0x08
+#define FORCE_CD_SD_LOW			0x10
+#define FORCE_CD_MS_LOW			0x20
+#define CD_AUTO_DISABLE			0x40
+
+/* SD_STAT1 */
+#define	SD_CRC7_ERR			0x80
+#define	SD_CRC16_ERR			0x40
+#define	SD_CRC_WRITE_ERR		0x20
+#define	SD_CRC_WRITE_ERR_MASK		0x1C
+#define	GET_CRC_TIME_OUT		0x02
+#define	SD_TUNING_COMPARE_ERR		0x01
+
+/* SD_STAT2 */
+#define	SD_RSP_80CLK_TIMEOUT		0x01
+
+/* SD_BUS_STAT */
+#define	SD_CLK_TOGGLE_EN		0x80
+#define	SD_CLK_FORCE_STOP	        0x40
+#define	SD_DAT3_STATUS		        0x10
+#define	SD_DAT2_STATUS		        0x08
+#define	SD_DAT1_STATUS		        0x04
+#define	SD_DAT0_STATUS		        0x02
+#define	SD_CMD_STATUS			0x01
+
+/* SD_PAD_CTL */
+#define	SD_IO_USING_1V8		        0x80
+#define	SD_IO_USING_3V3		        0x7F
+#define	TYPE_A_DRIVING		        0x00
+#define	TYPE_B_DRIVING			0x01
+#define	TYPE_C_DRIVING			0x02
+#define	TYPE_D_DRIVING		        0x03
+
+/* SD_SAMPLE_POINT_CTL */
+#define	DDR_FIX_RX_DAT			0x00
+#define	DDR_VAR_RX_DAT			0x80
+#define	DDR_FIX_RX_DAT_EDGE		0x00
+#define	DDR_FIX_RX_DAT_14_DELAY		0x40
+#define	DDR_FIX_RX_CMD			0x00
+#define	DDR_VAR_RX_CMD			0x20
+#define	DDR_FIX_RX_CMD_POS_EDGE		0x00
+#define	DDR_FIX_RX_CMD_14_DELAY		0x10
+#define	SD20_RX_POS_EDGE		0x00
+#define	SD20_RX_14_DELAY		0x08
+#define SD20_RX_SEL_MASK		0x08
+
+/* SD_PUSH_POINT_CTL */
+#define	DDR_FIX_TX_CMD_DAT		0x00
+#define	DDR_VAR_TX_CMD_DAT		0x80
+#define	DDR_FIX_TX_DAT_14_TSU		0x00
+#define	DDR_FIX_TX_DAT_12_TSU		0x40
+#define	DDR_FIX_TX_CMD_NEG_EDGE		0x00
+#define	DDR_FIX_TX_CMD_14_AHEAD		0x20
+#define	SD20_TX_NEG_EDGE		0x00
+#define	SD20_TX_14_AHEAD		0x10
+#define SD20_TX_SEL_MASK		0x10
+#define	DDR_VAR_SDCLK_POL_SWAP		0x01
+
+/* SD_TRANSFER */
+#define	SD_TRANSFER_START		0x80
+#define	SD_TRANSFER_END			0x40
+#define SD_STAT_IDLE			0x20
+#define	SD_TRANSFER_ERR			0x10
+/* SD Transfer Mode definition */
+#define	SD_TM_NORMAL_WRITE		0x00
+#define	SD_TM_AUTO_WRITE_3		0x01
+#define	SD_TM_AUTO_WRITE_4		0x02
+#define	SD_TM_AUTO_READ_3		0x05
+#define	SD_TM_AUTO_READ_4		0x06
+#define	SD_TM_CMD_RSP			0x08
+#define	SD_TM_AUTO_WRITE_1		0x09
+#define	SD_TM_AUTO_WRITE_2		0x0A
+#define	SD_TM_NORMAL_READ		0x0C
+#define	SD_TM_AUTO_READ_1		0x0D
+#define	SD_TM_AUTO_READ_2		0x0E
+#define	SD_TM_AUTO_TUNING		0x0F
+
+/* SD_VPTX_CTL / SD_VPRX_CTL */
+#define PHASE_CHANGE			0x80
+#define PHASE_NOT_RESET			0x40
+
+/* SD_DCMPS_TX_CTL / SD_DCMPS_RX_CTL */
+#define DCMPS_CHANGE			0x80
+#define DCMPS_CHANGE_DONE		0x40
+#define DCMPS_ERROR			0x20
+#define DCMPS_CURRENT_PHASE		0x1F
+
+/* SD Configure 1 Register */
+#define SD_CLK_DIVIDE_0			0x00
+#define	SD_CLK_DIVIDE_256		0xC0
+#define	SD_CLK_DIVIDE_128		0x80
+#define	SD_BUS_WIDTH_1BIT		0x00
+#define	SD_BUS_WIDTH_4BIT		0x01
+#define	SD_BUS_WIDTH_8BIT		0x02
+#define	SD_ASYNC_FIFO_NOT_RST		0x10
+#define	SD_20_MODE			0x00
+#define	SD_DDR_MODE			0x04
+#define	SD_30_MODE			0x08
+
+#define SD_CLK_DIVIDE_MASK		0xC0
+
+/* SD_CMD_STATE */
+#define SD_CMD_IDLE			0x80
+
+/* SD_DATA_STATE */
+#define SD_DATA_IDLE			0x80
+
+/* DCM_DRP_CTL */
+#define DCM_RESET			0x08
+#define DCM_LOCKED			0x04
+#define DCM_208M			0x00
+#define DCM_TX			        0x01
+#define DCM_RX			        0x02
+
+/* DCM_DRP_TRIG */
+#define DRP_START			0x80
+#define DRP_DONE			0x40
+
+/* DCM_DRP_CFG */
+#define DRP_WRITE			0x80
+#define DRP_READ			0x00
+#define DCM_WRITE_ADDRESS_50		0x50
+#define DCM_WRITE_ADDRESS_51		0x51
+#define DCM_READ_ADDRESS_00		0x00
+#define DCM_READ_ADDRESS_51		0x51
+
+/* IRQSTAT0 */
+#define DMA_DONE_INT			0x80
+#define SUSPEND_INT			0x40
+#define LINK_RDY_INT			0x20
+#define LINK_DOWN_INT			0x10
+
+/* DMACTL */
+#define DMA_RST				0x80
+#define DMA_BUSY			0x04
+#define DMA_DIR_TO_CARD			0x00
+#define DMA_DIR_FROM_CARD		0x02
+#define DMA_EN				0x01
+#define DMA_128				(0 << 4)
+#define DMA_256				(1 << 4)
+#define DMA_512				(2 << 4)
+#define DMA_1024			(3 << 4)
+#define DMA_PACK_SIZE_MASK		0x30
+
+/* SSC_CTL1 */
+#define SSC_RSTB			0x80
+#define SSC_8X_EN			0x40
+#define SSC_FIX_FRAC			0x20
+#define SSC_SEL_1M			0x00
+#define SSC_SEL_2M			0x08
+#define SSC_SEL_4M			0x10
+#define SSC_SEL_8M			0x18
+
+/* SSC_CTL2 */
+#define SSC_DEPTH_MASK			0x07
+#define SSC_DEPTH_DISALBE		0x00
+#define SSC_DEPTH_4M			0x01
+#define SSC_DEPTH_2M			0x02
+#define SSC_DEPTH_1M			0x03
+#define SSC_DEPTH_500K			0x04
+#define SSC_DEPTH_250K			0x05
+
+/* System Clock Control Register */
+#define CLK_LOW_FREQ			0x01
+
+/* System Clock Divider Register */
+#define CLK_DIV_1			0x01
+#define CLK_DIV_2			0x02
+#define CLK_DIV_4			0x03
+#define CLK_DIV_8			0x04
+
+/* MS_CFG */
+#define	SAMPLE_TIME_RISING		0x00
+#define	SAMPLE_TIME_FALLING		0x80
+#define	PUSH_TIME_DEFAULT		0x00
+#define	PUSH_TIME_ODD			0x40
+#define	NO_EXTEND_TOGGLE		0x00
+#define	EXTEND_TOGGLE_CHK		0x20
+#define	MS_BUS_WIDTH_1			0x00
+#define	MS_BUS_WIDTH_4			0x10
+#define	MS_BUS_WIDTH_8			0x18
+#define	MS_2K_SECTOR_MODE		0x04
+#define	MS_512_SECTOR_MODE		0x00
+#define	MS_TOGGLE_TIMEOUT_EN		0x00
+#define	MS_TOGGLE_TIMEOUT_DISEN		0x01
+#define MS_NO_CHECK_INT			0x02
+
+/* MS_TRANS_CFG */
+#define	WAIT_INT			0x80
+#define	NO_WAIT_INT			0x00
+#define	NO_AUTO_READ_INT_REG		0x00
+#define	AUTO_READ_INT_REG		0x40
+#define	MS_CRC16_ERR			0x20
+#define	MS_RDY_TIMEOUT			0x10
+#define	MS_INT_CMDNK			0x08
+#define	MS_INT_BREQ			0x04
+#define	MS_INT_ERR			0x02
+#define	MS_INT_CED			0x01
+
+/* MS_TRANSFER */
+#define	MS_TRANSFER_START		0x80
+#define	MS_TRANSFER_END			0x40
+#define	MS_TRANSFER_ERR			0x20
+#define	MS_BS_STATE			0x10
+#define	MS_TM_READ_BYTES		0x00
+#define	MS_TM_NORMAL_READ		0x01
+#define	MS_TM_WRITE_BYTES		0x04
+#define	MS_TM_NORMAL_WRITE		0x05
+#define	MS_TM_AUTO_READ			0x08
+#define	MS_TM_AUTO_WRITE		0x0C
+
+/* SD Configure 2 Register */
+#define	SD_CALCULATE_CRC7		0x00
+#define	SD_NO_CALCULATE_CRC7		0x80
+#define	SD_CHECK_CRC16			0x00
+#define	SD_NO_CHECK_CRC16		0x40
+#define SD_NO_CHECK_WAIT_CRC_TO		0x20
+#define	SD_WAIT_BUSY_END		0x08
+#define	SD_NO_WAIT_BUSY_END		0x00
+#define	SD_CHECK_CRC7			0x00
+#define	SD_NO_CHECK_CRC7		0x04
+#define	SD_RSP_LEN_0			0x00
+#define	SD_RSP_LEN_6			0x01
+#define	SD_RSP_LEN_17			0x02
+/* SD/MMC Response Type Definition */
+#define	SD_RSP_TYPE_R0			0x04
+#define	SD_RSP_TYPE_R1			0x01
+#define	SD_RSP_TYPE_R1b			0x09
+#define	SD_RSP_TYPE_R2			0x02
+#define	SD_RSP_TYPE_R3			0x05
+#define	SD_RSP_TYPE_R4			0x05
+#define	SD_RSP_TYPE_R5			0x01
+#define	SD_RSP_TYPE_R6			0x01
+#define	SD_RSP_TYPE_R7			0x01
+
+/* SD_CONFIURE3 */
+#define	SD_RSP_80CLK_TIMEOUT_EN		0x01
+
+/* Card Transfer Reset Register */
+#define SPI_STOP			0x01
+#define XD_STOP				0x02
+#define SD_STOP				0x04
+#define MS_STOP				0x08
+#define SPI_CLR_ERR			0x10
+#define XD_CLR_ERR			0x20
+#define SD_CLR_ERR			0x40
+#define MS_CLR_ERR			0x80
+
+/* Card Data Source Register */
+#define PINGPONG_BUFFER			0x01
+#define RING_BUFFER			0x00
+
+/* Card Power Control Register */
+#define PMOS_STRG_MASK			0x10
+#define PMOS_STRG_800mA			0x10
+#define PMOS_STRG_400mA			0x00
+#define SD_POWER_OFF			0x03
+#define SD_PARTIAL_POWER_ON		0x01
+#define SD_POWER_ON			0x00
+#define SD_POWER_MASK			0x03
+#define MS_POWER_OFF			0x0C
+#define MS_PARTIAL_POWER_ON		0x04
+#define MS_POWER_ON			0x00
+#define MS_POWER_MASK			0x0C
+#define BPP_POWER_OFF			0x0F
+#define BPP_POWER_5_PERCENT_ON		0x0E
+#define BPP_POWER_10_PERCENT_ON		0x0C
+#define BPP_POWER_15_PERCENT_ON		0x08
+#define BPP_POWER_ON			0x00
+#define BPP_POWER_MASK			0x0F
+
+/* PWR_GATE_CTRL */
+#define PWR_GATE_EN			0x01
+#define LDO3318_PWR_MASK		0x06
+#define LDO_ON				0x00
+#define LDO_SUSPEND			0x04
+#define LDO_OFF				0x06
+
+/* CARD_CLK_SOURCE */
+#define CRC_FIX_CLK			(0x00 << 0)
+#define CRC_VAR_CLK0			(0x01 << 0)
+#define CRC_VAR_CLK1			(0x02 << 0)
+#define SD30_FIX_CLK			(0x00 << 2)
+#define SD30_VAR_CLK0			(0x01 << 2)
+#define SD30_VAR_CLK1			(0x02 << 2)
+#define SAMPLE_FIX_CLK			(0x00 << 4)
+#define SAMPLE_VAR_CLK0			(0x01 << 4)
+#define SAMPLE_VAR_CLK1			(0x02 << 4)
+
+#define MS_CFG				0xFD40
+#define MS_TPC				0xFD41
+#define MS_TRANS_CFG			0xFD42
+#define MS_TRANSFER			0xFD43
+#define MS_INT_REG			0xFD44
+#define MS_BYTE_CNT			0xFD45
+#define MS_SECTOR_CNT_L			0xFD46
+#define MS_SECTOR_CNT_H			0xFD47
+#define MS_DBUS_H			0xFD48
+
+#define SD_CFG1				0xFDA0
+#define SD_CFG2				0xFDA1
+#define SD_CFG3				0xFDA2
+#define SD_STAT1			0xFDA3
+#define SD_STAT2			0xFDA4
+#define SD_BUS_STAT			0xFDA5
+#define SD_PAD_CTL			0xFDA6
+#define SD_SAMPLE_POINT_CTL		0xFDA7
+#define SD_PUSH_POINT_CTL		0xFDA8
+#define SD_CMD0				0xFDA9
+#define SD_CMD1				0xFDAA
+#define SD_CMD2				0xFDAB
+#define SD_CMD3				0xFDAC
+#define SD_CMD4				0xFDAD
+#define SD_CMD5				0xFDAE
+#define SD_BYTE_CNT_L			0xFDAF
+#define SD_BYTE_CNT_H			0xFDB0
+#define SD_BLOCK_CNT_L			0xFDB1
+#define SD_BLOCK_CNT_H			0xFDB2
+#define SD_TRANSFER			0xFDB3
+#define SD_CMD_STATE			0xFDB5
+#define SD_DATA_STATE			0xFDB6
+
+#define SRCTL				0xFC13
+
+#define	DCM_DRP_CTL			0xFC23
+#define	DCM_DRP_TRIG			0xFC24
+#define	DCM_DRP_CFG			0xFC25
+#define	DCM_DRP_WR_DATA_L		0xFC26
+#define	DCM_DRP_WR_DATA_H		0xFC27
+#define	DCM_DRP_RD_DATA_L		0xFC28
+#define	DCM_DRP_RD_DATA_H		0xFC29
+#define SD_VPCLK0_CTL			0xFC2A
+#define SD_VPCLK1_CTL			0xFC2B
+#define SD_DCMPS0_CTL			0xFC2C
+#define SD_DCMPS1_CTL			0xFC2D
+#define SD_VPTX_CTL			SD_VPCLK0_CTL
+#define SD_VPRX_CTL			SD_VPCLK1_CTL
+#define SD_DCMPS_TX_CTL			SD_DCMPS0_CTL
+#define SD_DCMPS_RX_CTL			SD_DCMPS1_CTL
+#define CARD_CLK_SOURCE			0xFC2E
+
+#define CARD_PWR_CTL			0xFD50
+#define CARD_CLK_SWITCH			0xFD51
+#define CARD_SHARE_MODE			0xFD52
+#define CARD_DRIVE_SEL			0xFD53
+#define CARD_STOP			0xFD54
+#define CARD_OE				0xFD55
+#define CARD_AUTO_BLINK			0xFD56
+#define CARD_GPIO_DIR			0xFD57
+#define CARD_GPIO			0xFD58
+#define CARD_DATA_SOURCE		0xFD5B
+#define CARD_SELECT			0xFD5C
+#define SD30_DRIVE_SEL			0xFD5E
+#define CARD_CLK_EN			0xFD69
+#define SDIO_CTRL			0xFD6B
+#define CD_PAD_CTL			0xFD73
+
+#define FPDCTL				0xFC00
+#define PDINFO				0xFC01
+
+#define CLK_CTL				0xFC02
+#define CLK_DIV				0xFC03
+#define CLK_SEL				0xFC04
+
+#define SSC_DIV_N_0			0xFC0F
+#define SSC_DIV_N_1			0xFC10
+#define SSC_CTL1			0xFC11
+#define SSC_CTL2			0xFC12
+
+#define RCCTL				0xFC14
+
+#define FPGA_PULL_CTL			0xFC1D
+#define OLT_LED_CTL			0xFC1E
+#define GPIO_CTL			0xFC1F
+
+#define LDO_CTL				0xFC1E
+#define SYS_VER				0xFC32
+
+#define CARD_PULL_CTL1			0xFD60
+#define CARD_PULL_CTL2			0xFD61
+#define CARD_PULL_CTL3			0xFD62
+#define CARD_PULL_CTL4			0xFD63
+#define CARD_PULL_CTL5			0xFD64
+#define CARD_PULL_CTL6			0xFD65
+
+/* PCI Express Related Registers */
+#define IRQEN0				0xFE20
+#define IRQSTAT0			0xFE21
+#define IRQEN1				0xFE22
+#define IRQSTAT1			0xFE23
+#define TLPRIEN				0xFE24
+#define TLPRISTAT			0xFE25
+#define TLPTIEN				0xFE26
+#define TLPTISTAT			0xFE27
+#define DMATC0				0xFE28
+#define DMATC1				0xFE29
+#define DMATC2				0xFE2A
+#define DMATC3				0xFE2B
+#define DMACTL				0xFE2C
+#define BCTL				0xFE2D
+#define RBBC0				0xFE2E
+#define RBBC1				0xFE2F
+#define RBDAT				0xFE30
+#define RBCTL				0xFE34
+#define CFGADDR0			0xFE35
+#define CFGADDR1			0xFE36
+#define CFGDATA0			0xFE37
+#define CFGDATA1			0xFE38
+#define CFGDATA2			0xFE39
+#define CFGDATA3			0xFE3A
+#define CFGRWCTL			0xFE3B
+#define PHYRWCTL			0xFE3C
+#define PHYDATA0			0xFE3D
+#define PHYDATA1			0xFE3E
+#define PHYADDR				0xFE3F
+#define MSGRXDATA0			0xFE40
+#define MSGRXDATA1			0xFE41
+#define MSGRXDATA2			0xFE42
+#define MSGRXDATA3			0xFE43
+#define MSGTXDATA0			0xFE44
+#define MSGTXDATA1			0xFE45
+#define MSGTXDATA2			0xFE46
+#define MSGTXDATA3			0xFE47
+#define MSGTXCTL			0xFE48
+#define PETXCFG				0xFE49
+
+#define CDRESUMECTL			0xFE52
+#define WAKE_SEL_CTL			0xFE54
+#define PME_FORCE_CTL			0xFE56
+#define ASPM_FORCE_CTL			0xFE57
+#define PM_CLK_FORCE_CTL		0xFE58
+#define PERST_GLITCH_WIDTH		0xFE5C
+#define CHANGE_LINK_STATE		0xFE5B
+#define RESET_LOAD_REG			0xFE5E
+#define EFUSE_CONTENT			0xFE5F
+#define HOST_SLEEP_STATE		0xFE60
+#define SDIO_CFG			0xFE70
+
+#define NFTS_TX_CTRL			0xFE72
+
+#define PWR_GATE_CTRL			0xFE75
+#define PWD_SUSPEND_EN			0xFE76
+#define LDO_PWR_SEL			0xFE78
+
+#define DUMMY_REG_RESET_0		0xFE90
+
+/* Memory mapping */
+#define SRAM_BASE			0xE600
+#define RBUF_BASE			0xF400
+#define PPBUF_BASE1			0xF800
+#define PPBUF_BASE2			0xFA00
+#define IMAGE_FLAG_ADDR0		0xCE80
+#define IMAGE_FLAG_ADDR1		0xCE81
+
+#define rtsx_pci_init_cmd(pcr)		((pcr)->ci = 0)
+
+struct rtsx_pcr;
+
+struct pcr_handle {
+	struct rtsx_pcr			*pcr;
+};
+
+struct pcr_ops {
+	int		(*extra_init_hw)(struct rtsx_pcr *pcr);
+	int		(*optimize_phy)(struct rtsx_pcr *pcr);
+	int		(*turn_on_led)(struct rtsx_pcr *pcr);
+	int		(*turn_off_led)(struct rtsx_pcr *pcr);
+	int		(*enable_auto_blink)(struct rtsx_pcr *pcr);
+	int		(*disable_auto_blink)(struct rtsx_pcr *pcr);
+	int		(*card_power_on)(struct rtsx_pcr *pcr, int card);
+	int		(*card_power_off)(struct rtsx_pcr *pcr, int card);
+	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
+};
+
+enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
+
+struct rtsx_pcr {
+	struct pci_dev			*pci;
+	unsigned int			id;
+
+	/* pci resources */
+	unsigned long			addr;
+	void __iomem			*remap_addr;
+	int				irq;
+
+	/* host reserved buffer */
+	void				*rtsx_resv_buf;
+	dma_addr_t			rtsx_resv_buf_addr;
+
+	void				*host_cmds_ptr;
+	dma_addr_t			host_cmds_addr;
+	int				ci;
+
+	void				*host_sg_tbl_ptr;
+	dma_addr_t			host_sg_tbl_addr;
+	int				sgi;
+
+	u32				bier;
+	char				trans_result;
+
+	unsigned int			card_inserted;
+	unsigned int			card_removed;
+
+	struct delayed_work		carddet_work;
+	struct delayed_work		idle_work;
+
+	spinlock_t			lock;
+	struct mutex			pcr_mutex;
+	struct completion		*done;
+	struct completion		*finish_me;
+
+	unsigned int			cur_clock;
+	bool				ms_pmos;
+	bool				remove_pci;
+	bool				msi_en;
+
+#define EXTRA_CAPS_SD_SDR50		(1 << 0)
+#define EXTRA_CAPS_SD_SDR104		(1 << 1)
+#define EXTRA_CAPS_SD_DDR50		(1 << 2)
+#define EXTRA_CAPS_MMC_HSDDR		(1 << 3)
+#define EXTRA_CAPS_MMC_HS200		(1 << 4)
+#define EXTRA_CAPS_MMC_8BIT		(1 << 5)
+	u32				extra_caps;
+
+#define IC_VER_A			0
+#define IC_VER_B			1
+#define IC_VER_C			2
+#define IC_VER_D			3
+	u8				ic_version;
+
+	const u32			*sd_pull_ctl_enable_tbl;
+	const u32			*sd_pull_ctl_disable_tbl;
+	const u32			*ms_pull_ctl_enable_tbl;
+	const u32			*ms_pull_ctl_disable_tbl;
+
+	const struct pcr_ops		*ops;
+	enum PDEV_STAT			state;
+
+	int				num_slots;
+	struct rtsx_slot		*slots;
+};
+
+#define CHK_PCI_PID(pcr, pid)		((pcr)->pci->device == (pid))
+#define PCI_VID(pcr)			((pcr)->pci->vendor)
+#define PCI_PID(pcr)			((pcr)->pci->device)
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr);
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data);
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data);
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val);
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val);
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr);
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data);
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout);
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card);
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr);
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr);
+
+static inline u8 *rtsx_pci_get_cmd_data(struct rtsx_pcr *pcr)
+{
+	return (u8 *)(pcr->host_cmds_ptr);
+}
+
+#endif
-- 
cgit v1.2.3-55-g7522


From f399002e68e626e7bc443e6fcab1772704cc197f Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Wed, 23 Mar 2011 14:31:09 +0100
Subject: drbd: distribute former syncer_conf settings to disk, connection, and
 resource level

This commit breaks the API again.

Move per-volume former syncer options into disk_conf.
Move per-connection former syncer options into net_conf.
Renamed the remainign sync_conf to res_opts

Syncer settings have been changeable at runtime, so we need to prepare
for these settings to be runtime-changeable in their new home as well.

Introduce new configuration operations, and share the netlink attribute
between "attach" (create new disk) and "disk-opts" (change options).
Same for "connect" and "net-opts".

Some fields cannot be changed at runtime, however.
Introduce a new flag GENLA_F_INVARIANT to be able to trigger on that in
the generated validation and assignment functions.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  10 +-
 drivers/block/drbd/drbd_main.c     |  72 +++--
 drivers/block/drbd/drbd_nl.c       | 550 ++++++++++++++++++++++++-------------
 drivers/block/drbd/drbd_receiver.c |  51 ++--
 drivers/block/drbd/drbd_state.c    |   4 +-
 drivers/block/drbd/drbd_worker.c   |  50 ++--
 include/linux/drbd_genl.h          | 133 +++++----
 include/linux/drbd_limits.h        |   2 +
 include/linux/genl_magic_func.h    |  49 ++--
 include/linux/genl_magic_struct.h  |  18 +-
 10 files changed, 572 insertions(+), 367 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d6e7e657e7a4..bc265f3733c6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -860,7 +860,7 @@ struct drbd_md {
 	s32 bm_offset;	/* signed relative sector offset to bitmap */
 
 	/* u32 al_nr_extents;	   important for restoring the AL
-	 * is stored into  sync_conf.al_extents, which in turn
+	 * is stored into  ldev->dc.al_extents, which in turn
 	 * gets applied to act_log->nr_elements
 	 */
 };
@@ -929,6 +929,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	atomic_t net_cnt;		/* Users of net_conf */
 	wait_queue_head_t net_cnt_wait;
 	wait_queue_head_t ping_wait;		/* Woken upon reception of a ping, and a state change */
+	struct res_opts res_opts;
 
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
@@ -945,6 +946,8 @@ struct drbd_tconn {			/* is a resource from the config file */
 	struct crypto_hash *cram_hmac_tfm;
 	struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
 	struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
+	struct crypto_hash *csums_tfm;
+	struct crypto_hash *verify_tfm;
 	void *int_dig_out;
 	void *int_dig_in;
 	void *int_dig_vv;
@@ -963,7 +966,6 @@ struct drbd_conf {
 	unsigned long flags;
 
 	/* configured by drbdsetup */
-	struct syncer_conf sync_conf;
 	struct drbd_backing_dev *ldev __protected_by(local);
 
 	sector_t p_size;     /* partner's disk size */
@@ -1037,8 +1039,6 @@ struct drbd_conf {
 	/* size of out-of-sync range in sectors. */
 	sector_t ov_last_oos_size;
 	unsigned long ov_left; /* in bits */
-	struct crypto_hash *csums_tfm;
-	struct crypto_hash *verify_tfm;
 
 	struct drbd_bitmap *bitmap;
 	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
@@ -1188,7 +1188,7 @@ extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd,
 			  char *data, size_t size);
 #define USE_DATA_SOCKET 1
 #define USE_META_SOCKET 0
-extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
+extern int drbd_send_sync_param(struct drbd_conf *mdev);
 extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
 			u32 set_size);
 extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 79a0e042252f..bdb12723585e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -784,7 +784,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data,
 	return ok;
 }
 
-int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
+int drbd_send_sync_param(struct drbd_conf *mdev)
 {
 	struct p_rs_param_95 *p;
 	struct socket *sock;
@@ -793,7 +793,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
 
 	size = apv <= 87 ? sizeof(struct p_rs_param)
 		: apv == 88 ? sizeof(struct p_rs_param)
-			+ strlen(mdev->sync_conf.verify_alg) + 1
+			+ strlen(mdev->tconn->net_conf->verify_alg) + 1
 		: apv <= 94 ? sizeof(struct p_rs_param_89)
 		: /* apv >= 95 */ sizeof(struct p_rs_param_95);
 
@@ -812,16 +812,25 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
 		/* initialize verify_alg and csums_alg */
 		memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
-		p->rate = cpu_to_be32(sc->rate);
-		p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
-		p->c_delay_target = cpu_to_be32(sc->c_delay_target);
-		p->c_fill_target = cpu_to_be32(sc->c_fill_target);
-		p->c_max_rate = cpu_to_be32(sc->c_max_rate);
+		if (get_ldev(mdev)) {
+			p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
+			p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
+			p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
+			p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
+			p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
+			put_ldev(mdev);
+		} else {
+			p->rate = cpu_to_be32(DRBD_RATE_DEF);
+			p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
+			p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
+			p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
+			p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
+		}
 
 		if (apv >= 88)
-			strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
+			strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg);
 		if (apv >= 89)
-			strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
+			strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg);
 
 		rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
 	} else
@@ -1043,7 +1052,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
 	int bits;
 
 	/* may we use this feature? */
-	if ((mdev->sync_conf.use_rle == 0) ||
+	if ((mdev->tconn->net_conf->use_rle == 0) ||
 		(mdev->tconn->agreed_pro_version < 90))
 			return 0;
 
@@ -1790,26 +1799,8 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
 
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
-	/* This way we get a compile error when sync_conf grows,
-	   and we forgot to initialize it here */
-	mdev->sync_conf = (struct syncer_conf) {
-		/* .rate = */		DRBD_RATE_DEF,
-		/* .after = */		DRBD_AFTER_DEF,
-		/* .al_extents = */	DRBD_AL_EXTENTS_DEF,
-		/* .verify_alg = */	{}, 0,
-		/* .cpu_mask = */	{}, 0,
-		/* .csums_alg = */	{}, 0,
-		/* .use_rle = */	0,
-		/* .on_no_data = */	DRBD_ON_NO_DATA_DEF,
-		/* .c_plan_ahead = */	DRBD_C_PLAN_AHEAD_DEF,
-		/* .c_delay_target = */	DRBD_C_DELAY_TARGET_DEF,
-		/* .c_fill_target = */	DRBD_C_FILL_TARGET_DEF,
-		/* .c_max_rate = */	DRBD_C_MAX_RATE_DEF,
-		/* .c_min_rate = */	DRBD_C_MIN_RATE_DEF
-	};
-
-	/* Have to use that way, because the layout differs between
-	   big endian and little endian */
+	/* Beware! The actual layout differs
+	 * between big endian and little endian */
 	mdev->state = (union drbd_state) {
 		{ .role = R_SECONDARY,
 		  .peer = R_UNKNOWN,
@@ -2286,6 +2277,11 @@ struct drbd_tconn *drbd_new_tconn(const char *name)
 	drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
 	drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
 
+	tconn->res_opts = (struct res_opts) {
+		{}, 0, /* cpu_mask */
+		DRBD_ON_NO_DATA_DEF, /* on_no_data */
+	};
+
 	mutex_lock(&drbd_cfg_mutex);
 	list_add_tail(&tconn->all_tconn, &drbd_tconns);
 	mutex_unlock(&drbd_cfg_mutex);
@@ -2559,10 +2555,10 @@ void drbd_free_sock(struct drbd_tconn *tconn)
 
 void drbd_free_resources(struct drbd_conf *mdev)
 {
-	crypto_free_hash(mdev->csums_tfm);
-	mdev->csums_tfm = NULL;
-	crypto_free_hash(mdev->verify_tfm);
-	mdev->verify_tfm = NULL;
+	crypto_free_hash(mdev->tconn->csums_tfm);
+	mdev->tconn->csums_tfm = NULL;
+	crypto_free_hash(mdev->tconn->verify_tfm);
+	mdev->tconn->verify_tfm = NULL;
 	crypto_free_hash(mdev->tconn->cram_hmac_tfm);
 	mdev->tconn->cram_hmac_tfm = NULL;
 	crypto_free_hash(mdev->tconn->integrity_w_tfm);
@@ -2589,7 +2585,7 @@ struct meta_data_on_disk {
 	u32 md_size_sect;
 	u32 al_offset;         /* offset to this block */
 	u32 al_nr_extents;     /* important for restoring the AL */
-	      /* `-- act_log->nr_elements <-- sync_conf.al_extents */
+	      /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
 	u32 bm_offset;         /* offset to the bitmap, from here */
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
 	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
@@ -2715,7 +2711,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
 	bdev->md.flags = be32_to_cpu(buffer->flags);
-	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
+	bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
 	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
 	spin_lock_irq(&mdev->tconn->req_lock);
@@ -2727,8 +2723,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	}
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	if (mdev->sync_conf.al_extents < 7)
-		mdev->sync_conf.al_extents = 127;
+	if (bdev->dc.al_extents < 7)
+		bdev->dc.al_extents = 127;
 
  err:
 	mutex_unlock(&mdev->md_io_mutex);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ac0a175e778c..18cd2ed4e8ca 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -53,8 +53,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
@@ -66,7 +68,7 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
-int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
 /* .dumpit */
@@ -170,7 +172,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 		struct nlattr *nla;
 		/* parse and validate only */
-		err = drbd_cfg_context_from_attrs(NULL, info->attrs);
+		err = drbd_cfg_context_from_attrs(NULL, info);
 		if (err)
 			goto fail;
 
@@ -616,6 +618,7 @@ static const char *from_attrs_err_to_txt(int err)
 {
 	return	err == -ENOMSG ? "required attribute missing" :
 		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
+		err == -EEXIST ? "can not change invariant setting" :
 		"invalid attribute value";
 }
 
@@ -633,7 +636,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&parms, 0, sizeof(parms));
 	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
-		err = set_role_parms_from_attrs(&parms, info->attrs);
+		err = set_role_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -898,24 +901,24 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass
  * failed, and 0 on success. You should call drbd_md_sync() after you called
  * this function.
  */
-static int drbd_check_al_size(struct drbd_conf *mdev)
+static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
 {
 	struct lru_cache *n, *t;
 	struct lc_element *e;
 	unsigned int in_use;
 	int i;
 
-	if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN))
-		mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN;
+	if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN))
+		dc->al_extents = DRBD_AL_EXTENTS_MIN;
 
 	if (mdev->act_log &&
-	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+	    mdev->act_log->nr_elements == dc->al_extents)
 		return 0;
 
 	in_use = 0;
 	t = mdev->act_log;
 	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
-		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+		dc->al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
 		dev_err(DEV, "Cannot allocate act_log lru!\n");
@@ -1069,6 +1072,114 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
 		dev_info(DEV, "Suspended AL updates\n");
 }
 
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct drbd_conf *mdev;
+	struct disk_conf *ndc; /* new disk conf */
+	int err, fifo_size;
+	int *rs_plan_s = NULL;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
+
+	/* we also need a disk
+	 * to change the options on */
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+/* FIXME freeze IO, cluster wide.
+ *
+ * We should make sure no-one uses
+ * some half-updated struct when we
+ * assign it later. */
+
+	ndc = kmalloc(sizeof(*ndc), GFP_KERNEL);
+	if (!ndc) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc));
+	err = disk_conf_from_attrs_for_change(ndc, info);
+	if (err) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+	}
+
+	if (!expect(ndc->resync_rate >= 1))
+		ndc->resync_rate = 1;
+
+	/* clip to allowed range */
+	if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN))
+		ndc->al_extents = DRBD_AL_EXTENTS_MIN;
+	if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX))
+		ndc->al_extents = DRBD_AL_EXTENTS_MAX;
+
+	/* most sanity checks done, try to assign the new sync-after
+	 * dependency.  need to hold the global lock in there,
+	 * to avoid a race in the dependency loop check. */
+	retcode = drbd_alter_sa(mdev, ndc->resync_after);
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+		if (!rs_plan_s) {
+			dev_err(DEV, "kmalloc of fifo_buffer failed");
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	if (fifo_size != mdev->rs_plan_s.size) {
+		kfree(mdev->rs_plan_s.values);
+		mdev->rs_plan_s.values = rs_plan_s;
+		mdev->rs_plan_s.size   = fifo_size;
+		mdev->rs_planed = 0;
+		rs_plan_s = NULL;
+	}
+
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+	drbd_al_shrink(mdev);
+	err = drbd_check_al_size(mdev, ndc);
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+
+	if (err) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	/* FIXME
+	 * To avoid someone looking at a half-updated struct, we probably
+	 * should have a rw-semaphor on net_conf and disk_conf.
+	 */
+	mdev->ldev->dc = *ndc;
+
+	drbd_md_sync(mdev);
+
+
+	if (mdev->state.conn >= C_CONNECTED)
+		drbd_send_sync_param(mdev);
+
+ fail:
+	put_ldev(mdev);
+	kfree(ndc);
+	kfree(rs_plan_s);
+ out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_conf *mdev;
@@ -1111,12 +1222,29 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	nbc->dc.disk_size     = DRBD_DISK_SIZE_SECT_DEF;
-	nbc->dc.on_io_error   = DRBD_ON_IO_ERROR_DEF;
-	nbc->dc.fencing       = DRBD_FENCING_DEF;
-	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
-
-	err = disk_conf_from_attrs(&nbc->dc, info->attrs);
+	nbc->dc = (struct disk_conf) {
+		{}, 0, /* backing_dev */
+		{}, 0, /* meta_dev */
+		0, /* meta_dev_idx */
+		DRBD_DISK_SIZE_SECT_DEF, /* disk_size */
+		DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */
+		DRBD_ON_IO_ERROR_DEF, /* on_io_error */
+		DRBD_FENCING_DEF, /* fencing */
+		DRBD_RATE_DEF, /* resync_rate */
+		DRBD_AFTER_DEF, /* resync_after */
+		DRBD_AL_EXTENTS_DEF, /* al_extents */
+		DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */
+		DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */
+		DRBD_C_FILL_TARGET_DEF, /* c_fill_target */
+		DRBD_C_MAX_RATE_DEF, /* c_max_rate */
+		DRBD_C_MIN_RATE_DEF, /* c_min_rate */
+		0, /* no_disk_barrier */
+		0, /* no_disk_flush */
+		0, /* no_disk_drain */
+		0, /* no_md_flush */
+	};
+
+	err = disk_conf_from_attrs(&nbc->dc, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1267,7 +1395,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Since we are diskless, fix the activity log first... */
-	if (drbd_check_al_size(mdev)) {
+	if (drbd_check_al_size(mdev, &nbc->dc)) {
 		retcode = ERR_NOMEM;
 		goto force_diskless_dec;
 	}
@@ -1498,6 +1626,158 @@ out:
 	return 0;
 }
 
+static bool conn_resync_running(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_SYNC_SOURCE ||
+		    mdev->state.conn == C_SYNC_TARGET ||
+		    mdev->state.conn == C_PAUSED_SYNC_S ||
+		    mdev->state.conn == C_PAUSED_SYNC_T)
+			return true;
+	}
+	return false;
+}
+
+static bool conn_ov_running(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_VERIFY_S ||
+		    mdev->state.conn == C_VERIFY_T)
+			return true;
+	}
+	return false;
+}
+
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct drbd_tconn *tconn;
+	struct net_conf *new_conf = NULL;
+	int err;
+	int ovr; /* online verify running */
+	int rsr; /* re-sync running */
+	struct crypto_hash *verify_tfm = NULL;
+	struct crypto_hash *csums_tfm = NULL;
+
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	tconn = adm_ctx.tconn;
+
+	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+	if (!new_conf) {
+		retcode = ERR_NOMEM;
+		goto out;
+	}
+
+	/* we also need a net config
+	 * to change the options on */
+	if (!get_net_conf(tconn)) {
+		drbd_msg_put_info("net conf missing, try connect");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+
+	conn_reconfig_start(tconn);
+
+	memcpy(new_conf, tconn->net_conf, sizeof(*new_conf));
+	err = net_conf_from_attrs_for_change(new_conf, info);
+	if (err) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		goto fail;
+	}
+
+	/* re-sync running */
+	rsr = conn_resync_running(tconn);
+	if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) {
+		retcode = ERR_CSUMS_RESYNC_RUNNING;
+		goto fail;
+	}
+
+	if (!rsr && new_conf->csums_alg[0]) {
+		csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(csums_tfm)) {
+			csums_tfm = NULL;
+			retcode = ERR_CSUMS_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
+			retcode = ERR_CSUMS_ALG_ND;
+			goto fail;
+		}
+	}
+
+	/* online verify running */
+	ovr = conn_ov_running(tconn);
+	if (ovr) {
+		if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) {
+			retcode = ERR_VERIFY_RUNNING;
+			goto fail;
+		}
+	}
+
+	if (!ovr && new_conf->verify_alg[0]) {
+		verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(verify_tfm)) {
+			verify_tfm = NULL;
+			retcode = ERR_VERIFY_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
+			retcode = ERR_VERIFY_ALG_ND;
+			goto fail;
+		}
+	}
+
+
+	/* For now, use struct assignment, not pointer assignment.
+	 * We don't have any means to determine who might still
+	 * keep a local alias into the struct,
+	 * so we cannot just free it and hope for the best :(
+	 * FIXME
+	 * To avoid someone looking at a half-updated struct, we probably
+	 * should have a rw-semaphor on net_conf and disk_conf.
+	 */
+	*tconn->net_conf = *new_conf;
+
+	if (!rsr) {
+		crypto_free_hash(tconn->csums_tfm);
+		tconn->csums_tfm = csums_tfm;
+		csums_tfm = NULL;
+	}
+	if (!ovr) {
+		crypto_free_hash(tconn->verify_tfm);
+		tconn->verify_tfm = verify_tfm;
+		verify_tfm = NULL;
+	}
+
+	if (tconn->cstate >= C_WF_REPORT_PARAMS)
+		drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
+
+ fail:
+	crypto_free_hash(csums_tfm);
+	crypto_free_hash(verify_tfm);
+	kfree(new_conf);
+	put_net_conf(tconn);
+	conn_reconfig_done(tconn);
+ out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 {
 	char hmac_name[CRYPTO_MAX_ALG_NAME];
@@ -1531,33 +1811,47 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* allocation not in the IO path, cqueue thread context */
-	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+	new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
 	if (!new_conf) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
 
-	new_conf->timeout	   = DRBD_TIMEOUT_DEF;
-	new_conf->try_connect_int  = DRBD_CONNECT_INT_DEF;
-	new_conf->ping_int	   = DRBD_PING_INT_DEF;
-	new_conf->max_epoch_size   = DRBD_MAX_EPOCH_SIZE_DEF;
-	new_conf->max_buffers	   = DRBD_MAX_BUFFERS_DEF;
-	new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
-	new_conf->sndbuf_size	   = DRBD_SNDBUF_SIZE_DEF;
-	new_conf->rcvbuf_size	   = DRBD_RCVBUF_SIZE_DEF;
-	new_conf->ko_count	   = DRBD_KO_COUNT_DEF;
-	new_conf->after_sb_0p	   = DRBD_AFTER_SB_0P_DEF;
-	new_conf->after_sb_1p	   = DRBD_AFTER_SB_1P_DEF;
-	new_conf->after_sb_2p	   = DRBD_AFTER_SB_2P_DEF;
-	new_conf->want_lose	   = 0;
-	new_conf->two_primaries    = 0;
-	new_conf->wire_protocol    = DRBD_PROT_C;
-	new_conf->ping_timeo	   = DRBD_PING_TIMEO_DEF;
-	new_conf->rr_conflict	   = DRBD_RR_CONFLICT_DEF;
-	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
-	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
-
-	err = net_conf_from_attrs(new_conf, info->attrs);
+	*new_conf = (struct net_conf) {
+		{}, 0, /* my_addr */
+		{}, 0, /* peer_addr */
+		{}, 0, /* shared_secret */
+		{}, 0, /* cram_hmac_alg */
+		{}, 0, /* integrity_alg */
+		{}, 0, /* verify_alg */
+		{}, 0, /* csums_alg */
+		DRBD_PROTOCOL_DEF, /* wire_protocol */
+		DRBD_CONNECT_INT_DEF, /* try_connect_int */
+		DRBD_TIMEOUT_DEF, /* timeout */
+		DRBD_PING_INT_DEF, /* ping_int */
+		DRBD_PING_TIMEO_DEF, /* ping_timeo */
+		DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */
+		DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */
+		DRBD_KO_COUNT_DEF, /* ko_count */
+		DRBD_MAX_BUFFERS_DEF, /* max_buffers */
+		DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */
+		DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */
+		DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */
+		DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */
+		DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */
+		DRBD_RR_CONFLICT_DEF, /* rr_conflict */
+		DRBD_ON_CONGESTION_DEF, /* on_congestion */
+		DRBD_CONG_FILL_DEF, /* cong_fill */
+		DRBD_CONG_EXTENTS_DEF, /* cong_extents */
+		0, /* two_primaries */
+		0, /* want_lose */
+		0, /* no_cork */
+		0, /* always_asbp */
+		0, /* dry_run */
+		0, /* use_rle */
+	};
+
+	err = net_conf_from_attrs(new_conf, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1789,7 +2083,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 	tconn = adm_ctx.tconn;
 	memset(&parms, 0, sizeof(parms));
 	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
-		err = disconnect_parms_from_attrs(&parms, info->attrs);
+		err = disconnect_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1848,7 +2142,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&rs, 0, sizeof(struct resize_parms));
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
-		err = resize_parms_from_attrs(&rs, info->attrs);
+		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1904,26 +2198,21 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info)
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
-	int err;
-	int ovr; /* online verify running */
-	int rsr; /* re-sync running */
-	struct crypto_hash *verify_tfm = NULL;
-	struct crypto_hash *csums_tfm = NULL;
-	struct syncer_conf sc;
 	cpumask_var_t new_cpu_mask;
+	struct drbd_tconn *tconn;
 	int *rs_plan_s = NULL;
-	int fifo_size;
+	struct res_opts sc;
+	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto fail;
-	mdev = adm_ctx.mdev;
+	tconn = adm_ctx.tconn;
 
 	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
 		retcode = ERR_NOMEM;
@@ -1933,172 +2222,43 @@ int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info)
 
 	if (((struct drbd_genlmsghdr*)info->userhdr)->flags
 			& DRBD_GENL_F_SET_DEFAULTS) {
-		memset(&sc, 0, sizeof(struct syncer_conf));
-		sc.rate       = DRBD_RATE_DEF;
-		sc.after      = DRBD_AFTER_DEF;
-		sc.al_extents = DRBD_AL_EXTENTS_DEF;
+		memset(&sc, 0, sizeof(struct res_opts));
 		sc.on_no_data  = DRBD_ON_NO_DATA_DEF;
-		sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
-		sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
-		sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
-		sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
-		sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
 	} else
-		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
+		sc = tconn->res_opts;
 
-	err = syncer_conf_from_attrs(&sc, info->attrs);
+	err = res_opts_from_attrs(&sc, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	/* re-sync running */
-	rsr = (	mdev->state.conn == C_SYNC_SOURCE ||
-		mdev->state.conn == C_SYNC_TARGET ||
-		mdev->state.conn == C_PAUSED_SYNC_S ||
-		mdev->state.conn == C_PAUSED_SYNC_T );
-
-	if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
-		retcode = ERR_CSUMS_RESYNC_RUNNING;
-		goto fail;
-	}
-
-	if (!rsr && sc.csums_alg[0]) {
-		csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(csums_tfm)) {
-			csums_tfm = NULL;
-			retcode = ERR_CSUMS_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
-			retcode = ERR_CSUMS_ALG_ND;
-			goto fail;
-		}
-	}
-
-	/* online verify running */
-	ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
-
-	if (ovr) {
-		if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
-			retcode = ERR_VERIFY_RUNNING;
-			goto fail;
-		}
-	}
-
-	if (!ovr && sc.verify_alg[0]) {
-		verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(verify_tfm)) {
-			verify_tfm = NULL;
-			retcode = ERR_VERIFY_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
-			retcode = ERR_VERIFY_ALG_ND;
-			goto fail;
-		}
-	}
-
 	/* silently ignore cpu mask on UP kernel */
 	if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
 		err = __bitmap_parse(sc.cpu_mask, 32, 0,
 				cpumask_bits(new_cpu_mask), nr_cpu_ids);
 		if (err) {
-			dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
+			conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
 			retcode = ERR_CPU_MASK_PARSE;
 			goto fail;
 		}
 	}
 
-	if (!expect(sc.rate >= 1))
-		sc.rate = 1;
-
-	/* clip to allowed range */
-	if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN))
-		sc.al_extents = DRBD_AL_EXTENTS_MIN;
-	if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX))
-		sc.al_extents = DRBD_AL_EXTENTS_MAX;
-
-	/* most sanity checks done, try to assign the new sync-after
-	 * dependency.  need to hold the global lock in there,
-	 * to avoid a race in the dependency loop check. */
-	retcode = drbd_alter_sa(mdev, sc.after);
-	if (retcode != NO_ERROR)
-		goto fail;
-
-	fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
-	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
-		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
-		if (!rs_plan_s) {
-			dev_err(DEV, "kmalloc of fifo_buffer failed");
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	/* ok, assign the rest of it as well.
-	 * lock against receive_SyncParam() */
-	spin_lock(&mdev->peer_seq_lock);
-	mdev->sync_conf = sc;
-
-	if (!rsr) {
-		crypto_free_hash(mdev->csums_tfm);
-		mdev->csums_tfm = csums_tfm;
-		csums_tfm = NULL;
-	}
-
-	if (!ovr) {
-		crypto_free_hash(mdev->verify_tfm);
-		mdev->verify_tfm = verify_tfm;
-		verify_tfm = NULL;
-	}
-
-	if (fifo_size != mdev->rs_plan_s.size) {
-		kfree(mdev->rs_plan_s.values);
-		mdev->rs_plan_s.values = rs_plan_s;
-		mdev->rs_plan_s.size   = fifo_size;
-		mdev->rs_planed = 0;
-		rs_plan_s = NULL;
-	}
-
-	spin_unlock(&mdev->peer_seq_lock);
 
-	if (get_ldev(mdev)) {
-		wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-		drbd_al_shrink(mdev);
-		err = drbd_check_al_size(mdev);
-		lc_unlock(mdev->act_log);
-		wake_up(&mdev->al_wait);
+	tconn->res_opts = sc;
 
-		put_ldev(mdev);
-		drbd_md_sync(mdev);
-
-		if (err) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	if (mdev->state.conn >= C_CONNECTED)
-		drbd_send_sync_param(mdev, &sc);
-
-	if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) {
-		cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask);
-		drbd_calc_cpu_mask(mdev->tconn);
-		mdev->tconn->receiver.reset_cpu_mask = 1;
-		mdev->tconn->asender.reset_cpu_mask = 1;
-		mdev->tconn->worker.reset_cpu_mask = 1;
+	if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
+		cpumask_copy(tconn->cpu_mask, new_cpu_mask);
+		drbd_calc_cpu_mask(tconn);
+		tconn->receiver.reset_cpu_mask = 1;
+		tconn->asender.reset_cpu_mask = 1;
+		tconn->worker.reset_cpu_mask = 1;
 	}
 
-	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 fail:
 	kfree(rs_plan_s);
 	free_cpumask_var(new_cpu_mask);
-	crypto_free_hash(csums_tfm);
-	crypto_free_hash(verify_tfm);
 
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -2307,6 +2467,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
 		goto nla_put_failure;
 
+	if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
+		goto nla_put_failure;
+
 	if (got_ldev)
 		if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
 			goto nla_put_failure;
@@ -2314,9 +2477,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
 			goto nla_put_failure;
 
-	if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive))
-			goto nla_put_failure;
-
 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
 	if (!nla)
 		goto nla_put_failure;
@@ -2532,7 +2692,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		/* resume from last known position, if possible */
 		struct start_ov_parms parms =
 			{ .ov_start_sector = mdev->ov_start_sector };
-		int err = start_ov_parms_from_attrs(&parms, info->attrs);
+		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -2568,7 +2728,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 	mdev = adm_ctx.mdev;
 	memset(&args, 0, sizeof(args));
 	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
-		err = new_c_uuid_parms_from_attrs(&args, info->attrs);
+		err = new_c_uuid_parms_from_attrs(&args, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 50c52712715e..c8c826b2444f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data)
 		&mdev->tconn->cstate_mutex :
 		&mdev->own_state_mutex;
 
-	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
+	ok &= drbd_send_sync_param(mdev);
 	ok &= drbd_send_sizes(mdev, 0, 0);
 	ok &= drbd_send_uuids(mdev);
 	ok &= drbd_send_state(mdev);
@@ -2085,7 +2085,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 	int throttle = 0;
 
 	/* feature disabled? */
-	if (mdev->sync_conf.c_min_rate == 0)
+	if (mdev->ldev->dc.c_min_rate == 0)
 		return 0;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -2125,7 +2125,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 		db = mdev->rs_mark_left[i] - rs_left;
 		dbdt = Bit2KB(db/dt);
 
-		if (dbdt > mdev->sync_conf.c_min_rate)
+		if (dbdt > mdev->ldev->dc.c_min_rate)
 			throttle = 1;
 	}
 	return throttle;
@@ -3001,7 +3001,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
 		return false;
 
-	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
+	if (get_ldev(mdev)) {
+		mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
+		put_ldev(mdev);
+	}
 
 	if (apv >= 88) {
 		if (apv == 88) {
@@ -3029,10 +3032,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
 		}
 
-		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
+		if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
-				    mdev->sync_conf.verify_alg, p->verify_alg);
+				    mdev->tconn->net_conf->verify_alg, p->verify_alg);
 				goto disconnect;
 			}
 			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -3043,10 +3046,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 			}
 		}
 
-		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
+		if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
-				    mdev->sync_conf.csums_alg, p->csums_alg);
+				    mdev->tconn->net_conf->csums_alg, p->csums_alg);
 				goto disconnect;
 			}
 			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -3057,37 +3060,39 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 			}
 		}
 
-		if (apv > 94) {
-			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
-			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
-			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
-			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
-			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
+		if (apv > 94 && get_ldev(mdev)) {
+			mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
+			mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
+			mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
+			mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
+			mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
 
-			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+			fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
 			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
 				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
 				if (!rs_plan_s) {
 					dev_err(DEV, "kmalloc of fifo_buffer failed");
+					put_ldev(mdev);
 					goto disconnect;
 				}
 			}
+			put_ldev(mdev);
 		}
 
 		spin_lock(&mdev->peer_seq_lock);
 		/* lock against drbd_nl_syncer_conf() */
 		if (verify_tfm) {
-			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
-			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
-			crypto_free_hash(mdev->verify_tfm);
-			mdev->verify_tfm = verify_tfm;
+			strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
+			mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
+			crypto_free_hash(mdev->tconn->verify_tfm);
+			mdev->tconn->verify_tfm = verify_tfm;
 			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
 		}
 		if (csums_tfm) {
-			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
-			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
-			crypto_free_hash(mdev->csums_tfm);
-			mdev->csums_tfm = csums_tfm;
+			strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
+			mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
+			crypto_free_hash(mdev->tconn->csums_tfm);
+			mdev->tconn->csums_tfm = csums_tfm;
 			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
 		}
 		if (fifo_size != mdev->rs_plan_s.size) {
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 11685658659e..77fad527fb1d 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -402,7 +402,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 		rv = SS_CONNECTED_OUTDATES;
 
 	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		 (mdev->sync_conf.verify_alg[0] == 0))
+		 (mdev->tconn->net_conf->verify_alg[0] == 0))
 		rv = SS_NO_VERIFY_ALG;
 
 	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
@@ -668,7 +668,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
 		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
 
-	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
+	if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
 	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
 		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a730520e468e..005876b32f74 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -310,12 +310,12 @@ static int w_e_send_csum(struct drbd_work *w, int cancel)
 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
 		goto out;
 
-	digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+	digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
 	digest = kmalloc(digest_size, GFP_NOIO);
 	if (digest) {
 		sector_t sector = peer_req->i.sector;
 		unsigned int size = peer_req->i.size;
-		drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest);
+		drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
 		/* Free peer_req and pages before send.
 		 * In case we block on congestion, we could otherwise run into
 		 * some distributed deadlock, if the other side blocks on
@@ -451,13 +451,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 
 	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
 
-	steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+	steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
 
 	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
-		want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
+		want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
 	} else { /* normal path */
-		want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
-			sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
+		want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target :
+			sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10);
 	}
 
 	correction = want - mdev->rs_in_flight - mdev->rs_planed;
@@ -476,7 +476,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 	if (req_sect < 0)
 		req_sect = 0;
 
-	max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
+	max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ;
 	if (req_sect > max_sect)
 		req_sect = max_sect;
 
@@ -492,11 +492,11 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 static int drbd_rs_number_requests(struct drbd_conf *mdev)
 {
 	int number;
-	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+	if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */
 		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
 		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 	} else {
-		mdev->c_sync_rate = mdev->sync_conf.rate;
+		mdev->c_sync_rate = mdev->ldev->dc.resync_rate;
 		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
 	}
 
@@ -619,7 +619,7 @@ next_sector:
 		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity)
 			size = (capacity-sector)<<9;
-		if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) {
+		if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
 			switch (read_for_csum(mdev, sector, size)) {
 			case -EIO: /* Disk failure */
 				put_ldev(mdev);
@@ -810,7 +810,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 			khelper_cmd = "after-resync-target";
 
-		if (mdev->csums_tfm && mdev->rs_total) {
+		if (mdev->tconn->csums_tfm && mdev->rs_total) {
 			const unsigned long s = mdev->rs_same_csum;
 			const unsigned long t = mdev->rs_total;
 			const int ratio =
@@ -1019,13 +1019,13 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
 		/* quick hack to try to avoid a race against reconfiguration.
 		 * a real fix would be much more involved,
 		 * introducing more locking mechanisms */
-		if (mdev->csums_tfm) {
-			digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+		if (mdev->tconn->csums_tfm) {
+			digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
 			D_ASSERT(digest_size == di->digest_size);
 			digest = kmalloc(digest_size, GFP_NOIO);
 		}
 		if (digest) {
-			drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest);
+			drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
 			eq = !memcmp(digest, di->digest, digest_size);
 			kfree(digest);
 		}
@@ -1069,7 +1069,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
 	if (unlikely(cancel))
 		goto out;
 
-	digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+	digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
 	digest = kmalloc(digest_size, GFP_NOIO);
 	if (!digest) {
 		ok = 0;	/* terminate the connection in case the allocation failed */
@@ -1077,7 +1077,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
 	}
 
 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
-		drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest);
+		drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
 	else
 		memset(digest, 0, digest_size);
 
@@ -1141,10 +1141,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 	di = peer_req->digest;
 
 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
-		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+		digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
 		digest = kmalloc(digest_size, GFP_NOIO);
 		if (digest) {
-			drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest);
+			drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
 
 			D_ASSERT(digest_size == di->digest_size);
 			eq = !memcmp(digest, di->digest, digest_size);
@@ -1319,9 +1319,9 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev)
 	struct drbd_conf *odev = mdev;
 
 	while (1) {
-		if (odev->sync_conf.after == -1)
+		if (odev->ldev->dc.resync_after == -1)
 			return 1;
-		odev = minor_to_mdev(odev->sync_conf.after);
+		odev = minor_to_mdev(odev->ldev->dc.resync_after);
 		if (!expect(odev))
 			return 1;
 		if ((odev->state.conn >= C_SYNC_SOURCE &&
@@ -1408,11 +1408,11 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor)
 			return ERR_SYNC_AFTER_CYCLE;
 
 		/* dependency chain ends here, no cycles. */
-		if (odev->sync_conf.after == -1)
+		if (odev->ldev->dc.resync_after == -1)
 			return NO_ERROR;
 
 		/* follow the dependency chain */
-		odev = minor_to_mdev(odev->sync_conf.after);
+		odev = minor_to_mdev(odev->ldev->dc.resync_after);
 	}
 }
 
@@ -1424,7 +1424,7 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
 	write_lock_irq(&global_state_lock);
 	retcode = sync_after_error(mdev, na);
 	if (retcode == NO_ERROR) {
-		mdev->sync_conf.after = na;
+		mdev->ldev->dc.resync_after = na;
 		do {
 			changes  = _drbd_pause_after(mdev);
 			changes |= _drbd_resume_next(mdev);
@@ -1637,7 +1637,7 @@ int drbd_worker(struct drbd_thread *thi)
 	struct drbd_work *w = NULL;
 	struct drbd_conf *mdev;
 	LIST_HEAD(work_list);
-	int minor, intr = 0;
+	int vnr, intr = 0;
 
 	while (get_t_state(thi) == RUNNING) {
 		drbd_thread_current_set_cpu(thi);
@@ -1722,7 +1722,7 @@ int drbd_worker(struct drbd_thread *thi)
 	spin_unlock_irq(&tconn->data.work.q_lock);
 
 	drbd_thread_stop(&tconn->receiver);
-	idr_for_each_entry(&tconn->volumes, mdev, minor) {
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
 		/* _drbd_set_state only uses stop_nowait.
 		 * wait here for the exiting receiver. */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index a07d69279b1a..938e8560a833 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -102,66 +102,73 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
-	__u64_field(1, GENLA_F_MANDATORY,	disk_size)
-	__str_field(2, GENLA_F_REQUIRED,	backing_dev,	128)
-	__str_field(3, GENLA_F_REQUIRED,	meta_dev,	128)
-	__u32_field(4, GENLA_F_REQUIRED,	meta_dev_idx)
-	__u32_field(5, GENLA_F_MANDATORY,	max_bio_bvecs)
+	__str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	backing_dev,	128)
+	__str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev,	128)
+	__u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
+
+	/* use the resize command to try and change the disk_size */
+	__u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	disk_size)
+	/* we could change the max_bio_bvecs,
+	 * but it won't propagate through the stack */
+	__u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	max_bio_bvecs)
+
 	__u32_field(6, GENLA_F_MANDATORY,	on_io_error)
 	__u32_field(7, GENLA_F_MANDATORY,	fencing)
-	__flg_field(8, GENLA_F_MANDATORY,	no_disk_barrier)
-	__flg_field(9, GENLA_F_MANDATORY,	no_disk_flush)
-	__flg_field(10, GENLA_F_MANDATORY,	no_disk_drain)
-	__flg_field(11, GENLA_F_MANDATORY,	no_md_flush)
-	__flg_field(12, GENLA_F_MANDATORY,	use_bmbv)
+
+	__u32_field(8,	GENLA_F_MANDATORY,	resync_rate)
+	__u32_field(9,	GENLA_F_MANDATORY,	resync_after)
+	__u32_field(10,	GENLA_F_MANDATORY,	al_extents)
+	__u32_field(11,	GENLA_F_MANDATORY,	c_plan_ahead)
+	__u32_field(12,	GENLA_F_MANDATORY,	c_delay_target)
+	__u32_field(13,	GENLA_F_MANDATORY,	c_fill_target)
+	__u32_field(14,	GENLA_F_MANDATORY,	c_max_rate)
+	__u32_field(15,	GENLA_F_MANDATORY,	c_min_rate)
+
+	__flg_field(16, GENLA_F_MANDATORY,	no_disk_barrier)
+	__flg_field(17, GENLA_F_MANDATORY,	no_disk_flush)
+	__flg_field(18, GENLA_F_MANDATORY,	no_disk_drain)
+	__flg_field(19, GENLA_F_MANDATORY,	no_md_flush)
+
 )
 
-GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf,
-	__u32_field(1,	GENLA_F_MANDATORY,	rate)
-	__u32_field(2,	GENLA_F_MANDATORY,	after)
-	__u32_field(3,	GENLA_F_MANDATORY,	al_extents)
-	__str_field(4,	GENLA_F_MANDATORY,	cpu_mask,       32)
-	__str_field(5,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field(6,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__flg_field(7,	GENLA_F_MANDATORY,	use_rle)
-	__u32_field(8,	GENLA_F_MANDATORY,	on_no_data)
-	__u32_field(9,	GENLA_F_MANDATORY,	c_plan_ahead)
-	__u32_field(10,	GENLA_F_MANDATORY,	c_delay_target)
-	__u32_field(11,	GENLA_F_MANDATORY,	c_fill_target)
-	__u32_field(12,	GENLA_F_MANDATORY,	c_max_rate)
-	__u32_field(13,	GENLA_F_MANDATORY,	c_min_rate)
+GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
+	__str_field(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field(2,	GENLA_F_MANDATORY,	on_no_data)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__str_field(1,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+	__bin_field(1,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	my_addr,	128)
+	__bin_field(2,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	peer_addr,	128)
+	__str_field(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field(2,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field(3,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field(4,	GENLA_F_REQUIRED,	my_addr,	128)
-	__str_field(5,	GENLA_F_REQUIRED,	peer_addr,	128)
-	__u32_field(6,	GENLA_F_REQUIRED,	wire_protocol)
-	__u32_field(7,	GENLA_F_MANDATORY,	try_connect_int)
-	__u32_field(8,	GENLA_F_MANDATORY,	timeout)
-	__u32_field(9,	GENLA_F_MANDATORY,	ping_int)
-	__u32_field(10,	GENLA_F_MANDATORY,	ping_timeo)
-	__u32_field(11,	GENLA_F_MANDATORY,	sndbuf_size)
-	__u32_field(12,	GENLA_F_MANDATORY,	rcvbuf_size)
-	__u32_field(13,	GENLA_F_MANDATORY,	ko_count)
-	__u32_field(14,	GENLA_F_MANDATORY,	max_buffers)
-	__u32_field(15,	GENLA_F_MANDATORY,	max_epoch_size)
-	__u32_field(16,	GENLA_F_MANDATORY,	unplug_watermark)
-	__u32_field(17,	GENLA_F_MANDATORY,	after_sb_0p)
-	__u32_field(18,	GENLA_F_MANDATORY,	after_sb_1p)
-	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_2p)
-	__u32_field(20,	GENLA_F_MANDATORY,	rr_conflict)
-	__u32_field(21,	GENLA_F_MANDATORY,	on_congestion)
-	__u32_field(22,	GENLA_F_MANDATORY,	cong_fill)
-	__u32_field(23,	GENLA_F_MANDATORY,	cong_extents)
-	__flg_field(24, GENLA_F_MANDATORY,	two_primaries)
-	__flg_field(25, GENLA_F_MANDATORY,	want_lose)
-	__flg_field(26, GENLA_F_MANDATORY,	no_cork)
-	__flg_field(27, GENLA_F_MANDATORY,	always_asbp)
-	__flg_field(28, GENLA_F_MANDATORY,	dry_run)
+	__str_field(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field(8,	GENLA_F_MANDATORY,	wire_protocol)
+	__u32_field(9,	GENLA_F_MANDATORY,	try_connect_int)
+	__u32_field(10,	GENLA_F_MANDATORY,	timeout)
+	__u32_field(11,	GENLA_F_MANDATORY,	ping_int)
+	__u32_field(12,	GENLA_F_MANDATORY,	ping_timeo)
+	__u32_field(13,	GENLA_F_MANDATORY,	sndbuf_size)
+	__u32_field(14,	GENLA_F_MANDATORY,	rcvbuf_size)
+	__u32_field(15,	GENLA_F_MANDATORY,	ko_count)
+	__u32_field(16,	GENLA_F_MANDATORY,	max_buffers)
+	__u32_field(17,	GENLA_F_MANDATORY,	max_epoch_size)
+	__u32_field(18,	GENLA_F_MANDATORY,	unplug_watermark)
+	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_0p)
+	__u32_field(20,	GENLA_F_MANDATORY,	after_sb_1p)
+	__u32_field(21,	GENLA_F_MANDATORY,	after_sb_2p)
+	__u32_field(22,	GENLA_F_MANDATORY,	rr_conflict)
+	__u32_field(23,	GENLA_F_MANDATORY,	on_congestion)
+	__u32_field(24,	GENLA_F_MANDATORY,	cong_fill)
+	__u32_field(25,	GENLA_F_MANDATORY,	cong_extents)
+	__flg_field(26, GENLA_F_MANDATORY,	two_primaries)
+	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
+	__flg_field(28, GENLA_F_MANDATORY,	no_cork)
+	__flg_field(29, GENLA_F_MANDATORY,	always_asbp)
+	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
+	__flg_field(31,	GENLA_F_MANDATORY,	use_rle)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
@@ -270,11 +277,10 @@ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
 GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
 
-	/* operates on replication links */
-GENL_op(DRBD_ADM_SYNCER, 9,
-	GENL_doit(drbd_adm_syncer),
+GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
+	GENL_doit(drbd_adm_resource_opts),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY)
 )
 
 GENL_op(
@@ -284,16 +290,28 @@ GENL_op(
 	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
 )
 
+GENL_op(
+	DRBD_ADM_CHG_NET_OPTS, 29,
+	GENL_doit(drbd_adm_net_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+)
+
 GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
 
-	/* operates on minors */
 GENL_op(DRBD_ADM_ATTACH, 12,
 	GENL_doit(drbd_adm_attach),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
 	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED)
 )
 
+GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
+	GENL_doit(drbd_adm_disk_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED)
+)
+
 GENL_op(
 	DRBD_ADM_RESIZE, 13,
 	GENL_doit(drbd_adm_resize),
@@ -301,7 +319,6 @@ GENL_op(
 	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY)
 )
 
-	/* operates on all volumes within a resource */
 GENL_op(
 	DRBD_ADM_PRIMARY, 14,
 	GENL_doit(drbd_adm_set_role),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 22920a8af4e2..659a8eb38830 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -166,5 +166,7 @@
 #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
 #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
 
+#define DRBD_PROTOCOL_DEF DRBD_PROT_C
+
 #undef RANGE
 #endif
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index c8c67239f616..e458282a3728 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -190,11 +190,12 @@ static struct nlattr *nested_attr_tb[128];
 
 #undef GENL_struct
 #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
-	/* static, potentially unused */				\
-int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[])	\
+/* *_from_attrs functions are static, but potentially unused */		\
+static int __ ## s_name ## _from_attrs(struct s_name *s,		\
+		struct genl_info *info, bool exclude_invariants)	\
 {									\
 	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
-	struct nlattr *tla = tb[tag_number];				\
+	struct nlattr *tla = info->attrs[tag_number];			\
 	struct nlattr **ntb = nested_attr_tb;				\
 	struct nlattr *nla;						\
 	int err;							\
@@ -211,33 +212,49 @@ int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[])	\
 									\
 	s_fields							\
 	return 0;							\
-}
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs(struct s_name *s,			\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, false);		\
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs_for_change(struct s_name *s,		\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, true);		\
+}					__attribute__((unused))		\
 
-#undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
 		nla = ntb[__nla_type(attr_nr)];				\
 		if (nla) {						\
-			if (s)						\
-				s->name = __get(nla);			\
-			DPRINT_FIELD("<<", nla_type, name, s, nla);	\
+			if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+				pr_info("<< must not change invariant attr: %s\n", #name);	\
+				return -EEXIST;				\
+			}						\
+			assignment;					\
+		} else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+			/* attribute missing from payload, */		\
+			/* which was expected */			\
 		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
 			pr_info("<< missing attr: %s\n", #name);	\
 			return -ENOMSG;					\
 		}
 
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
+			if (s)						\
+				s->name = __get(nla);			\
+			DPRINT_FIELD("<<", nla_type, name, s, nla))
+
 /* validate_nla() already checked nla_len <= maxlen appropriately. */
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
-		nla = ntb[__nla_type(attr_nr)];				\
-		if (nla) {						\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
 			if (s)						\
 				s->name ## _len =			\
 					__get(s->name, nla, maxlen);	\
-			DPRINT_ARRAY("<<", nla_type, name, s, nla);	\
-		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
-			pr_info("<< missing attr: %s\n", #name);	\
-			return -ENOMSG;					\
-		}							\
+			DPRINT_ARRAY("<<", nla_type, name, s, nla))
 
 #include GENL_MAGIC_INCLUDE_FILE
 
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 745ebfd6c7e5..9a605b9ee834 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -59,12 +59,20 @@ enum {
 	GENLA_F_MANDATORY	= 1 << 14,
 	GENLA_F_REQUIRED	= 1 << 15,
 
-	/* This will not be present in the __u16 .nla_type, but can be
-	 * triggered on in <struct>_to_skb, to exclude "sensitive"
-	 * information from broadcasts, or on unpriviledged get requests.
-	 * This is useful because genetlink multicast groups can be listened in
-	 * on by anyone.  */
+	/* Below will not be present in the __u16 .nla_type, but can be
+	 * triggered on in <struct>_to_skb resp. <struct>_from_attrs */
+
+	/* To exclude "sensitive" information from broadcasts, or on
+	 * unpriviledged get requests.  This is useful because genetlink
+	 * multicast groups can be listened in on by anyone.  */
 	GENLA_F_SENSITIVE	= 1 << 16,
+
+	/* INVARIAN options cannot be changed at runtime.
+	 * Useful to share an attribute policy and struct definition,
+	 * between some "create" and "change" commands,
+	 * but disallow certain fields to be changed online.
+	 */
+	GENLA_F_INVARIANT	= 1 << 17,
 };
 
 #define __nla_type(x)	((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK))
-- 
cgit v1.2.3-55-g7522


From cb703454a283d8dd5599e928eeea30367ca18874 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Thu, 24 Mar 2011 11:03:07 +0100
Subject: drbd: Converted drbd_try_outdate_peer() from mdev to tconn

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |   4 +-
 drivers/block/drbd/drbd_nl.c       | 144 ++++++++++++++++++-------------------
 drivers/block/drbd/drbd_receiver.c |   7 +-
 drivers/block/drbd/drbd_state.c    |  85 ++++++++++++++++++----
 drivers/block/drbd/drbd_state.h    |   5 ++
 include/linux/drbd.h               |   3 +-
 6 files changed, 152 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c1eb4462096e..74637cc1461c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1472,8 +1472,8 @@ extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
 					enum drbd_role new_role,
 					int force);
-extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
-extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
+extern bool conn_try_outdate_peer(struct drbd_tconn *tconn);
+extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
 /* drbd_worker.c */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f1ec727f7df5..85290a9beb6d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -366,116 +366,122 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 	return ret;
 }
 
-enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 {
+	enum drbd_fencing_p fp = FP_NOT_AVAIL;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+			fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing);
+			put_ldev(mdev);
+		}
+	}
+
+	return fp;
+}
+
+bool conn_try_outdate_peer(struct drbd_tconn *tconn)
+{
+	union drbd_state mask = { };
+	union drbd_state val = { };
+	enum drbd_fencing_p fp;
 	char *ex_to_string;
 	int r;
-	enum drbd_disk_state nps;
-	enum drbd_fencing_p fp;
 
-	D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+	if (tconn->cstate >= C_WF_REPORT_PARAMS) {
+		conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
+		return false;
+	}
 
-	if (get_ldev_if_state(mdev, D_CONSISTENT)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	} else {
-		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
-		nps = mdev->state.pdsk;
+	fp = highest_fencing_policy(tconn);
+	switch (fp) {
+	case FP_NOT_AVAIL:
+		conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
 		goto out;
+	case FP_DONT_CARE:
+		return true;
+	default: ;
 	}
 
-	r = drbd_khelper(mdev, "fence-peer");
+	r = conn_khelper(tconn, "fence-peer");
 
 	switch ((r>>8) & 0xff) {
 	case 3: /* peer is inconsistent */
 		ex_to_string = "peer is inconsistent or worse";
-		nps = D_INCONSISTENT;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_INCONSISTENT;
 		break;
 	case 4: /* peer got outdated, or was already outdated */
 		ex_to_string = "peer was fenced";
-		nps = D_OUTDATED;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_OUTDATED;
 		break;
 	case 5: /* peer was down */
-		if (mdev->state.disk == D_UP_TO_DATE) {
+		if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
 			/* we will(have) create(d) a new UUID anyways... */
 			ex_to_string = "peer is unreachable, assumed to be dead";
-			nps = D_OUTDATED;
+			mask.pdsk = D_MASK;
+			val.pdsk = D_OUTDATED;
 		} else {
 			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
-			nps = mdev->state.pdsk;
 		}
 		break;
 	case 6: /* Peer is primary, voluntarily outdate myself.
 		 * This is useful when an unconnected R_SECONDARY is asked to
 		 * become R_PRIMARY, but finds the other peer being active. */
 		ex_to_string = "peer is active";
-		dev_warn(DEV, "Peer is primary, outdating myself.\n");
-		nps = D_UNKNOWN;
-		_drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+		conn_warn(tconn, "Peer is primary, outdating myself.\n");
+		mask.disk = D_MASK;
+		val.disk = D_OUTDATED;
 		break;
 	case 7:
 		if (fp != FP_STONITH)
-			dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+			conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
 		ex_to_string = "peer was stonithed";
-		nps = D_OUTDATED;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_OUTDATED;
 		break;
 	default:
 		/* The script is broken ... */
-		nps = D_UNKNOWN;
-		dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
-		return nps;
+		conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+		return false; /* Eventually leave IO frozen */
 	}
 
-	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
-			(r>>8) & 0xff, ex_to_string);
+	conn_info(tconn, "fence-peer helper returned %d (%s)\n",
+		  (r>>8) & 0xff, ex_to_string);
 
-out:
-	if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
-		/* The handler was not successful... unfreeze here, the
-		   state engine can not unfreeze... */
-		_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
-	}
+ out:
 
-	return nps;
+	/* Not using
+	   conn_request_state(tconn, mask, val, CS_VERBOSE);
+	   here, because we might were able to re-establish the connection in the
+	   meantime. */
+	spin_lock_irq(&tconn->req_lock);
+	if (tconn->cstate < C_WF_REPORT_PARAMS)
+		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	spin_unlock_irq(&tconn->req_lock);
+
+	return conn_highest_pdsk(tconn) <= D_OUTDATED;
 }
 
 static int _try_outdate_peer_async(void *data)
 {
-	struct drbd_conf *mdev = (struct drbd_conf *)data;
-	enum drbd_disk_state nps;
-	union drbd_state ns;
+	struct drbd_tconn *tconn = (struct drbd_tconn *)data;
 
-	nps = drbd_try_outdate_peer(mdev);
-
-	/* Not using
-	   drbd_request_state(mdev, NS(pdsk, nps));
-	   here, because we might were able to re-establish the connection
-	   in the meantime. This can only partially be solved in the state's
-	   engine is_valid_state() and is_valid_state_transition()
-	   functions.
-
-	   nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
-	   pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
-	   therefore we have to have the pre state change check here.
-	*/
-	spin_lock_irq(&mdev->tconn->req_lock);
-	ns = mdev->state;
-	if (ns.conn < C_WF_REPORT_PARAMS) {
-		ns.pdsk = nps;
-		_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
-	}
-	spin_unlock_irq(&mdev->tconn->req_lock);
+	conn_try_outdate_peer(tconn);
 
 	return 0;
 }
 
-void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
+void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
 {
 	struct task_struct *opa;
 
-	opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
+	opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
 	if (IS_ERR(opa))
-		dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
+		conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
 }
 
 enum drbd_state_rv
@@ -486,7 +492,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	int try = 0;
 	int forced = 0;
 	union drbd_state mask, val;
-	enum drbd_disk_state nps;
 
 	if (new_role == R_PRIMARY)
 		request_ping(mdev->tconn); /* Detect a dead peer ASAP */
@@ -519,32 +524,23 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		if (rv == SS_NO_UP_TO_DATE_DISK &&
 		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
-			nps = drbd_try_outdate_peer(mdev);
 
-			if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+			if (conn_try_outdate_peer(mdev->tconn)) {
 				val.disk = D_UP_TO_DATE;
 				mask.disk = D_MASK;
 			}
-
-			val.pdsk = nps;
-			mask.pdsk = D_MASK;
-
 			continue;
 		}
 
 		if (rv == SS_NOTHING_TO_DO)
 			goto out;
 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
-			nps = drbd_try_outdate_peer(mdev);
-
-			if (force && nps > D_OUTDATED) {
+			if (!conn_try_outdate_peer(mdev->tconn) && force) {
 				dev_warn(DEV, "Forced into split brain situation!\n");
-				nps = D_OUTDATED;
-			}
-
-			mask.pdsk = D_MASK;
-			val.pdsk  = nps;
+				mask.pdsk = D_MASK;
+				val.pdsk  = D_OUTDATED;
 
+			}
 			continue;
 		}
 		if (rv == SS_TWO_PRIMARIES) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1fd871bc889e..91aa49f478e8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4030,9 +4030,11 @@ static void drbd_disconnect(struct drbd_tconn *tconn)
 	drbd_free_sock(tconn);
 
 	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
-
 	conn_info(tconn, "Connection closed\n");
 
+	if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
+		conn_try_outdate_peer_async(tconn);
+
 	spin_lock_irq(&tconn->req_lock);
 	oc = tconn->cstate;
 	if (oc >= C_UNCONNECTED)
@@ -4109,9 +4111,6 @@ static int drbd_disconnected(int vnr, void *p, void *data)
 		put_ldev(mdev);
 	}
 
-	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
-		drbd_try_outdate_peer_async(mdev);
-
 	/* serialize with bitmap writeout triggered by the state change,
 	 * if any. */
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 52ff1c7379e9..b4f668db3296 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -61,6 +61,73 @@ bool conn_all_vols_unconf(struct drbd_tconn *tconn)
 	return true;
 }
 
+/* Unfortunately the states where not correctly ordered, when
+   they where defined. therefore can not use max_t() here. */
+static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
+		return R_PRIMARY;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_UNKNOWN;
+}
+static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
+		return R_UNKNOWN;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_PRIMARY;
+}
+
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
+{
+	enum drbd_role role = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		role = max_role(role, mdev->state.role);
+
+	return role;
+}
+
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
+{
+	enum drbd_role peer = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		peer = max_role(peer, mdev->state.peer);
+
+	return peer;
+}
+
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
+
+	return ds;
+}
+
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
+
+	return ds;
+}
+
 /**
  * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:	DRBD device.
@@ -329,18 +396,6 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio
 		dev_info(DEV, "%s\n", pb);
 }
 
-static bool vol_has_primary_peer(struct drbd_tconn *tconn)
-{
-	struct drbd_conf *mdev;
-	int vnr;
-
-	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
-		if (mdev->state.peer == R_PRIMARY)
-			return true;
-	}
-	return false;
-}
-
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
@@ -364,7 +419,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 		if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) {
 			if (ns.peer == R_PRIMARY)
 				rv = SS_TWO_PRIMARIES;
-			else if (vol_has_primary_peer(mdev->tconn))
+			else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
 				rv = SS_O_VOL_PEER_PRI;
 			}
 		put_net_conf(mdev->tconn);
@@ -1390,8 +1445,8 @@ static int _set_state_itr_fn(int vnr, void *p, void *data)
 
 	rv = __drbd_set_state(mdev, ns, flags, NULL);
 
-	ms.role = max_t(enum drbd_role, mdev->state.role, ms.role);
-	ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer);
+	ms.role = max_role(ns.role, ms.role);
+	ms.peer = max_role(ns.peer, ms.peer);
 	ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk);
 	ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk);
 	params->ms = ms;
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index 55df0728bc88..394a1998acd9 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -110,4 +110,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
 	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
 }
 
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
+
 #endif
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 9cdb888607ae..60d308819096 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -65,7 +65,8 @@ enum drbd_io_error_p {
 };
 
 enum drbd_fencing_p {
-	FP_DONT_CARE,
+	FP_NOT_AVAIL = -1, /* Not a policy */
+	FP_DONT_CARE = 0,
 	FP_RESOURCE,
 	FP_STONITH
 };
-- 
cgit v1.2.3-55-g7522


From 0c8e36d9b843be56e4e43d4ef3c3eb6a97205599 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 30 Mar 2011 16:00:17 +0200
Subject: drbd: Introduce protocol version 100 headers

The 8 byte header finally becomes too small. With the protocol 100 header we
have 16 bit for the volume number, proper 32 bit for the data length, and
32 bit for further extensions in the future.

Previous versions of drbd are using version 80 headers for all packets
short enough for protocol 80.  They support both header versions in
worker context, but only version 80 headers in asynchronous context.
For backwards compatibility, continue to use version 80 headers for
short packets before protocol version 100.

From protocol version 100 on, use the same header version for all
packets.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  8 ++++++++
 drivers/block/drbd/drbd_main.c     | 32 ++++++++++++++++++++++++++------
 drivers/block/drbd/drbd_nl.c       |  3 +--
 drivers/block/drbd/drbd_receiver.c | 14 ++++++++++++--
 include/linux/drbd.h               |  1 +
 include/linux/drbd_genl.h          |  2 --
 include/linux/drbd_limits.h        |  2 ++
 7 files changed, 50 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6d55bb75a081..bf1aad683387 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -307,6 +307,14 @@ struct p_header95 {
 	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
 } __packed;
 
+struct p_header100 {
+	u32	  magic;
+	u16	  volume;
+	u16	  command;
+	u32	  length;
+	u32	  pad;
+} __packed;
+
 extern unsigned int drbd_header_size(struct drbd_tconn *tconn);
 
 /* these defines must not be changed without changing the protocol version */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index b9dcc50135c4..5d9112cefcd7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -698,9 +698,15 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi)
  */
 unsigned int drbd_header_size(struct drbd_tconn *tconn)
 {
-	BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
-	BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
-	return sizeof(struct p_header80);
+	if (tconn->agreed_pro_version >= 100) {
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
+		return sizeof(struct p_header100);
+	} else {
+		BUILD_BUG_ON(sizeof(struct p_header80) !=
+			     sizeof(struct p_header95));
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
+		return sizeof(struct p_header80);
+	}
 }
 
 static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
@@ -719,10 +725,24 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd,
 	return sizeof(struct p_header95);
 }
 
-static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer,
-				   enum drbd_packet cmd, int size)
+static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
+				      int size, int vnr)
+{
+	h->magic = cpu_to_be32(DRBD_MAGIC_100);
+	h->volume = cpu_to_be16(vnr);
+	h->command = cpu_to_be16(cmd);
+	h->length = cpu_to_be32(size);
+	h->pad = 0;
+	return sizeof(struct p_header100);
+}
+
+static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
+				   void *buffer, enum drbd_packet cmd, int size)
 {
-	if (tconn->agreed_pro_version >= 95)
+	if (tconn->agreed_pro_version >= 100)
+		return prepare_header100(buffer, cmd, size, vnr);
+	else if (tconn->agreed_pro_version >= 95 &&
+		 size > DRBD_MAX_SIZE_H80_PACKET)
 		return prepare_header95(buffer, cmd, size);
 	else
 		return prepare_header80(buffer, cmd, size);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d9bb1a5c756a..0f52b88719c8 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2833,8 +2833,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
-	/* FIXME we need a define here */
-	if (adm_ctx.volume >= 256) {
+	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
 		drbd_msg_put_info("requested volume id out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7e0ab2246fb6..311b95453cb7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -983,8 +983,18 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i
 {
 	unsigned int header_size = drbd_header_size(tconn);
 
-	if (header_size == sizeof(struct p_header95) &&
-	    *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
+	if (header_size == sizeof(struct p_header100) &&
+	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
+		struct p_header100 *h = header;
+		if (h->pad != 0) {
+			conn_err(tconn, "Header padding is not zero\n");
+			return -EINVAL;
+		}
+		pi->vnr = be16_to_cpu(h->volume);
+		pi->cmd = be16_to_cpu(h->command);
+		pi->size = be32_to_cpu(h->length);
+	} else if (header_size == sizeof(struct p_header95) &&
+		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
 		struct p_header95 *h = header;
 
 		pi->cmd = be16_to_cpu(h->command);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 60d308819096..fe8d6ba31bcb 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -341,6 +341,7 @@ enum drbd_timeout_flag {
 
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
+#define DRBD_MAGIC_100 0x8620ec20
 
 /* how I came up with this magic?
  * base64 decode "actlog==" ;) */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 938e8560a833..10144d546a66 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -95,8 +95,6 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and/or the replication group (aka resource) name,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
-		/* currently only 256 volumes per group,
-		 * but maybe we still change that */
 	__u32_field(1, GENLA_F_MANDATORY,	ctx_volume)
 	__str_field(2, GENLA_F_MANDATORY,	ctx_conn_name, 128)
 )
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 659a8eb38830..7f5149bef70e 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -19,6 +19,8 @@
 #define DRBD_MINOR_COUNT_MAX 256
 #define DRBD_MINOR_COUNT_DEF 32
 
+#define DRBD_VOLUME_MAX 65535
+
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
 
-- 
cgit v1.2.3-55-g7522


From b032b6fa3528d6eed972db32257cb316a66e0dac Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 13 Apr 2011 18:16:10 -0700
Subject: drbd: Allow online change of replication protocol only with agreed_pv
 >= 100

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 5 +++++
 include/linux/drbd.h         | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 40de384aade6..d4b29fd603f4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1671,6 +1671,11 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf)
 	struct drbd_conf *mdev;
 	int i;
 
+	if (tconn->net_conf && tconn->agreed_pro_version < 100 &&
+	    tconn->cstate == C_WF_REPORT_PARAMS &&
+	    new_conf->wire_protocol != tconn->net_conf->wire_protocol)
+		return ERR_NEED_APV_100;
+
 	if (new_conf->two_primaries &&
 	    (new_conf->wire_protocol != DRBD_PROT_C))
 		return ERR_NOT_PROTO_C;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index fe8d6ba31bcb..6c7c85d8fc41 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -160,6 +160,7 @@ enum drbd_ret_code {
 	ERR_MINOR_CONFIGURED    = 160,
 	ERR_MINOR_EXISTS	= 161,
 	ERR_INVALID_REQUEST	= 162,
+	ERR_NEED_APV_100	= 163,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
-- 
cgit v1.2.3-55-g7522


From d8cd289dbe69ce9b8115d6f200ceff657e5dafa0 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 3 May 2011 12:27:11 +0200
Subject: drbd: Remove left-over unused define

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 7f5149bef70e..bcebb016fda3 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -170,5 +170,4 @@
 
 #define DRBD_PROTOCOL_DEF DRBD_PROT_C
 
-#undef RANGE
 #endif
-- 
cgit v1.2.3-55-g7522


From b966b5dd8e17e6c105ca55533fd412de5d5b429e Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 3 May 2011 14:56:09 +0200
Subject: drbd: Generate the drbd_set_*_defaults() functions from drbd_genl.h

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h     |  2 +-
 drivers/block/drbd/drbd_main.c    |  2 +-
 drivers/block/drbd/drbd_nl.c      | 86 +++++-------------------------------
 include/linux/drbd_genl.h         | 91 +++++++++++++++++++--------------------
 include/linux/genl_magic_func.h   | 26 +++++++++++
 include/linux/genl_magic_struct.h |  8 ++++
 6 files changed, 91 insertions(+), 124 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 44f77265d2b0..8655fcb82028 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1390,7 +1390,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
 
 /* drbd_nl.c */
-extern void drbd_set_res_opts_default(struct res_opts *r);
+extern void drbd_set_res_opts_defaults(struct res_opts *r);
 extern int drbd_msg_put_info(const char *info);
 extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 427e959e4869..4ae3e7a99d7c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2488,7 +2488,7 @@ struct drbd_tconn *conn_create(const char *name)
 	drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
 	drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
 
-	drbd_set_res_opts_default(&tconn->res_opts);
+	drbd_set_res_opts_defaults(&tconn->res_opts);
 
 	down_write(&drbd_cfg_rwsem);
 	kref_init(&tconn->kref);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 7320ac00f0fb..f5732cf46c2f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1090,77 +1090,6 @@ static bool should_set_defaults(struct genl_info *info)
 	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
 }
 
-/* Maybe we should we generate these functions
- * from the drbd_genl.h magic as well?
- * That way we would not "accidentally forget" to add defaults here. */
-
-#define RESET_ARRAY_FIELD(field) do { \
-	memset(field, 0, sizeof(field)); \
-	field ## _len = 0; \
-} while (0)
-void drbd_set_res_opts_default(struct res_opts *r)
-{
-	RESET_ARRAY_FIELD(r->cpu_mask);
-	r->on_no_data  = DRBD_ON_NO_DATA_DEF;
-}
-
-static void drbd_set_net_conf_defaults(struct net_conf *nc)
-{
-	/* Do NOT (re)set those fields marked as GENLA_F_INVARIANT
-	 * in drbd_genl.h, they can only be change with disconnect/reconnect */
-	RESET_ARRAY_FIELD(nc->shared_secret);
-
-	RESET_ARRAY_FIELD(nc->cram_hmac_alg);
-	RESET_ARRAY_FIELD(nc->integrity_alg);
-	RESET_ARRAY_FIELD(nc->verify_alg);
-	RESET_ARRAY_FIELD(nc->csums_alg);
-#undef RESET_ARRAY_FIELD
-
-	nc->wire_protocol = DRBD_PROTOCOL_DEF;
-	nc->try_connect_int = DRBD_CONNECT_INT_DEF;
-	nc->timeout = DRBD_TIMEOUT_DEF;
-	nc->ping_int = DRBD_PING_INT_DEF;
-	nc->ping_timeo = DRBD_PING_TIMEO_DEF;
-	nc->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
-	nc->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF;
-	nc->ko_count = DRBD_KO_COUNT_DEF;
-	nc->max_buffers = DRBD_MAX_BUFFERS_DEF;
-	nc->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
-	nc->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
-	nc->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
-	nc->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
-	nc->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
-	nc->rr_conflict = DRBD_RR_CONFLICT_DEF;
-	nc->on_congestion = DRBD_ON_CONGESTION_DEF;
-	nc->cong_fill = DRBD_CONG_FILL_DEF;
-	nc->cong_extents = DRBD_CONG_EXTENTS_DEF;
-	nc->two_primaries = 0;
-	nc->no_cork = 0;
-	nc->always_asbp = 0;
-	nc->use_rle = 0;
-}
-
-static void drbd_set_disk_conf_defaults(struct disk_conf *dc)
-{
-	/* Do NOT (re)set those fields marked as GENLA_F_INVARIANT
-	 * in drbd_genl.h, they can only be change with detach/reattach */
-	dc->on_io_error = DRBD_ON_IO_ERROR_DEF;
-	dc->fencing = DRBD_FENCING_DEF;
-	dc->resync_rate = DRBD_RATE_DEF;
-	dc->resync_after = DRBD_AFTER_DEF;
-	dc->al_extents = DRBD_AL_EXTENTS_DEF;
-	dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
-	dc->c_delay_target = DRBD_C_DELAY_TARGET_DEF;
-	dc->c_fill_target = DRBD_C_FILL_TARGET_DEF;
-	dc->c_max_rate = DRBD_C_MAX_RATE_DEF;
-	dc->c_min_rate = DRBD_C_MIN_RATE_DEF;
-	dc->no_disk_barrier = 0;
-	dc->no_disk_flush = 0;
-	dc->no_disk_drain = 0;
-	dc->no_md_flush = 0;
-}
-
-
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
@@ -1198,7 +1127,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 
 	memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf));
 	if (should_set_defaults(info))
-		drbd_set_disk_conf_defaults(new_disk_conf);
+		set_disk_conf_defaults(new_disk_conf);
 
 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
 	if (err) {
@@ -1315,7 +1244,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	drbd_set_disk_conf_defaults(&nbc->dc);
+	set_disk_conf_defaults(&nbc->dc);
 
 	err = disk_conf_from_attrs(&nbc->dc, info);
 	if (err) {
@@ -1911,7 +1840,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 
 	*new_conf = *old_conf;
 	if (should_set_defaults(info))
-		drbd_set_net_conf_defaults(new_conf);
+		set_net_conf_defaults(new_conf);
 
 	err = net_conf_from_attrs_for_change(new_conf, info);
 	if (err) {
@@ -2029,7 +1958,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	drbd_set_net_conf_defaults(new_conf);
+	set_net_conf_defaults(new_conf);
 
 	err = net_conf_from_attrs(new_conf, info);
 	if (err) {
@@ -2301,6 +2230,11 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+void drbd_set_res_opts_defaults(struct res_opts *r)
+{
+	return set_res_opts_defaults(r);
+}
+
 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
@@ -2325,7 +2259,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 
 	res_opts = tconn->res_opts;
 	if (should_set_defaults(info))
-		drbd_set_res_opts_default(&res_opts);
+		set_res_opts_defaults(&res_opts);
 
 	err = res_opts_from_attrs(&res_opts, info);
 	if (err) {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 10144d546a66..549800668cb9 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -110,63 +110,62 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	 * but it won't propagate through the stack */
 	__u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	max_bio_bvecs)
 
-	__u32_field(6, GENLA_F_MANDATORY,	on_io_error)
-	__u32_field(7, GENLA_F_MANDATORY,	fencing)
-
-	__u32_field(8,	GENLA_F_MANDATORY,	resync_rate)
-	__u32_field(9,	GENLA_F_MANDATORY,	resync_after)
-	__u32_field(10,	GENLA_F_MANDATORY,	al_extents)
-	__u32_field(11,	GENLA_F_MANDATORY,	c_plan_ahead)
-	__u32_field(12,	GENLA_F_MANDATORY,	c_delay_target)
-	__u32_field(13,	GENLA_F_MANDATORY,	c_fill_target)
-	__u32_field(14,	GENLA_F_MANDATORY,	c_max_rate)
-	__u32_field(15,	GENLA_F_MANDATORY,	c_min_rate)
-
-	__flg_field(16, GENLA_F_MANDATORY,	no_disk_barrier)
-	__flg_field(17, GENLA_F_MANDATORY,	no_disk_flush)
-	__flg_field(18, GENLA_F_MANDATORY,	no_disk_drain)
-	__flg_field(19, GENLA_F_MANDATORY,	no_md_flush)
-
+	__u32_field_def(6, GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RATE_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_AFTER_DEF)
+	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+	__flg_field_def(16, GENLA_F_MANDATORY,	no_disk_barrier, 0)
+	__flg_field_def(17, GENLA_F_MANDATORY,	no_disk_flush, 0)
+	__flg_field_def(18, GENLA_F_MANDATORY,	no_disk_drain, 0)
+	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-	__str_field(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
-	__u32_field(2,	GENLA_F_MANDATORY,	on_no_data)
+	__str_field_def(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field_def(2,	GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__bin_field(1,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	my_addr,	128)
 	__bin_field(2,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	peer_addr,	128)
-	__str_field(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+	__str_field_def(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field(8,	GENLA_F_MANDATORY,	wire_protocol)
-	__u32_field(9,	GENLA_F_MANDATORY,	try_connect_int)
-	__u32_field(10,	GENLA_F_MANDATORY,	timeout)
-	__u32_field(11,	GENLA_F_MANDATORY,	ping_int)
-	__u32_field(12,	GENLA_F_MANDATORY,	ping_timeo)
-	__u32_field(13,	GENLA_F_MANDATORY,	sndbuf_size)
-	__u32_field(14,	GENLA_F_MANDATORY,	rcvbuf_size)
-	__u32_field(15,	GENLA_F_MANDATORY,	ko_count)
-	__u32_field(16,	GENLA_F_MANDATORY,	max_buffers)
-	__u32_field(17,	GENLA_F_MANDATORY,	max_epoch_size)
-	__u32_field(18,	GENLA_F_MANDATORY,	unplug_watermark)
-	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_0p)
-	__u32_field(20,	GENLA_F_MANDATORY,	after_sb_1p)
-	__u32_field(21,	GENLA_F_MANDATORY,	after_sb_2p)
-	__u32_field(22,	GENLA_F_MANDATORY,	rr_conflict)
-	__u32_field(23,	GENLA_F_MANDATORY,	on_congestion)
-	__u32_field(24,	GENLA_F_MANDATORY,	cong_fill)
-	__u32_field(25,	GENLA_F_MANDATORY,	cong_extents)
-	__flg_field(26, GENLA_F_MANDATORY,	two_primaries)
+	__str_field_def(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(8,	GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	try_connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(10,	GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(11,	GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(12,	GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(13,	GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(14,	GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(15,	GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(16,	GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(17,	GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(18,	GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(19,	GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(20,	GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(21,	GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(22,	GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(23,	GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, 0)
 	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
-	__flg_field(28, GENLA_F_MANDATORY,	no_cork)
-	__flg_field(29, GENLA_F_MANDATORY,	always_asbp)
+	__flg_field_def(28, GENLA_F_MANDATORY,	no_cork, 0)
+	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, 0)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-	__flg_field(31,	GENLA_F_MANDATORY,	use_rle)
+	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, 0)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index e458282a3728..e908f1c50355 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -427,6 +427,32 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 
 #include GENL_MAGIC_INCLUDE_FILE
 
+
+/* Functions for initializing structs to default values.  */
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put)
+#undef __u32_field_def
+#define __u32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __flg_field_def
+#define __flg_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __str_field_def
+#define __str_field_def(attr_nr, attr_flag, name, maxlen)		\
+	memset(x->name, 0, sizeof(x->name));				\
+	x->name ## _len = 0;
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
+static void set_ ## s_name ## _defaults(struct s_name *x) {	\
+s_fields								\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
 #endif /* __KERNEL__ */
 
 /* }}}1 */
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 9a605b9ee834..f2c7cc7831df 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -107,6 +107,14 @@ enum {
 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
 			nla_memcpy, NLA_PUT)
 
+/* fields with default values */
+#define __flg_field_def(attr_nr, attr_flag, name, default) \
+	__flg_field(attr_nr, attr_flag, name)
+#define __u32_field_def(attr_nr, attr_flag, name, default) \
+	__u32_field(attr_nr, attr_flag, name)
+#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
+	__str_field(attr_nr, attr_flag, name, maxlen)
+
 #define __nla_put_flag(skb, attrtype, value)		\
 	do {						\
 		if (value)				\
-- 
cgit v1.2.3-55-g7522


From 563e4cf25ec804eb02cd30a41baa2fcc6c06679b Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Wed, 4 May 2011 10:33:52 +0200
Subject: drbd: Introduce __s32_field in the genetlink macro magic

...and drop explicit typecasts (int)meta_dev_idx < 0.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c      | 8 ++++----
 include/linux/drbd_genl.h         | 2 +-
 include/linux/genl_magic_struct.h | 3 +++
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index a1854e3aa15e..b8ea4807c981 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1253,7 +1253,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
@@ -1289,7 +1289,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 */
 	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				  ((int)nbc->dc.meta_dev_idx < 0) ?
+				  (nbc->dc.meta_dev_idx < 0) ?
 				  (void *)mdev : (void *)drbd_m_holder);
 	if (IS_ERR(bdev)) {
 		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
@@ -1325,7 +1325,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	if ((int)nbc->dc.meta_dev_idx < 0) {
+	if (nbc->dc.meta_dev_idx < 0) {
 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
 		/* at least one MB, otherwise it does not make sense */
 		min_md_device_sectors = (2<<10);
@@ -1356,7 +1356,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		dev_warn(DEV, "==> truncating very big lower level device "
 			"to currently maximum possible %llu sectors <==\n",
 			(unsigned long long) max_possible_sectors);
-		if ((int)nbc->dc.meta_dev_idx >= 0)
+		if (nbc->dc.meta_dev_idx >= 0)
 			dev_warn(DEV, "==>> using internal or flexible "
 				      "meta data may help <<==\n");
 	}
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 549800668cb9..f143e3c0f33b 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -102,7 +102,7 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	backing_dev,	128)
 	__str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev,	128)
-	__u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
+	__s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
 
 	/* use the resize command to try and change the disk_size */
 	__u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	disk_size)
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index f2c7cc7831df..ddbdd0a24476 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -97,6 +97,9 @@ enum {
 #define __u32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
 			nla_get_u32, NLA_PUT_U32)
+#define __s32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
+			nla_get_u32, NLA_PUT_U32)
 #define __u64_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
 			nla_get_u64, NLA_PUT_U64)
-- 
cgit v1.2.3-55-g7522


From a5d8e1fb9d22851de89bbf52db6b11c56b895dd4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 4 May 2011 16:06:51 +0200
Subject: drbd: Convert boolean flags on netlink from NLA_FLAG to NLA_U8

Flags of type NLA_FLAG are either present or absent, but do not have a
value by themselves.  Use type NLA_U8 for our boolean flags instead, and
use the value to determine if the flag should be on or off.

On the drbdsetup command line, all those flags have an optional yes/no
argument which defaults to yes.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_struct.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index ddbdd0a24476..b1ddbb5bd725 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -86,8 +86,8 @@ enum {
 
 /* possible field types */
 #define __flg_field(attr_nr, attr_flag, name) \
-	__field(attr_nr, attr_flag, name, NLA_FLAG, char, \
-			nla_get_flag, __nla_put_flag)
+	__field(attr_nr, attr_flag, name, NLA_U8, char, \
+			nla_get_u8, NLA_PUT_U8)
 #define __u8_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
 			nla_get_u8, NLA_PUT_U8)
@@ -118,12 +118,6 @@ enum {
 #define __str_field_def(attr_nr, attr_flag, name, maxlen) \
 	__str_field(attr_nr, attr_flag, name, maxlen)
 
-#define __nla_put_flag(skb, attrtype, value)		\
-	do {						\
-		if (value)				\
-			NLA_PUT_FLAG(skb, attrtype);	\
-	} while (0)
-
 #define GENL_op_init(args...)	args
 #define GENL_doit(handler)		\
 	.doit = handler,		\
-- 
cgit v1.2.3-55-g7522


From 66b2f6b9c59c5e7003e13281dfe72e174f93988c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-disk-flushes into disk-flushes={yes|no}

Change the --no-disk-flushes drbdsetup command line option as well as
the no_disk_flush netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd_genl.h          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 83d39859a9fe..e7a6eeae94e2 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1179,7 +1179,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
 	rcu_read_lock();
 	dc = rcu_dereference(mdev->ldev->disk_conf);
 
-	if (wo == WO_bdev_flush && dc->no_disk_flush)
+	if (wo == WO_bdev_flush && !dc->disk_flushes)
 		wo = WO_drain_io;
 	if (wo == WO_drain_io && dc->no_disk_drain)
 		wo = WO_none;
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index f143e3c0f33b..945c4dd3470c 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -122,8 +122,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
 	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
 
-	__flg_field_def(16, GENLA_F_MANDATORY,	no_disk_barrier, 0)
-	__flg_field_def(17, GENLA_F_MANDATORY,	no_disk_flush, 0)
+	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
+	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
 	__flg_field_def(18, GENLA_F_MANDATORY,	no_disk_drain, 0)
 	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
 )
-- 
cgit v1.2.3-55-g7522


From d0c980e236243cd03aa2291243587ac1ba3c2b04 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-disk-drain into disk-drain={yes|no}

Change the --no-disk-drain drbdsetup command line option as well as
the no_disk_drain netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd_genl.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e7a6eeae94e2..5d1bdda8ec9f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1181,7 +1181,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
 
 	if (wo == WO_bdev_flush && !dc->disk_flushes)
 		wo = WO_drain_io;
-	if (wo == WO_drain_io && dc->no_disk_drain)
+	if (wo == WO_drain_io && !dc->disk_drain)
 		wo = WO_none;
 	rcu_read_unlock();
 	mdev->write_ordering = wo;
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 945c4dd3470c..30ad6600b444 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -124,7 +124,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 
 	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
 	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
-	__flg_field_def(18, GENLA_F_MANDATORY,	no_disk_drain, 0)
+	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, 1)
 	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
 )
 
-- 
cgit v1.2.3-55-g7522


From e544046ab842ab93c275a6fc4e043c1cb637076d Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-md-flushes into md-flushes={yes|no}

Change the --no-md-flushes drbdsetup command line option as well as
the no_md_flush netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 6 +++---
 include/linux/drbd_genl.h    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9af097416e26..4a946a877bde 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1449,10 +1449,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
-	if (new_disk_conf->no_md_flush)
-		set_bit(MD_NO_FUA, &mdev->flags);
-	else
+	if (new_disk_conf->md_flushes)
 		clear_bit(MD_NO_FUA, &mdev->flags);
+	else
+		set_bit(MD_NO_FUA, &mdev->flags);
 
 	/* Point of no return reached.
 	 * Devices and memory are no longer released by error cleanup below.
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 30ad6600b444..53518fc23154 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -125,7 +125,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
 	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
 	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, 1)
-	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
+	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, 1)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-- 
cgit v1.2.3-55-g7522


From bb77d34ecc6fe6cdc3f4f0841a516695c2eacc04 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-tcp-cork into tcp-cork={yes|no}

Change the --no-tcp-cork drbdsetup command line option as well as
the no_cork netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 8 ++++----
 drivers/block/drbd/drbd_worker.c   | 2 +-
 include/linux/drbd_genl.h          | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5d1bdda8ec9f..b4858bb78940 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5040,7 +5040,7 @@ int drbd_asender(struct drbd_thread *thi)
 	int expect   = header_size;
 	bool ping_timeout_active = false;
 	struct net_conf *nc;
-	int ping_timeo, no_cork, ping_int;
+	int ping_timeo, tcp_cork, ping_int;
 
 	current->policy = SCHED_RR;  /* Make this a realtime task! */
 	current->rt_priority = 2;    /* more important than all other tasks */
@@ -5051,7 +5051,7 @@ int drbd_asender(struct drbd_thread *thi)
 		rcu_read_lock();
 		nc = rcu_dereference(tconn->net_conf);
 		ping_timeo = nc->ping_timeo;
-		no_cork = nc->no_cork;
+		tcp_cork = nc->tcp_cork;
 		ping_int = nc->ping_int;
 		rcu_read_unlock();
 
@@ -5066,14 +5066,14 @@ int drbd_asender(struct drbd_thread *thi)
 
 		/* TODO: conditionally cork; it may hurt latency if we cork without
 		   much to send */
-		if (!no_cork)
+		if (tcp_cork)
 			drbd_tcp_cork(tconn->meta.socket);
 		if (tconn_finish_peer_reqs(tconn)) {
 			conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
 			goto reconnect;
 		}
 		/* but unconditionally uncork unless disabled */
-		if (!no_cork)
+		if (tcp_cork)
 			drbd_tcp_uncork(tconn->meta.socket);
 
 		/* short circuit, recv_msg would return EINTR anyways. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index e37c42d5dd6e..78c3de49eff6 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1694,7 +1694,7 @@ int drbd_worker(struct drbd_thread *thi)
 
 			rcu_read_lock();
 			nc = rcu_dereference(tconn->net_conf);
-			cork = nc ? !nc->no_cork : 0;
+			cork = nc ? nc->tcp_cork : 0;
 			rcu_read_unlock();
 
 			if (tconn->data.socket && cork)
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 53518fc23154..6632d10f1ee1 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -162,7 +162,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
 	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, 0)
 	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
-	__flg_field_def(28, GENLA_F_MANDATORY,	no_cork, 0)
+	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, 1)
 	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, 0)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
 	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, 0)
-- 
cgit v1.2.3-55-g7522


From 7bac3e6f7e74993475a94487effe05dc1f68bdc7 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Fri, 6 May 2011 17:50:57 +0200
Subject: drbd: Also define the default values of boolean flags in a single
 place

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h   | 16 ++++++++--------
 include/linux/drbd_limits.h | 10 ++++++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 6632d10f1ee1..02647dc8c67c 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -122,10 +122,10 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
 	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
 
-	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
-	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
-	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, 1)
-	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, 1)
+	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -160,12 +160,12 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__u32_field_def(23,	GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
 	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
 	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, 0)
+	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
 	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
-	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, 1)
-	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, 0)
+	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, 0)
+	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index bcebb016fda3..3d3e2d5125cb 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -170,4 +170,14 @@
 
 #define DRBD_PROTOCOL_DEF DRBD_PROT_C
 
+#define DRBD_DISK_BARRIER_DEF	0
+#define DRBD_DISK_FLUSHES_DEF	1
+#define DRBD_DISK_DRAIN_DEF	1
+#define DRBD_MD_FLUSHES_DEF	1
+#define DRBD_TCP_CORK_DEF	1
+
+#define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
+#define DRBD_ALWAYS_ASBP_DEF	0
+#define DRBD_USE_RLE_DEF	0
+
 #endif
-- 
cgit v1.2.3-55-g7522


From 6139f60dc192e2c5478c1126d1aff7905dc0a98a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Fri, 6 May 2011 20:00:02 +0200
Subject: drbd: Rename the want_lose field/flag to discard_my_data

This is what it is called in config files and on the command line as
well.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  2 +-
 drivers/block/drbd/drbd_main.c     |  6 +++---
 drivers/block/drbd/drbd_nl.c       |  4 ++--
 drivers/block/drbd/drbd_receiver.c | 14 +++++++-------
 include/linux/drbd_genl.h          |  2 +-
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 56b190c65546..fa36757ffc4a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -413,7 +413,7 @@ struct p_rs_param_95 {
 } __packed;
 
 enum drbd_conn_flags {
-	CF_WANT_LOSE = 1,
+	CF_DISCARD_MY_DATA = 1,
 	CF_DRY_RUN = 2,
 };
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 22c2b4c881da..86c8bc5ac603 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -943,8 +943,8 @@ int __drbd_send_protocol(struct drbd_tconn *tconn)
 	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
 	p->two_primaries = cpu_to_be32(nc->two_primaries);
 	cf = 0;
-	if (nc->want_lose)
-		cf |= CF_WANT_LOSE;
+	if (nc->discard_my_data)
+		cf |= CF_DISCARD_MY_DATA;
 	if (nc->dry_run)
 		cf |= CF_DRY_RUN;
 	p->conn_flags    = cpu_to_be32(cf);
@@ -988,7 +988,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
 	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
 	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
 	rcu_read_lock();
-	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0;
+	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
 	rcu_read_unlock();
 	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
 	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 195428ee6052..9a82306adf92 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -606,7 +606,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		mutex_lock(&mdev->tconn->conf_update);
 		nc = mdev->tconn->net_conf;
 		if (nc)
-			nc->want_lose = 0; /* without copy; single bit op is atomic */
+			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
 		mutex_unlock(&mdev->tconn->conf_update);
 
 		set_disk_ro(mdev->vdisk, false);
@@ -1738,7 +1738,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 			if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
 				return ERR_STONITH_AND_PROT_A;
 		}
-		if (mdev->state.role == R_PRIMARY && new_conf->want_lose)
+		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
 			return ERR_DISCARD;
 	}
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index aa42967398e3..e4e8f8a408d1 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2908,9 +2908,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 	}
 
 	if (hg == -100) {
-		if (nc->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
+		if (nc->discard_my_data && !(mdev->p_uuid[UI_FLAGS]&1))
 			hg = -1;
-		if (!nc->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
+		if (!nc->discard_my_data && (mdev->p_uuid[UI_FLAGS]&1))
 			hg = 1;
 
 		if (abs(hg) < 100)
@@ -3009,7 +3009,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	struct p_protocol *p = pi->data;
 	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
-	int p_want_lose, p_two_primaries, cf;
+	int p_discard_my_data, p_two_primaries, cf;
 	struct net_conf *nc;
 
 	p_proto		= be32_to_cpu(p->protocol);
@@ -3018,7 +3018,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
 	p_two_primaries = be32_to_cpu(p->two_primaries);
 	cf		= be32_to_cpu(p->conn_flags);
-	p_want_lose = cf & CF_WANT_LOSE;
+	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
 
 	if (tconn->agreed_pro_version >= 87) {
 		char integrity_alg[SHARED_SECRET_MAX];
@@ -3075,8 +3075,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 		goto disconnect_rcu_unlock;
 	}
 
-	if (p_want_lose && nc->want_lose) {
-		conn_err(tconn, "both sides have the 'want_lose' flag set\n");
+	if (p_discard_my_data && nc->discard_my_data) {
+		conn_err(tconn, "both sides have the 'discard_my_data' flag set\n");
 		goto disconnect_rcu_unlock;
 	}
 
@@ -3806,7 +3806,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 	}
 
 	mutex_lock(&mdev->tconn->conf_update);
-	mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */
+	mdev->tconn->net_conf->discard_my_data = 0; /* without copy; single bit op is atomic */
 	mutex_unlock(&mdev->tconn->conf_update);
 
 	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 02647dc8c67c..6aece551d87e 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -161,7 +161,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
 	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
 	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
+	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	discard_my_data)
 	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
 	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-- 
cgit v1.2.3-55-g7522


From 6394b9358e6187414b7a6de7ba2c681ee4a790ac Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 11 May 2011 14:29:52 +0200
Subject: drbd: Refer to resync-rate consistently throughout the code

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      | 6 +++---
 drivers/block/drbd/drbd_main.c     | 4 ++--
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd_genl.h          | 2 +-
 include/linux/drbd_limits.h        | 7 ++++---
 5 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index fa36757ffc4a..8026adacd3d2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -389,21 +389,21 @@ struct p_barrier_ack {
 } __packed;
 
 struct p_rs_param {
-	u32 rate;
+	u32 resync_rate;
 
 	      /* Since protocol version 88 and higher. */
 	char verify_alg[0];
 } __packed;
 
 struct p_rs_param_89 {
-	u32 rate;
+	u32 resync_rate;
         /* protocol version 89: */
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 } __packed;
 
 struct p_rs_param_95 {
-	u32 rate;
+	u32 resync_rate;
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 	u32 c_plan_ahead;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 86c8bc5ac603..26d7763d5255 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -888,14 +888,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
 
 	if (get_ldev(mdev)) {
 		dc = rcu_dereference(mdev->ldev->disk_conf);
-		p->rate = cpu_to_be32(dc->resync_rate);
+		p->resync_rate = cpu_to_be32(dc->resync_rate);
 		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
 		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
 		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
 		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
 		put_ldev(mdev);
 	} else {
-		p->rate = cpu_to_be32(DRBD_RATE_DEF);
+		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
 		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
 		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
 		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e4e8f8a408d1..684f79542727 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 		old_disk_conf = mdev->ldev->disk_conf;
 		*new_disk_conf = *old_disk_conf;
 
-		new_disk_conf->resync_rate = be32_to_cpu(p->rate);
+		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
 	}
 
 	if (apv >= 88) {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 6aece551d87e..778708d92939 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -113,7 +113,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(6, GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
 	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
 
-	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RATE_DEF)
+	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
 	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_AFTER_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 3d3e2d5125cb..48339ae69d50 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -98,10 +98,11 @@
 
 /* syncer { */
   /* FIXME allow rate to be zero? */
-#define DRBD_RATE_MIN 1
+#define DRBD_RESYNC_RATE_MIN 1
 /* channel bonding 10 GbE, or other hardware */
-#define DRBD_RATE_MAX (4 << 20)
-#define DRBD_RATE_DEF 250  /* kb/second */
+#define DRBD_RESYNC_RATE_MAX (4 << 20)
+#define DRBD_RESYNC_RATE_DEF 250
+#define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */
 
   /* less than 7 would hit performance unnecessarily.
    * 919 slots context information per transaction,
-- 
cgit v1.2.3-55-g7522


From 69ef82dea4c34e4a0541fc3f415b0fef70fe12b0 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 11 May 2011 14:34:35 +0200
Subject: drbd: Refer to connect-int consistently throughout the code

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 12 ++++++------
 include/linux/drbd_genl.h          |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 684f79542727..7deade196a33 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -617,7 +617,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 	struct sockaddr_in6 peer_in6;
 	struct net_conf *nc;
 	int err, peer_addr_len, my_addr_len;
-	int sndbuf_size, rcvbuf_size, try_connect_int;
+	int sndbuf_size, rcvbuf_size, connect_int;
 	int disconnect_on_error = 1;
 
 	rcu_read_lock();
@@ -629,7 +629,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
-	try_connect_int = nc->try_connect_int;
+	connect_int = nc->connect_int;
 
 	my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6));
 	memcpy(&src_in6, nc->my_addr, my_addr_len);
@@ -653,7 +653,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 	}
 
 	sock->sk->sk_rcvtimeo =
-	sock->sk->sk_sndtimeo = try_connect_int * HZ;
+	sock->sk->sk_sndtimeo = connect_int * HZ;
 	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
 
        /* explicitly bind to the configured IP as source IP
@@ -702,7 +702,7 @@ out:
 static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 {
 	int timeo, err, my_addr_len;
-	int sndbuf_size, rcvbuf_size, try_connect_int;
+	int sndbuf_size, rcvbuf_size, connect_int;
 	struct socket *s_estab = NULL, *s_listen;
 	struct sockaddr_in6 my_addr;
 	struct net_conf *nc;
@@ -717,7 +717,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
-	try_connect_int = nc->try_connect_int;
+	connect_int = nc->connect_int;
 
 	my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6));
 	memcpy(&my_addr, nc->my_addr, my_addr_len);
@@ -731,7 +731,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 		goto out;
 	}
 
-	timeo = try_connect_int * HZ;
+	timeo = connect_int * HZ;
 	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
 
 	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 778708d92939..67c816c0fc28 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -143,7 +143,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__str_field_def(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
 	__str_field_def(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
 	__u32_field_def(8,	GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	try_connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
 	__u32_field_def(12,	GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-- 
cgit v1.2.3-55-g7522


From 95f8efd08bcce65df994049a292b94e56c7ada67 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Thu, 12 May 2011 11:15:34 +0200
Subject: drbd: Fix the upper limit of resync-after

The 32-bit resync_after netlink field takes a device minor number as
parameter, which is no longer limited to 255.  We cannot statically
verify which device numbers are valid, so set the ummer limit to the
highest possible signed 32-bit integer.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h    |  4 ++--
 drivers/block/drbd/drbd_nl.c     |  4 ++--
 drivers/block/drbd/drbd_worker.c | 26 +++++++++++++-------------
 include/linux/drbd.h             |  4 ++--
 include/linux/drbd_genl.h        |  2 +-
 include/linux/drbd_limits.h      |  7 ++++---
 6 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8026adacd3d2..e16722840767 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1408,8 +1408,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
 /* drbd_worker.c */
 extern int drbd_worker(struct drbd_thread *thi);
-enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor);
-void drbd_sync_after_changed(struct drbd_conf *mdev);
+enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor);
+void drbd_resync_after_changed(struct drbd_conf *mdev);
 extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
 extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9a82306adf92..74c27f1507f3 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1183,10 +1183,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	write_lock_irq(&global_state_lock);
-	retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after);
+	retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after);
 	if (retcode == NO_ERROR) {
 		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
-		drbd_sync_after_changed(mdev);
+		drbd_resync_after_changed(mdev);
 	}
 	write_unlock_irq(&global_state_lock);
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index ec8f4245ef9a..6410c55831e0 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -57,7 +57,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel);
 
 /* About the global_state_lock
    Each state transition on an device holds a read lock. In case we have
-   to evaluate the sync after dependencies, we grab a write lock, because
+   to evaluate the resync after dependencies, we grab a write lock, because
    we need stable states on all devices for that.  */
 rwlock_t global_state_lock;
 
@@ -1340,17 +1340,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
 static int _drbd_may_sync_now(struct drbd_conf *mdev)
 {
 	struct drbd_conf *odev = mdev;
-	int ra;
+	int resync_after;
 
 	while (1) {
 		if (!odev->ldev)
 			return 1;
 		rcu_read_lock();
-		ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
 		rcu_read_unlock();
-		if (ra == -1)
+		if (resync_after == -1)
 			return 1;
-		odev = minor_to_mdev(ra);
+		odev = minor_to_mdev(resync_after);
 		if (!expect(odev))
 			return 1;
 		if ((odev->state.conn >= C_SYNC_SOURCE &&
@@ -1426,36 +1426,36 @@ void suspend_other_sg(struct drbd_conf *mdev)
 }
 
 /* caller must hold global_state_lock */
-enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor)
+enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
 {
 	struct drbd_conf *odev;
-	int ra;
+	int resync_after;
 
 	if (o_minor == -1)
 		return NO_ERROR;
 	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
-		return ERR_SYNC_AFTER;
+		return ERR_RESYNC_AFTER;
 
 	/* check for loops */
 	odev = minor_to_mdev(o_minor);
 	while (1) {
 		if (odev == mdev)
-			return ERR_SYNC_AFTER_CYCLE;
+			return ERR_RESYNC_AFTER_CYCLE;
 
 		rcu_read_lock();
-		ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
 		rcu_read_unlock();
 		/* dependency chain ends here, no cycles. */
-		if (ra == -1)
+		if (resync_after == -1)
 			return NO_ERROR;
 
 		/* follow the dependency chain */
-		odev = minor_to_mdev(ra);
+		odev = minor_to_mdev(resync_after);
 	}
 }
 
 /* caller must hold global_state_lock */
-void drbd_sync_after_changed(struct drbd_conf *mdev)
+void drbd_resync_after_changed(struct drbd_conf *mdev)
 {
 	int changes;
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 6c7c85d8fc41..05063e6db81f 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -130,8 +130,8 @@ enum drbd_ret_code {
 	ERR_INTR		= 129, /* EINTR */
 	ERR_RESIZE_RESYNC	= 130,
 	ERR_NO_PRIMARY		= 131,
-	ERR_SYNC_AFTER		= 132,
-	ERR_SYNC_AFTER_CYCLE	= 133,
+	ERR_RESYNC_AFTER	= 132,
+	ERR_RESYNC_AFTER_CYCLE	= 133,
 	ERR_PAUSE_IS_SET	= 134,
 	ERR_PAUSE_IS_CLEAR	= 135,
 	ERR_PACKET_NR		= 137,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 67c816c0fc28..a59466f7f661 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
 
 	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_AFTER_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
 	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 48339ae69d50..c4a8f0fef7b2 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -113,9 +113,10 @@
 #define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  127
 
-#define DRBD_AFTER_MIN  -1
-#define DRBD_AFTER_MAX  255
-#define DRBD_AFTER_DEF  -1
+#define DRBD_RESYNC_AFTER_MIN  -1
+#define DRBD_RESYNC_AFTER_MAX  (1<<30)
+#define DRBD_RESYNC_AFTER_DEF  -1
+#define DRBD_RESYNC_AFTER_SCALE '1'
 
 /* } */
 
-- 
cgit v1.2.3-55-g7522


From 3a45abd577727d2268e190d372600f8652883453 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Thu, 12 May 2011 12:02:54 +0200
Subject: drbd: Convert resync-after into a signed netlink field

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h         | 2 +-
 include/linux/genl_magic_func.h   | 3 +++
 include/linux/genl_magic_struct.h | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index a59466f7f661..7b174a093a8d 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
 
 	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
+	__s32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
 	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index e908f1c50355..94e839aafae3 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -437,6 +437,9 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 #undef __u32_field_def
 #define __u32_field_def(attr_nr, attr_flag, name, default)		\
 	x->name = default;
+#undef __s32_field_def
+#define __s32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
 #undef __flg_field_def
 #define __flg_field_def(attr_nr, attr_flag, name, default)		\
 	x->name = default;
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index b1ddbb5bd725..0fca21fd1af5 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -115,6 +115,8 @@ enum {
 	__flg_field(attr_nr, attr_flag, name)
 #define __u32_field_def(attr_nr, attr_flag, name, default) \
 	__u32_field(attr_nr, attr_flag, name)
+#define __s32_field_def(attr_nr, attr_flag, name, default) \
+	__s32_field(attr_nr, attr_flag, name)
 #define __str_field_def(attr_nr, attr_flag, name, maxlen) \
 	__str_field(attr_nr, attr_flag, name, maxlen)
 
-- 
cgit v1.2.3-55-g7522


From c5482bbd9607bf38cbc952eacaa429e6ba3160a0 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 11 May 2011 14:44:55 +0200
Subject: drbd: Rename DISK_SIZE_SECT -> DISK_SIZE

We don't have the units in constant names in other places, either.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index c4a8f0fef7b2..8f8bbea545e0 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -125,9 +125,10 @@
  * the upper limit with 64bit kernel, enough ram and flexible meta data
  * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
-#define DRBD_DISK_SIZE_SECT_MIN  0
-#define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
-#define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_MIN  0
+#define DRBD_DISK_SIZE_MAX  (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_SCALE 's'  /* sectors */
 
 #define DRBD_ON_IO_ERROR_DEF EP_DETACH
 #define DRBD_FENCING_DEF FP_DONT_CARE
-- 
cgit v1.2.3-55-g7522


From dcb20d1a8e7d9602e52a9b673ae4d7f746d2cbb2 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 16 May 2011 14:30:24 +0200
Subject: drbd: Refuse to change network options online when...

* the peer does not speak protocol_version 100 and the
  user wants to change one of:
    - wire_protocol
    - two_primaries
    - integrity_alg

* the user wants to remove the allow_two_primaries flag
  when there are two primaries

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 22 ++++++++++++++++++----
 include/linux/drbd.h         |  1 +
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 74c27f1507f3..133a6724657d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1722,10 +1722,24 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 	struct drbd_conf *mdev;
 	int i;
 
-	if (old_conf && tconn->agreed_pro_version < 100 &&
-	    tconn->cstate == C_WF_REPORT_PARAMS &&
-	    new_conf->wire_protocol != old_conf->wire_protocol)
-		return ERR_NEED_APV_100;
+	if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) {
+		if (new_conf->wire_protocol != old_conf->wire_protocol)
+			return ERR_NEED_APV_100;
+
+		if (new_conf->two_primaries != old_conf->two_primaries)
+			return ERR_NEED_APV_100;
+
+		if (!new_conf->integrity_alg != !old_conf->integrity_alg)
+			return ERR_NEED_APV_100;
+
+		if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg))
+			return ERR_NEED_APV_100;
+	}
+
+	if (!new_conf->two_primaries &&
+	    conn_highest_role(tconn) == R_PRIMARY &&
+	    conn_highest_peer(tconn) == R_PRIMARY)
+		return ERR_NEED_ALLOW_TWO_PRI;
 
 	if (new_conf->two_primaries &&
 	    (new_conf->wire_protocol != DRBD_PROT_C))
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 05063e6db81f..679e81123229 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -161,6 +161,7 @@ enum drbd_ret_code {
 	ERR_MINOR_EXISTS	= 161,
 	ERR_INVALID_REQUEST	= 162,
 	ERR_NEED_APV_100	= 163,
+	ERR_NEED_ALLOW_TWO_PRI  = 164,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
-- 
cgit v1.2.3-55-g7522


From 309f0b70ab789bf85c5f5f32dbc466d42f024747 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Fri, 13 May 2011 01:24:14 +0200
Subject: drbd: Use more generic constant names

These constants are useful for the same purpose in more than one place.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 8f8bbea545e0..3627f760966e 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -113,10 +113,10 @@
 #define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  127
 
-#define DRBD_RESYNC_AFTER_MIN  -1
-#define DRBD_RESYNC_AFTER_MAX  (1<<30)
-#define DRBD_RESYNC_AFTER_DEF  -1
-#define DRBD_RESYNC_AFTER_SCALE '1'
+#define DRBD_MINOR_NUMBER_MIN  -1
+#define DRBD_MINOR_NUMBER_MAX  (1<<30)
+#define DRBD_MINOR_NUMBER_DEF  -1
+#define DRBD_MINOR_NUMBER_SCALE '1'
 
 /* } */
 
-- 
cgit v1.2.3-55-g7522


From 509100e6012db92f4af3796436b450447c6c8268 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 17 May 2011 13:29:46 +0200
Subject: drbd: Output signed / unsigned netlink fields correctly

Note: All input values are still treated as signed; unsigned long long values
are still broken.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_func.h   | 23 +++++++++++------
 include/linux/genl_magic_struct.h | 52 ++++++++++++++++++++++++++++-----------
 2 files changed, 53 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 94e839aafae3..2ae16126c6a4 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -78,12 +78,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
 { s_fields };
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\
+		 __put, __is_signed)					\
 	[__nla_type(attr_nr)] = { .type = nla_type },
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
-		__get, __put)						\
+		__get, __put, __is_signed)				\
 	[__nla_type(attr_nr)] = { .type = nla_type,			\
 		.len = maxlen - (nla_type == NLA_NUL_STRING) },
 
@@ -241,7 +242,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 		}
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	__assign(attr_nr, attr_flag, name, nla_type, type,		\
 			if (s)						\
 				s->name = __get(nla);			\
@@ -249,7 +251,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 
 /* validate_nla() already checked nla_len <= maxlen appropriately. */
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	__assign(attr_nr, attr_flag, name, nla_type, type,		\
 			if (s)						\
 				s->name ## _len =			\
@@ -410,14 +413,16 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, s->name);				\
 	}
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, min_t(int, maxlen,			\
@@ -431,9 +436,11 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 /* Functions for initializing structs to default values.  */
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put)
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)
 #undef __u32_field_def
 #define __u32_field_def(attr_nr, attr_flag, name, default)		\
 	x->name = default;
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 0fca21fd1af5..ba911da84d9f 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -87,28 +87,28 @@ enum {
 /* possible field types */
 #define __flg_field(attr_nr, attr_flag, name) \
 	__field(attr_nr, attr_flag, name, NLA_U8, char, \
-			nla_get_u8, NLA_PUT_U8)
+			nla_get_u8, NLA_PUT_U8, false)
 #define __u8_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
-			nla_get_u8, NLA_PUT_U8)
+			nla_get_u8, NLA_PUT_U8, false)
 #define __u16_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
-			nla_get_u16, NLA_PUT_U16)
+			nla_get_u16, NLA_PUT_U16, false)
 #define __u32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
-			nla_get_u32, NLA_PUT_U32)
+			nla_get_u32, NLA_PUT_U32, false)
 #define __s32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
-			nla_get_u32, NLA_PUT_U32)
+			nla_get_u32, NLA_PUT_U32, true)
 #define __u64_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
-			nla_get_u64, NLA_PUT_U64)
+			nla_get_u64, NLA_PUT_U64, false)
 #define __str_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
-			nla_strlcpy, NLA_PUT)
+			nla_strlcpy, NLA_PUT, false)
 #define __bin_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
-			nla_memcpy, NLA_PUT)
+			nla_memcpy, NLA_PUT, false)
 
 /* fields with default values */
 #define __flg_field_def(attr_nr, attr_flag, name, default) \
@@ -174,11 +174,13 @@ enum {								\
 };
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type,	\
+		__get, __put, __is_signed)			\
 	T_ ## name = (__u16)(attr_nr | attr_flag),
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type,	\
+		maxlen, __get, __put, __is_signed)		\
 	T_ ## name = (__u16)(attr_nr | attr_flag),
 
 #include GENL_MAGIC_INCLUDE_FILE
@@ -238,11 +240,13 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
 }
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	case attr_nr:
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	case attr_nr:
 
 #include GENL_MAGIC_INCLUDE_FILE
@@ -260,16 +264,36 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
 struct s_name { s_fields };
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	type name;
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	type name[maxlen];	\
 	__u32 name ## _len;
 
 #include GENL_MAGIC_INCLUDE_FILE
 
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+enum {									\
+	s_fields							\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		is_signed)						\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, is_signed)				\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#include GENL_MAGIC_INCLUDE_FILE
+
 /* }}}1 */
 #endif /* GENL_MAGIC_STRUCT_H */
 /* vim: set foldmethod=marker nofoldenable : */
-- 
cgit v1.2.3-55-g7522


From bbbef2d5ad8203f67274196dac90754ae106a463 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 18 May 2011 16:48:16 +0200
Subject: drbd: Remove unused GENLA_F_MAY_IGNORE flag

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_struct.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index ba911da84d9f..f3c3425ac30f 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -40,11 +40,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * yet implemented features, if newer userland tries to use them even though
  * the genl_family version clearly indicates they are not available.
  *
- * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure.
- * To be used for API extensions for things that have sane defaults,
- * so newer userland can still talk to older kernel, knowing it will
- * silently ignore these attributes if not yet known.
- *
  * NOTE: These flags overload
  *   NLA_F_NESTED		(1 << 15)
  *   NLA_F_NET_BYTEORDER	(1 << 14)
@@ -55,7 +50,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * See also: nla_type()
  */
 enum {
-	GENLA_F_MAY_IGNORE	= 0,
 	GENLA_F_MANDATORY	= 1 << 14,
 	GENLA_F_REQUIRED	= 1 << 15,
 
-- 
cgit v1.2.3-55-g7522


From 5f9359201b5cf1d94fe0e0c47fcba38cfc921863 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Thu, 19 May 2011 17:39:28 +0200
Subject: drbd: Make drbd's use of netlink attribute flags less confusing

Make it more clear in the flag names which flags are internal to drbd, and
which are not.

The check for mandatory attributes is the only extension visible at the netlink
layer.  Attributes with this flag set would look like unknown attributes to
some kernel versions.  The netlink layer would ignore them and also skip
consistency checks on the attribute type and legth.  To avoid this, we check
for mandatory attributes first, remove the mandatory flag, and then process the
attributes normally.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h         | 244 +++++++++++++++++++-------------------
 include/linux/genl_magic_func.h   | 107 ++++++++---------
 include/linux/genl_magic_struct.h |  64 ++++------
 3 files changed, 193 insertions(+), 222 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 7b174a093a8d..4ceecb9307d9 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -86,7 +86,7 @@
  */
 GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
 		/* "arbitrary" size strings, nla_policy.len = 0 */
-	__str_field(1, GENLA_F_MANDATORY,	info_text, 0)
+	__str_field(1, DRBD_GENLA_F_MANDATORY,	info_text, 0)
 )
 
 /* Configuration requests typically need a context to operate on.
@@ -95,133 +95,133 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and/or the replication group (aka resource) name,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
-	__u32_field(1, GENLA_F_MANDATORY,	ctx_volume)
-	__str_field(2, GENLA_F_MANDATORY,	ctx_conn_name, 128)
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
+	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_conn_name, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
-	__str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	backing_dev,	128)
-	__str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev,	128)
-	__s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
+	__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	backing_dev,	128)
+	__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev,	128)
+	__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev_idx)
 
 	/* use the resize command to try and change the disk_size */
-	__u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	disk_size)
+	__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	disk_size)
 	/* we could change the max_bio_bvecs,
 	 * but it won't propagate through the stack */
-	__u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	max_bio_bvecs)
-
-	__u32_field_def(6, GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
-	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
-
-	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__s32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
-	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
-	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
-	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
-	__u32_field_def(13,	GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
-	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
-	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
-
-	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
-	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
-	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
-	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	max_bio_bvecs)
+
+	__u32_field_def(6, DRBD_GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, DRBD_GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
+	__s32_field_def(9,	DRBD_GENLA_F_MANDATORY,	resync_after, DRBD_MINOR_NUMBER_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+	__flg_field_def(16, DRBD_GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-	__str_field_def(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
-	__u32_field_def(2,	GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field_def(2,	DRBD_GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__bin_field(1,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	my_addr,	128)
-	__bin_field(2,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	peer_addr,	128)
-	__str_field_def(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+	__bin_field(1,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	my_addr,	128)
+	__bin_field(2,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	peer_addr,	128)
+	__str_field_def(3,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field_def(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field_def(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field_def(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field_def(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field_def(8,	GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
-	__u32_field_def(10,	GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
-	__u32_field_def(11,	GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
-	__u32_field_def(12,	GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-	__u32_field_def(13,	GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
-	__u32_field_def(14,	GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
-	__u32_field_def(15,	GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
-	__u32_field_def(16,	GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
-	__u32_field_def(17,	GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
-	__u32_field_def(18,	GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
-	__u32_field_def(19,	GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
-	__u32_field_def(20,	GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
-	__u32_field_def(21,	GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
-	__u32_field_def(22,	GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
-	__u32_field_def(23,	GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
-	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
-	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	discard_my_data)
-	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
-	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(6,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(7,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(24,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(25,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(28, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(29, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
+	__flg_field_def(31,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
-	__flg_field(1, GENLA_F_MANDATORY,	assume_uptodate)
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	assume_uptodate)
 )
 
 GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
-	__u64_field(1, GENLA_F_MANDATORY,	resize_size)
-	__flg_field(2, GENLA_F_MANDATORY,	resize_force)
-	__flg_field(3, GENLA_F_MANDATORY,	no_resync)
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
+	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
+	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
 )
 
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 	/* the reason of the broadcast,
 	 * if this is an event triggered broadcast. */
-	__u32_field(1, GENLA_F_MANDATORY,	sib_reason)
-	__u32_field(2, GENLA_F_REQUIRED,	current_state)
-	__u64_field(3, GENLA_F_MANDATORY,	capacity)
-	__u64_field(4, GENLA_F_MANDATORY,	ed_uuid)
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(2, DRBD_F_REQUIRED,	current_state)
+	__u64_field(3, DRBD_GENLA_F_MANDATORY,	capacity)
+	__u64_field(4, DRBD_GENLA_F_MANDATORY,	ed_uuid)
 
 	/* These are for broadcast from after state change work.
 	 * prev_state and new_state are from the moment the state change took
 	 * place, new_state is not neccessarily the same as current_state,
 	 * there may have been more state changes since.  Which will be
 	 * broadcasted soon, in their respective after state change work.  */
-	__u32_field(5, GENLA_F_MANDATORY,	prev_state)
-	__u32_field(6, GENLA_F_MANDATORY,	new_state)
+	__u32_field(5, DRBD_GENLA_F_MANDATORY,	prev_state)
+	__u32_field(6, DRBD_GENLA_F_MANDATORY,	new_state)
 
 	/* if we have a local disk: */
-	__bin_field(7, GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
-	__u32_field(8, GENLA_F_MANDATORY,	disk_flags)
-	__u64_field(9, GENLA_F_MANDATORY,	bits_total)
-	__u64_field(10, GENLA_F_MANDATORY,	bits_oos)
+	__bin_field(7, DRBD_GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, DRBD_GENLA_F_MANDATORY,	disk_flags)
+	__u64_field(9, DRBD_GENLA_F_MANDATORY,	bits_total)
+	__u64_field(10, DRBD_GENLA_F_MANDATORY,	bits_oos)
 	/* and in case resync or online verify is active */
-	__u64_field(11, GENLA_F_MANDATORY,	bits_rs_total)
-	__u64_field(12, GENLA_F_MANDATORY,	bits_rs_failed)
+	__u64_field(11, DRBD_GENLA_F_MANDATORY,	bits_rs_total)
+	__u64_field(12, DRBD_GENLA_F_MANDATORY,	bits_rs_failed)
 
 	/* for pre and post notifications of helper execution */
-	__str_field(13, GENLA_F_MANDATORY,	helper, 32)
-	__u32_field(14, GENLA_F_MANDATORY,	helper_exit_code)
+	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
+	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
 )
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
-	__u64_field(1, GENLA_F_MANDATORY,	ov_start_sector)
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
 )
 
 GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
-	__flg_field(1, GENLA_F_MANDATORY, clear_bm)
+	__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
 )
 
 GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
-	__u32_field(1,	GENLA_F_REQUIRED,	timeout_type)
+	__u32_field(1,	DRBD_F_REQUIRED,	timeout_type)
 )
 
 GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
-	__flg_field(1, GENLA_F_MANDATORY,	force_disconnect)
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
 )
 
 /*
@@ -232,11 +232,11 @@ GENL_mc_group(events)
 	/* kernel -> userspace announcement of changes */
 GENL_notification(
 	DRBD_EVENT, 1, events,
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY)
-	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY)
-	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
 )
 
 	/* query kernel for specific or all info */
@@ -250,116 +250,116 @@ GENL_op(
 	),
 	/* To select the object .doit.
 	 * Or a subset of objects in .dumpit. */
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
 )
 
 #if 0
 	/* TO BE DONE */
 	/* create or destroy resources, aka replication groups */
 GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 #endif
 
 	/* add DRBD minor devices as volumes to resources */
 GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 	/* add or delete replication links to resources */
 GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
 	GENL_doit(drbd_adm_resource_opts),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(
 	DRBD_ADM_CONNECT, 10,
 	GENL_doit(drbd_adm_connect),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_CHG_NET_OPTS, 29,
 	GENL_doit(drbd_adm_net_opts),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
 )
 
 GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 GENL_op(DRBD_ADM_ATTACH, 12,
 	GENL_doit(drbd_adm_attach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
 )
 
 GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
 	GENL_doit(drbd_adm_disk_opts),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_RESIZE, 13,
 	GENL_doit(drbd_adm_resize),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(
 	DRBD_ADM_PRIMARY, 14,
 	GENL_doit(drbd_adm_set_role),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_SECONDARY, 15,
 	GENL_doit(drbd_adm_set_role),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_NEW_C_UUID, 16,
 	GENL_doit(drbd_adm_new_c_uuid),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(
 	DRBD_ADM_START_OV, 17,
 	GENL_doit(drbd_adm_start_ov),
-	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 2ae16126c6a4..58edd403a3ff 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -3,53 +3,6 @@
 
 #include <linux/genl_magic_struct.h>
 
-/*
- * Extension of genl attribute validation policies			{{{1
- *									{{{2
- */
-
-/**
- * nla_is_required - return true if this attribute is required
- * @nla: netlink attribute
- */
-static inline int nla_is_required(const struct nlattr *nla)
-{
-        return nla->nla_type & GENLA_F_REQUIRED;
-}
-
-/**
- * nla_is_mandatory - return true if understanding this attribute is mandatory
- * @nla: netlink attribute
- * Note: REQUIRED attributes are implicitly MANDATORY as well
- */
-static inline int nla_is_mandatory(const struct nlattr *nla)
-{
-        return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED);
-}
-
-/* Functionality to be integrated into nla_parse(), and validate_nla(),
- * respectively.
- *
- * Enforcing the "mandatory" bit is done here,
- * by rejecting unknown mandatory attributes.
- *
- * Part of enforcing the "required" flag would mean to embed it into
- * nla_policy.type, and extending validate_nla(), which currently does
- * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels,
- * so we cannot do that.  Thats why enforcing "required" is done in the
- * generated assignment functions below. */
-static int nla_check_unknown(int maxtype, struct nlattr *head, int len)
-{
-	struct nlattr *nla;
-	int rem;
-        nla_for_each_attr(nla, head, len, rem) {
-		__u16 type = nla_type(nla);
-		if (type > maxtype && nla_is_mandatory(nla))
-			return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
 /*
  * Magic: declare tla policy						{{{1
  * Magic: declare nested policies
@@ -80,13 +33,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\
 		 __put, __is_signed)					\
-	[__nla_type(attr_nr)] = { .type = nla_type },
+	[attr_nr] = { .type = nla_type },
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
 		__get, __put, __is_signed)				\
-	[__nla_type(attr_nr)] = { .type = nla_type,			\
-		.len = maxlen - (nla_type == NLA_NUL_STRING) },
+	[attr_nr] = { .type = nla_type,					\
+		      .len = maxlen - (nla_type == NLA_NUL_STRING) },
 
 #include GENL_MAGIC_INCLUDE_FILE
 
@@ -189,6 +142,43 @@ static struct nlattr *nested_attr_tb[128];
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 #endif
 
+static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
+{
+	struct nlattr *head = nla_data(nla);
+	int len = nla_len(nla);
+	int rem;
+
+	/*
+	 * validate_nla (called from nla_parse_nested) ignores attributes
+	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
+	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
+	 * flag set also, check and remove that flag before calling
+	 * nla_parse_nested.
+	 */
+
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			if (nla_type(nla) > maxtype)
+				return -EOPNOTSUPP;
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
+		}
+	}
+	return 0;
+}
+
+static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype,
+					struct nlattr *nla,
+					const struct nla_policy *policy)
+{
+	int err;
+
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (!err)
+		err = nla_parse_nested(tb, maxtype, nla, policy);
+
+	return err;
+}
+
 #undef GENL_struct
 #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
 /* *_from_attrs functions are static, but potentially unused */		\
@@ -204,12 +194,9 @@ static int __ ## s_name ## _from_attrs(struct s_name *s,		\
 	if (!tla)							\
 		return -ENOMSG;						\
 	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
-	err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
+	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\
 	if (err)							\
 		return err;						\
-	err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla));	\
-	if (err)							\
-	      return err;						\
 									\
 	s_fields							\
 	return 0;							\
@@ -226,17 +213,17 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 }					__attribute__((unused))		\
 
 #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
-		nla = ntb[__nla_type(attr_nr)];				\
+		nla = ntb[attr_nr];						\
 		if (nla) {						\
-			if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+			if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
 				pr_info("<< must not change invariant attr: %s\n", #name);	\
 				return -EEXIST;				\
 			}						\
 			assignment;					\
-		} else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+		} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
 			/* attribute missing from payload, */		\
 			/* which was expected */			\
-		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
+		} else if ((attr_flag) & DRBD_F_REQUIRED) {		\
 			pr_info("<< missing attr: %s\n", #name);	\
 			return -ENOMSG;					\
 		}
@@ -415,7 +402,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
 		__is_signed)						\
-	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, s->name);				\
 	}
@@ -423,7 +410,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
 		__get, __put, __is_signed)				\
-	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, min_t(int, maxlen,			\
 			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index f3c3425ac30f..1d0bd79e27b3 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -26,50 +26,34 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * Extension of genl attribute validation policies			{{{2
  */
 
-/**
- * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement
- *
- * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid.
- * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that.
+/*
+ * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
+ * know about.  This flag can be set in nlattr->nla_type to indicate that this
+ * attribute must not be ignored.
  *
- * @GENLA_F_MANDATORY: if present, receiver _must_ understand it.
- * Without this, unknown attributes (> maxtype) are _silently_ ignored
- * by validate_nla().
+ * We check and remove this flag in drbd_nla_check_mandatory() before
+ * validating the attribute types and lengths via nla_parse_nested().
+ */
+#define DRBD_GENLA_F_MANDATORY (1 << 14)
+
+/*
+ * Flags specific to drbd and not visible at the netlink layer, used in
+ * <struct>_from_attrs and <struct>_to_skb:
  *
- * To be used for API extensions, so older kernel can reject requests for not
- * yet implemented features, if newer userland tries to use them even though
- * the genl_family version clearly indicates they are not available.
+ * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
+ * invalid.
  *
- * NOTE: These flags overload
- *   NLA_F_NESTED		(1 << 15)
- *   NLA_F_NET_BYTEORDER	(1 << 14)
- * from linux/netlink.h, which are not useful for validate_nla():
- * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting
- * .type = NLA_NESTED in the appropriate policy.
+ * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
+ * included in unpriviledged get requests or broadcasts.
  *
- * See also: nla_type()
+ * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
+ * cannot subsequently be changed.
  */
-enum {
-	GENLA_F_MANDATORY	= 1 << 14,
-	GENLA_F_REQUIRED	= 1 << 15,
-
-	/* Below will not be present in the __u16 .nla_type, but can be
-	 * triggered on in <struct>_to_skb resp. <struct>_from_attrs */
-
-	/* To exclude "sensitive" information from broadcasts, or on
-	 * unpriviledged get requests.  This is useful because genetlink
-	 * multicast groups can be listened in on by anyone.  */
-	GENLA_F_SENSITIVE	= 1 << 16,
-
-	/* INVARIAN options cannot be changed at runtime.
-	 * Useful to share an attribute policy and struct definition,
-	 * between some "create" and "change" commands,
-	 * but disallow certain fields to be changed online.
-	 */
-	GENLA_F_INVARIANT	= 1 << 17,
-};
+#define DRBD_F_REQUIRED (1 << 0)
+#define DRBD_F_SENSITIVE (1 << 1)
+#define DRBD_F_INVARIANT (1 << 2)
 
-#define __nla_type(x)	((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK))
+#define __nla_type(x)	((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
 
 /*									}}}1
  * MAGIC
@@ -170,12 +154,12 @@ enum {								\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, type,	\
 		__get, __put, __is_signed)			\
-	T_ ## name = (__u16)(attr_nr | attr_flag),
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type,	\
 		maxlen, __get, __put, __is_signed)		\
-	T_ ## name = (__u16)(attr_nr | attr_flag),
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
 
 #include GENL_MAGIC_INCLUDE_FILE
 
-- 
cgit v1.2.3-55-g7522


From 5084d71d89e1a94193378efb12ac659e4e6ada3f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 24 May 2011 14:08:58 +0200
Subject: drbd: drbd_nla_check_mandatory(): Need to remove the
 DRBD_GENLA_F_MANDATORY flag first

We need to remove the flag before checking for valid types.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_func.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 58edd403a3ff..357f2ad403b1 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -158,9 +158,9 @@ static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
 
 	nla_for_each_attr(nla, head, len, rem) {
 		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
 			if (nla_type(nla) > maxtype)
 				return -EOPNOTSUPP;
-			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
 		}
 	}
 	return 0;
-- 
cgit v1.2.3-55-g7522


From 67b58bf723b083d4776cd7c9959246ef46c0d36f Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 6 Jun 2011 15:36:04 +0200
Subject: drbd: spelling fix: too small

It is not "to small", but "too small".

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 8 ++++----
 include/linux/drbd.h         | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 59923db780b9..31d27dd92924 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1360,7 +1360,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
 			(unsigned long long) drbd_get_max_capacity(nbc),
 			(unsigned long long) new_disk_conf->disk_size);
-		retcode = ERR_DISK_TO_SMALL;
+		retcode = ERR_DISK_TOO_SMALL;
 		goto fail;
 	}
 
@@ -1374,7 +1374,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
-		retcode = ERR_MD_DISK_TO_SMALL;
+		retcode = ERR_MD_DISK_TOO_SMALL;
 		dev_warn(DEV, "refusing attach: md-device too small, "
 		     "at least %llu sectors needed for this meta-disk type\n",
 		     (unsigned long long) min_md_device_sectors);
@@ -1385,7 +1385,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 * (we may currently be R_PRIMARY with no local disk...) */
 	if (drbd_get_max_capacity(nbc) <
 	    drbd_get_capacity(mdev->this_bdev)) {
-		retcode = ERR_DISK_TO_SMALL;
+		retcode = ERR_DISK_TOO_SMALL;
 		goto fail;
 	}
 
@@ -1447,7 +1447,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
 	    drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
-		retcode = ERR_DISK_TO_SMALL;
+		retcode = ERR_DISK_TOO_SMALL;
 		goto force_diskless_dec;
 	}
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 679e81123229..fedda00374af 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -112,8 +112,8 @@ enum drbd_ret_code {
 	ERR_OPEN_MD_DISK	= 105,
 	ERR_DISK_NOT_BDEV	= 107,
 	ERR_MD_NOT_BDEV		= 108,
-	ERR_DISK_TO_SMALL	= 111,
-	ERR_MD_DISK_TO_SMALL	= 112,
+	ERR_DISK_TOO_SMALL	= 111,
+	ERR_MD_DISK_TOO_SMALL	= 112,
 	ERR_BDCLAIM_DISK	= 114,
 	ERR_BDCLAIM_MD_DISK	= 115,
 	ERR_MD_IDX_INVALID	= 116,
-- 
cgit v1.2.3-55-g7522


From 789c1b626cb490acb36cf481b45040b324f60fde Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Mon, 6 Jun 2011 16:16:44 +0200
Subject: drbd: Use the terminology suggested by the command names in the
 source code and messages

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 16 ++++++++--------
 include/linux/drbd.h         |  4 ++--
 include/linux/drbd_genl.h    | 17 ++++-------------
 3 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 31d27dd92924..5b4090f52f5a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -47,8 +47,8 @@
 int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
 
-int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
-int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
@@ -2972,7 +2972,7 @@ drbd_check_conn_name(const char *name)
 	return NO_ERROR;
 }
 
-int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
 
@@ -2989,7 +2989,7 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
 	if (adm_ctx.tconn) {
 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
 			retcode = ERR_INVALID_REQUEST;
-			drbd_msg_put_info("connection exists");
+			drbd_msg_put_info("resource exists");
 		}
 		/* else: still NO_ERROR */
 		goto out;
@@ -3086,7 +3086,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 
 	if (!adm_ctx.tconn) {
-		retcode = ERR_CONN_NOT_KNOWN;
+		retcode = ERR_RES_NOT_KNOWN;
 		goto out;
 	}
 
@@ -3140,7 +3140,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		retcode = NO_ERROR;
 	} else {
 		/* "can not happen" */
-		retcode = ERR_CONN_IN_USE;
+		retcode = ERR_RES_IN_USE;
 		drbd_msg_put_info("failed to delete connection");
 	}
 	goto out;
@@ -3149,7 +3149,7 @@ out:
 	return 0;
 }
 
-int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
 
@@ -3166,7 +3166,7 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
 
 		retcode = NO_ERROR;
 	} else {
-		retcode = ERR_CONN_IN_USE;
+		retcode = ERR_RES_IN_USE;
 	}
 
 	if (retcode == NO_ERROR)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index fedda00374af..161cd414b036 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -155,8 +155,8 @@ enum drbd_ret_code {
 	ERR_CONG_NOT_PROTO_A	= 155,
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
-	ERR_CONN_NOT_KNOWN      = 158,
-	ERR_CONN_IN_USE         = 159,
+	ERR_RES_NOT_KNOWN	= 158,
+	ERR_RES_IN_USE		= 159,
 	ERR_MINOR_CONFIGURED    = 160,
 	ERR_MINOR_EXISTS	= 161,
 	ERR_INVALID_REQUEST	= 162,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 4ceecb9307d9..47ef324b69db 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -253,25 +253,16 @@ GENL_op(
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
 )
 
-#if 0
-	/* TO BE DONE */
-	/* create or destroy resources, aka replication groups */
-GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
-GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
-#endif
-
 	/* add DRBD minor devices as volumes to resources */
-GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
-	/* add or delete replication links to resources */
-GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
+	/* add or delete resources */
+GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
-GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
+GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
-- 
cgit v1.2.3-55-g7522


From 7c3063cc6f0e75cdf312f5f318f9a4c02e460397 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Thu, 9 Jun 2011 17:52:12 +0200
Subject: drbd: Also need to check for DRBD_GENLA_F_MANDATORY flags before
 nla_find_nested()

This is done by introducing drbd_nla_find_nested() which handles the flag
before calling nla_find_nested().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h   |  6 +++
 drivers/block/drbd/drbd_nl.c    | 96 ++++++++++++++++++++++++++++++++---------
 include/linux/drbd_genl.h       |  2 +-
 include/linux/genl_magic_func.h | 37 ----------------
 4 files changed, 83 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c3019730a24f..c58430183d5f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1407,6 +1407,12 @@ extern bool conn_try_outdate_peer(struct drbd_tconn *tconn);
 extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
+struct nla_policy;
+extern int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla);
+extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+				 const struct nla_policy *policy);
+extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
+
 /* drbd_worker.c */
 extern int drbd_worker(struct drbd_thread *thi);
 enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 5b4090f52f5a..24187f1c93d5 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -92,7 +92,7 @@ static struct drbd_config_context {
 #define VOLUME_UNSPECIFIED		(-1U)
 	/* pointer into the request skb,
 	 * limited lifetime! */
-	char *conn_name;
+	char *resource_name;
 
 	/* reply buffer */
 	struct sk_buff *reply_skb;
@@ -191,15 +191,15 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		/* and assign stuff to the global adm_ctx */
 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
-		nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
+		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 		if (nla)
-			adm_ctx.conn_name = nla_data(nla);
+			adm_ctx.resource_name = nla_data(nla);
 	} else
 		adm_ctx.volume = VOLUME_UNSPECIFIED;
 
 	adm_ctx.minor = d_in->minor;
 	adm_ctx.mdev = minor_to_mdev(d_in->minor);
-	adm_ctx.tconn = conn_get_by_name(adm_ctx.conn_name);
+	adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name);
 
 	if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
 		drbd_msg_put_info("unknown minor");
@@ -214,7 +214,8 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	if (adm_ctx.mdev && adm_ctx.tconn &&
 	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
 		pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
-				adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
+				adm_ctx.minor, adm_ctx.resource_name,
+				adm_ctx.mdev->tconn->name);
 		drbd_msg_put_info("minor exists in different connection");
 		return ERR_INVALID_REQUEST;
 	}
@@ -239,7 +240,7 @@ fail:
 static int drbd_adm_finish(struct genl_info *info, int retcode)
 {
 	struct nlattr *nla;
-	const char *conn_name = NULL;
+	const char *resource_name = NULL;
 
 	if (adm_ctx.tconn) {
 		kref_put(&adm_ctx.tconn->kref, &conn_destroy);
@@ -253,9 +254,10 @@ static int drbd_adm_finish(struct genl_info *info, int retcode)
 
 	nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
 	if (nla) {
-		nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
-		if (nla)
-			conn_name = nla_data(nla);
+		int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+		nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
+		if (nla && !IS_ERR(nla))
+			resource_name = nla_data(nla);
 	}
 
 	drbd_adm_send_reply(adm_ctx.reply_skb, info);
@@ -2526,7 +2528,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
 }
 
-int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr)
+int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr)
 {
 	struct nlattr *nla;
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
@@ -2534,7 +2536,7 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigne
 		goto nla_put_failure;
 	if (vnr != VOLUME_UNSPECIFIED)
 		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name);
+	NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name);
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2778,8 +2780,9 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
 	struct nlattr *nla;
-	const char *conn_name;
+	const char *resource_name;
 	struct drbd_tconn *tconn;
+	int maxtype;
 
 	/* Is this a followup call? */
 	if (cb->args[0]) {
@@ -2799,12 +2802,15 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 	/* No explicit context given.  Dump all. */
 	if (!nla)
 		goto dump;
-	nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
+	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
+	if (IS_ERR(nla))
+		return PTR_ERR(nla);
 	/* context given, but no name present? */
 	if (!nla)
 		return -EINVAL;
-	conn_name = nla_data(nla);
-	tconn = conn_get_by_name(conn_name);
+	resource_name = nla_data(nla);
+	tconn = conn_get_by_name(resource_name);
 
 	if (!tconn)
 		return -ENODEV;
@@ -2957,16 +2963,16 @@ out_nolock:
 }
 
 static enum drbd_ret_code
-drbd_check_conn_name(const char *name)
+drbd_check_resource_name(const char *name)
 {
 	if (!name || !name[0]) {
-		drbd_msg_put_info("connection name missing");
+		drbd_msg_put_info("resource name missing");
 		return ERR_MANDATORY_TAG;
 	}
 	/* if we want to use these in sysfs/configfs/debugfs some day,
 	 * we must not allow slashes */
 	if (strchr(name, '/')) {
-		drbd_msg_put_info("invalid connection name");
+		drbd_msg_put_info("invalid resource name");
 		return ERR_INVALID_REQUEST;
 	}
 	return NO_ERROR;
@@ -2982,7 +2988,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = drbd_check_conn_name(adm_ctx.conn_name);
+	retcode = drbd_check_resource_name(adm_ctx.resource_name);
 	if (retcode != NO_ERROR)
 		goto out;
 
@@ -2995,7 +3001,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	if (!conn_create(adm_ctx.conn_name))
+	if (!conn_create(adm_ctx.resource_name))
 		retcode = ERR_NOMEM;
 out:
 	drbd_adm_finish(info, retcode);
@@ -3213,3 +3219,53 @@ failed:
 			"Event seq:%u sib_reason:%u\n",
 			err, seq, sib->sib_reason);
 }
+
+int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
+{
+	struct nlattr *head = nla_data(nla);
+	int len = nla_len(nla);
+	int rem;
+
+	/*
+	 * validate_nla (called from nla_parse_nested) ignores attributes
+	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
+	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
+	 * flag set also, check and remove that flag before calling
+	 * nla_parse_nested.
+	 */
+
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
+			if (nla_type(nla) > maxtype)
+				return -EOPNOTSUPP;
+		}
+	}
+	return 0;
+}
+
+int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+			  const struct nla_policy *policy)
+{
+	int err;
+
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (!err)
+		err = nla_parse_nested(tb, maxtype, nla, policy);
+
+	return err;
+}
+
+struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
+{
+	int err;
+	/*
+	 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
+	 * we don't know about that attribute, reject all the nested
+	 * attributes.
+	 */
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (err)
+		return ERR_PTR(err);
+	return nla_find_nested(nla, attrtype);
+}
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 47ef324b69db..0c2102c05384 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -96,7 +96,7 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
-	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_conn_name, 128)
+	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 357f2ad403b1..0b8a88e2e83e 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -142,43 +142,6 @@ static struct nlattr *nested_attr_tb[128];
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 #endif
 
-static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
-{
-	struct nlattr *head = nla_data(nla);
-	int len = nla_len(nla);
-	int rem;
-
-	/*
-	 * validate_nla (called from nla_parse_nested) ignores attributes
-	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
-	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
-	 * flag set also, check and remove that flag before calling
-	 * nla_parse_nested.
-	 */
-
-	nla_for_each_attr(nla, head, len, rem) {
-		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
-			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
-			if (nla_type(nla) > maxtype)
-				return -EOPNOTSUPP;
-		}
-	}
-	return 0;
-}
-
-static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype,
-					struct nlattr *nla,
-					const struct nla_policy *policy)
-{
-	int err;
-
-	err = drbd_nla_check_mandatory(maxtype, nla);
-	if (!err)
-		err = nla_parse_nested(tb, maxtype, nla, policy);
-
-	return err;
-}
-
 #undef GENL_struct
 #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
 /* *_from_attrs functions are static, but potentially unused */		\
-- 
cgit v1.2.3-55-g7522


From 089c075d88ac9407b8d7c5c8fc4b21c0d940bd82 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 14 Jun 2011 18:28:09 +0200
Subject: drbd: Convert the generic netlink interface to accept connection
 endpoints

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |   7 ++
 drivers/block/drbd/drbd_main.c     |  21 +++++
 drivers/block/drbd/drbd_nl.c       | 158 ++++++++++++++++++++++---------------
 drivers/block/drbd/drbd_receiver.c |  21 +++--
 drivers/block/drbd/drbd_state.c    |   2 +
 include/linux/drbd_genl.h          |  62 +++++++--------
 6 files changed, 164 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c3019730a24f..6d6d1056d824 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -836,6 +836,11 @@ struct drbd_tconn {			/* is a resource from the config file */
 	wait_queue_head_t ping_wait;	/* Woken upon reception of a ping, and a state change */
 	struct res_opts res_opts;
 
+	struct sockaddr_storage my_addr;
+	int my_addr_len;
+	struct sockaddr_storage peer_addr;
+	int peer_addr_len;
+
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
 	int agreed_pro_version;		/* actually used protocol version */
@@ -1377,6 +1382,8 @@ extern void drbd_minor_destroy(struct kref *kref);
 struct drbd_tconn *conn_create(const char *name);
 extern void conn_destroy(struct kref *kref);
 struct drbd_tconn *conn_get_by_name(const char *name);
+extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+					    void *peer_addr, int peer_addr_len);
 extern void conn_free_crypto(struct drbd_tconn *tconn);
 
 extern int proc_details;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 178c711bc4af..79f275dc43a4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2420,6 +2420,27 @@ found:
 	return tconn;
 }
 
+struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+				     void *peer_addr, int peer_addr_len)
+{
+	struct drbd_tconn *tconn;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
+		if (tconn->my_addr_len == my_addr_len &&
+		    tconn->peer_addr_len == peer_addr_len &&
+		    !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
+		    !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
+			kref_get(&tconn->kref);
+			goto found;
+		}
+	}
+	tconn = NULL;
+found:
+	rcu_read_unlock();
+	return tconn;
+}
+
 static int drbd_alloc_socket(struct drbd_socket *socket)
 {
 	socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 352be132b4be..e7933e04e7b8 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -94,6 +94,8 @@ static struct drbd_config_context {
 	/* pointer into the request skb,
 	 * limited lifetime! */
 	char *resource_name;
+	struct nlattr *my_addr;
+	struct nlattr *peer_addr;
 
 	/* reply buffer */
 	struct sk_buff *reply_skb;
@@ -142,6 +144,7 @@ int drbd_msg_put_info(const char *info)
  */
 #define DRBD_ADM_NEED_MINOR	1
 #define DRBD_ADM_NEED_RESOURCE	2
+#define DRBD_ADM_NEED_CONNECTION 4
 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		unsigned flags)
 {
@@ -174,6 +177,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	adm_ctx.reply_dh->minor = d_in->minor;
 	adm_ctx.reply_dh->ret_code = NO_ERROR;
 
+	adm_ctx.volume = VOLUME_UNSPECIFIED;
 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 		struct nlattr *nla;
 		/* parse and validate only */
@@ -191,12 +195,21 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 
 		/* and assign stuff to the global adm_ctx */
 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
-		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
+		if (nla)
+			adm_ctx.volume = nla_get_u32(nla);
 		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 		if (nla)
 			adm_ctx.resource_name = nla_data(nla);
-	} else
-		adm_ctx.volume = VOLUME_UNSPECIFIED;
+		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+		if ((adm_ctx.my_addr &&
+		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) ||
+		    (adm_ctx.peer_addr &&
+		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) {
+			err = -EINVAL;
+			goto fail;
+		}
+	}
 
 	adm_ctx.minor = d_in->minor;
 	adm_ctx.mdev = minor_to_mdev(d_in->minor);
@@ -211,6 +224,26 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		return ERR_INVALID_REQUEST;
 	}
 
+	if (flags & DRBD_ADM_NEED_CONNECTION) {
+		if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) {
+			drbd_msg_put_info("no resource name expected");
+			return ERR_INVALID_REQUEST;
+		}
+		if (adm_ctx.mdev) {
+			drbd_msg_put_info("no minor number expected");
+			return ERR_INVALID_REQUEST;
+		}
+		if (adm_ctx.my_addr && adm_ctx.peer_addr)
+			adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
+							  nla_len(adm_ctx.my_addr),
+							  nla_data(adm_ctx.peer_addr),
+							  nla_len(adm_ctx.peer_addr));
+		if (!adm_ctx.tconn) {
+			drbd_msg_put_info("unknown connection");
+			return ERR_INVALID_REQUEST;
+		}
+	}
+
 	/* some more paranoia, if the request was over-determined */
 	if (adm_ctx.mdev && adm_ctx.tconn &&
 	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
@@ -268,30 +301,28 @@ static int drbd_adm_finish(struct genl_info *info, int retcode)
 static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
 {
 	char *afs;
-	struct net_conf *nc;
 
-	rcu_read_lock();
-	nc = rcu_dereference(tconn->net_conf);
-	if (nc) {
-		switch (((struct sockaddr *)nc->peer_addr)->sa_family) {
-		case AF_INET6:
-			afs = "ipv6";
-			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
-				 &((struct sockaddr_in6 *)nc->peer_addr)->sin6_addr);
-			break;
-		case AF_INET:
-			afs = "ipv4";
-			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
-				 &((struct sockaddr_in *)nc->peer_addr)->sin_addr);
-			break;
-		default:
-			afs = "ssocks";
-			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
-				 &((struct sockaddr_in *)nc->peer_addr)->sin_addr);
-		}
-		snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
+	/* FIXME: A future version will not allow this case. */
+	if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0)
+		return;
+
+	switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) {
+	case AF_INET6:
+		afs = "ipv6";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
+			 &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr);
+		break;
+	case AF_INET:
+		afs = "ipv4";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+			 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
+		break;
+	default:
+		afs = "ssocks";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+			 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
 	}
-	rcu_read_unlock();
+	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
 }
 
 int drbd_khelper(struct drbd_conf *mdev, char *cmd)
@@ -1874,7 +1905,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	int rsr; /* re-sync running */
 	struct crypto crypto = { };
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -1986,18 +2017,39 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	struct drbd_conf *mdev;
 	struct net_conf *old_conf, *new_conf = NULL;
 	struct crypto crypto = { };
-	struct drbd_tconn *oconn;
 	struct drbd_tconn *tconn;
-	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
 	enum drbd_ret_code retcode;
 	int i;
 	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
+	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
+		drbd_msg_put_info("connection endpoint(s) missing");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+
+	/* No need for _rcu here. All reconfiguration is
+	 * strictly serialized on genl_lock(). We are protected against
+	 * concurrent reconfiguration/addition/deletion */
+	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
+		if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len &&
+		    !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) {
+			retcode = ERR_LOCAL_ADDR;
+			goto out;
+		}
+
+		if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len &&
+		    !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) {
+			retcode = ERR_PEER_ADDR;
+			goto out;
+		}
+	}
 
 	tconn = adm_ctx.tconn;
 	conn_reconfig_start(tconn);
@@ -2027,37 +2079,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;
 
-	retcode = NO_ERROR;
-
-	new_my_addr = (struct sockaddr *)&new_conf->my_addr;
-	new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
-
-	/* No need for _rcu here. All reconfiguration is
-	 * strictly serialized on genl_lock(). We are protected against
-	 * concurrent reconfiguration/addition/deletion */
-	list_for_each_entry(oconn, &drbd_tconns, all_tconn) {
-		struct net_conf *nc;
-		if (oconn == tconn)
-			continue;
-
-		rcu_read_lock();
-		nc = rcu_dereference(oconn->net_conf);
-		if (nc) {
-			taken_addr = (struct sockaddr *)&nc->my_addr;
-			if (new_conf->my_addr_len == nc->my_addr_len &&
-			    !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
-				retcode = ERR_LOCAL_ADDR;
-
-			taken_addr = (struct sockaddr *)&nc->peer_addr;
-			if (new_conf->peer_addr_len == nc->peer_addr_len &&
-			    !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
-				retcode = ERR_PEER_ADDR;
-		}
-		rcu_read_unlock();
-		if (retcode != NO_ERROR)
-			goto fail;
-	}
-
 	retcode = alloc_crypto(&crypto, new_conf);
 	if (retcode != NO_ERROR)
 		goto fail;
@@ -2083,6 +2104,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	tconn->csums_tfm = crypto.csums_tfm;
 	tconn->verify_tfm = crypto.verify_tfm;
 
+	tconn->my_addr_len = nla_len(adm_ctx.my_addr);
+	memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len);
+	tconn->peer_addr_len = nla_len(adm_ctx.peer_addr);
+	memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len);
+
 	mutex_unlock(&tconn->conf_update);
 
 	rcu_read_lock();
@@ -2170,7 +2196,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 	enum drbd_ret_code retcode;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -2529,7 +2555,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
 }
 
-int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr)
+int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr)
 {
 	struct nlattr *nla;
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
@@ -2537,7 +2563,11 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, uns
 		goto nla_put_failure;
 	if (vnr != VOLUME_UNSPECIFIED)
 		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name);
+	NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name);
+	if (tconn->my_addr_len)
+		NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr);
+	if (tconn->peer_addr_len)
+		NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr);
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2574,7 +2604,7 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 
 	/* We need to add connection name and volume number information still.
 	 * Minor number is in drbd_genlmsghdr. */
-	if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
+	if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr))
 		goto nla_put_failure;
 
 	if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
@@ -2736,7 +2766,7 @@ next_tconn:
 			/* this is a tconn without a single volume */
 			dh->minor = -1U;
 			dh->ret_code = NO_ERROR;
-			if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED))
+			if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED))
 				genlmsg_cancel(skb, dh);
 			else
 				genlmsg_end(skb, dh);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 4ba097293278..ab1d36cb6214 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -626,23 +626,21 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 		rcu_read_unlock();
 		return NULL;
 	}
-
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
 	connect_int = nc->connect_int;
+	rcu_read_unlock();
 
-	my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6));
-	memcpy(&src_in6, nc->my_addr, my_addr_len);
+	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
+	memcpy(&src_in6, &tconn->my_addr, my_addr_len);
 
-	if (((struct sockaddr *)nc->my_addr)->sa_family == AF_INET6)
+	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
 		src_in6.sin6_port = 0;
 	else
 		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
 
-	peer_addr_len = min_t(int, nc->peer_addr_len, sizeof(src_in6));
-	memcpy(&peer_in6, nc->peer_addr, peer_addr_len);
-
-	rcu_read_unlock();
+	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
+	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
 
 	what = "sock_create_kern";
 	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
@@ -714,15 +712,14 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 		rcu_read_unlock();
 		return NULL;
 	}
-
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
 	connect_int = nc->connect_int;
-
-	my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6));
-	memcpy(&my_addr, nc->my_addr, my_addr_len);
 	rcu_read_unlock();
 
+	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
+	memcpy(&my_addr, &tconn->my_addr, my_addr_len);
+
 	what = "sock_create_kern";
 	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
 		SOCK_STREAM, IPPROTO_TCP, &s_listen);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index cd55f46d5c55..d978e4d98a15 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1418,6 +1418,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
 
 		mutex_lock(&tconn->conf_update);
 		old_conf = tconn->net_conf;
+		tconn->my_addr_len = 0;
+		tconn->peer_addr_len = 0;
 		rcu_assign_pointer(tconn->net_conf, NULL);
 		conn_free_crypto(tconn);
 		mutex_unlock(&tconn->conf_update);
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 0c2102c05384..b93db6c83882 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -97,6 +97,8 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
 	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
+	__bin_field(3, DRBD_GENLA_F_MANDATORY,	ctx_my_addr, 128)
+	__bin_field(4, DRBD_GENLA_F_MANDATORY,	ctx_peer_addr, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
@@ -134,38 +136,36 @@ GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__bin_field(1,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	my_addr,	128)
-	__bin_field(2,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	peer_addr,	128)
-	__str_field_def(3,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field_def(6,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field_def(7,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
-	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
-	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
-	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
-	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
-	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
-	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
-	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
-	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
-	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
-	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
-	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
-	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
-	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
-	__u32_field_def(24,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
-	__u32_field_def(25,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
-	__flg_field_def(28, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
-	__flg_field_def(29, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
-	__flg_field_def(31,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	__str_field_def(2,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(3,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(6,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(7,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(24, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
+	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
-- 
cgit v1.2.3-55-g7522


From 6dff2902208364d058746ee794da4d960f6eec6f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 28 Jun 2011 14:18:12 +0200
Subject: drbd: Rename --dry-run to --tentative

drbdadm already has a --dry-run option, so this option cannot directly be
passed through to drbdsetup.  Rename the drbdsetup option to resolve this
conflict.

For backward compatibility, make --dry-run an alias of --tentative.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_main.c     | 4 ++--
 drivers/block/drbd/drbd_receiver.c | 6 +++---
 include/linux/drbd_genl.h          | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 933d4767c110..72b1dfa4b656 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -926,7 +926,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
 	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
 
-	if (nc->dry_run && tconn->agreed_pro_version < 92) {
+	if (nc->tentative && tconn->agreed_pro_version < 92) {
 		rcu_read_unlock();
 		mutex_unlock(&sock->mutex);
 		conn_err(tconn, "--dry-run is not supported by peer");
@@ -945,7 +945,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
 	cf = 0;
 	if (nc->discard_my_data)
 		cf |= CF_DISCARD_MY_DATA;
-	if (nc->dry_run)
+	if (nc->tentative)
 		cf |= CF_DRY_RUN;
 	p->conn_flags    = cpu_to_be32(cf);
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ab1d36cb6214..d55a3cb21c31 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2836,7 +2836,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 	enum drbd_conns rv = C_MASK;
 	enum drbd_disk_state mydisk;
 	struct net_conf *nc;
-	int hg, rule_nr, rr_conflict, dry_run;
+	int hg, rule_nr, rr_conflict, tentative;
 
 	mydisk = mdev->state.disk;
 	if (mydisk == D_NEGOTIATING)
@@ -2916,7 +2916,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 			     (hg < 0) ? "peer" : "this");
 	}
 	rr_conflict = nc->rr_conflict;
-	dry_run = nc->dry_run;
+	tentative = nc->tentative;
 	rcu_read_unlock();
 
 	if (hg == -100) {
@@ -2949,7 +2949,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 		}
 	}
 
-	if (dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
+	if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
 		if (hg == 0)
 			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
 		else
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index b93db6c83882..e879a9324380 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -164,7 +164,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
 	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
 	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
+	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
 	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
-- 
cgit v1.2.3-55-g7522


From f03c254961cce65ee2b21c4beccb6975b6f9d308 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Mon, 20 Jun 2011 22:21:19 +0200
Subject: drbd: allow ping-timeout of up to 30 seconds

Allow up to 300 centi-seconds to be configured for the "ping timeout".
There may be setups where heavy congestion, huge buffers, and asymmetric
bandwidth limitations may need a "huge" ping-timeout as work-around
for "spurious connection loss" problems.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 3627f760966e..82db83410f08 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -62,7 +62,7 @@
 
  /* timeout for the ping packets.*/
 #define DRBD_PING_TIMEO_MIN  1
-#define DRBD_PING_TIMEO_MAX  100
+#define DRBD_PING_TIMEO_MAX  300
 #define DRBD_PING_TIMEO_DEF  5
 
   /* max number of write requests between write barriers */
-- 
cgit v1.2.3-55-g7522


From d942ae44537669418a7cbfd916531d30513dbca8 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Tue, 31 May 2011 13:07:24 +0200
Subject: drbd: Fixes from the 8.3 development branch

 * commit 'ae57a0a':
   drbd: Only print sanitize state's warnings, if the state change happens
   drbd: we should write meta data updates with FLUSH FUA
   drbd: fix limit define, we support 1 PiByte now
   drbd: fix log message argument order
   drbd: Typo in user-visible message.
   drbd: Make "(rcv|snd)buf-size" and "ping-timeout" available for the proxy, too.
   drbd: Allow keywords to be used in multiple config sections.
   drbd: fix typos in comments.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h   | 10 ++++----
 drivers/block/drbd/drbd_state.c | 56 ++++++++++++++++++++++++++++++-----------
 include/linux/drbd_limits.h     |  2 +-
 3 files changed, 47 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index de42c7cf7caf..1d71b3a3586a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -372,11 +372,11 @@ struct p_connection_features {
 	u32 protocol_max;
 
 	/* should be more than enough for future enhancements
-	 * for now, feature_flags and the reserverd array shall be zero.
+	 * for now, feature_flags and the reserved array shall be zero.
 	 */
 
 	u32 _pad;
-	u64 reserverd[7];
+	u64 reserved[7];
 } __packed;
 
 struct p_barrier {
@@ -914,7 +914,7 @@ struct drbd_conf {
 	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
 	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
-	atomic_t unacked_cnt;	 /* Need to send replys for */
+	atomic_t unacked_cnt;	 /* Need to send replies for */
 	atomic_t local_cnt;	 /* Waiting for local completion */
 
 	/* Interval tree of pending local requests */
@@ -2153,7 +2153,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 		/* disk state is stable as well. */
 		break;
 
-	/* no new io accepted during tansitional states */
+	/* no new io accepted during transitional states */
 	case D_ATTACHING:
 	case D_FAILED:
 	case D_NEGOTIATING:
@@ -2217,7 +2217,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev)
 	/* we wait here
 	 *    as long as the device is suspended
 	 *    until the bitmap is no longer on the fly during connection
-	 *    handshake as long as we would exeed the max_buffer limit.
+	 *    handshake as long as we would exceed the max_buffer limit.
 	 *
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 8c9d0348736d..2cf69b25f1e7 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -37,6 +37,15 @@ struct after_state_chg_work {
 	struct completion *done;
 };
 
+enum sanitize_state_warnings {
+	NO_WARNING,
+	ABORTED_ONLINE_VERIFY,
+	ABORTED_RESYNC,
+	CONNECTION_LOST_NEGOTIATING,
+	IMPLICITLY_UPGRADED_DISK,
+	IMPLICITLY_UPGRADED_PDSK,
+};
+
 static int w_after_state_ch(struct drbd_work *w, int unused);
 static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 			   union drbd_state ns, enum chg_state_flags flags);
@@ -44,7 +53,7 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
 static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
 static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
-				       const char **warn_sync_abort);
+				       enum sanitize_state_warnings *warn);
 
 static inline bool is_susp(union drbd_state s)
 {
@@ -656,6 +665,21 @@ is_valid_transition(union drbd_state os, union drbd_state ns)
 	return rv;
 }
 
+static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
+{
+	static const char *msg_table[] = {
+		[NO_WARNING] = "",
+		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
+		[ABORTED_RESYNC] = "Resync aborted.",
+		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
+		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
+		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
+	};
+
+	if (warn != NO_WARNING)
+		dev_warn(DEV, "%s\n", msg_table[warn]);
+}
+
 /**
  * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
  * @mdev:	DRBD device.
@@ -667,11 +691,14 @@ is_valid_transition(union drbd_state os, union drbd_state ns)
  * to D_UNKNOWN. This rule and many more along those lines are in this function.
  */
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
-				       const char **warn_sync_abort)
+				       enum sanitize_state_warnings *warn)
 {
 	enum drbd_fencing_p fp;
 	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
 
+	if (warn)
+		*warn = NO_WARNING;
+
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
 		rcu_read_lock();
@@ -695,10 +722,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 	/* An implication of the disk states onto the connection state */
 	/* Abort resync if a disk fails/detaches */
 	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
-		if (warn_sync_abort)
-			*warn_sync_abort =
-				ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
-				"Online-verify" : "Resync";
+		if (warn)
+			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
+				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
 		ns.conn = C_CONNECTED;
 	}
 
@@ -709,7 +735,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 			ns.disk = mdev->new_state_tmp.disk;
 			ns.pdsk = mdev->new_state_tmp.pdsk;
 		} else {
-			dev_alert(DEV, "Connection lost while negotiating, no data!\n");
+			if (warn)
+				*warn = CONNECTION_LOST_NEGOTIATING;
 			ns.disk = D_DISKLESS;
 			ns.pdsk = D_UNKNOWN;
 		}
@@ -791,16 +818,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		ns.disk = disk_max;
 
 	if (ns.disk < disk_min) {
-		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
-			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_DISK;
 		ns.disk = disk_min;
 	}
 	if (ns.pdsk > pdsk_max)
 		ns.pdsk = pdsk_max;
 
 	if (ns.pdsk < pdsk_min) {
-		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
-			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_PDSK;
 		ns.pdsk = pdsk_min;
 	}
 
@@ -875,12 +902,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 {
 	union drbd_state os;
 	enum drbd_state_rv rv = SS_SUCCESS;
-	const char *warn_sync_abort = NULL;
+	enum sanitize_state_warnings ssw;
 	struct after_state_chg_work *ascw;
 
 	os = drbd_read_state(mdev);
 
-	ns = sanitize_state(mdev, ns, &warn_sync_abort);
+	ns = sanitize_state(mdev, ns, &ssw);
 	if (ns.i == os.i)
 		return SS_NOTHING_TO_DO;
 
@@ -909,8 +936,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		return rv;
 	}
 
-	if (warn_sync_abort)
-		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
+	print_sanitize_warnings(mdev, ssw);
 
 	drbd_pr_state_change(mdev, os, ns, flags);
 
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 82db83410f08..f1046b13d9f6 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -126,7 +126,7 @@
  * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
 #define DRBD_DISK_SIZE_MIN  0
-#define DRBD_DISK_SIZE_MAX  (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_MAX  (1 * (2LLU << 40))
 #define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */
 #define DRBD_DISK_SIZE_SCALE 's'  /* sectors */
 
-- 
cgit v1.2.3-55-g7522


From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Tue, 5 Jul 2011 15:38:59 +0200
Subject: drbd: detach from frozen backing device

* drbd-8.3:
  documentation: Documented detach's --force and disk's --disk-timeout
  drbd: Implemented the disk-timeout option
  drbd: Force flag for the detach operation
  drbd: Allow new IOs while the local disk in in FAILED state
  drbd: Bitmap IO functions can not return prematurely if the disk breaks
  drbd: Added a kref to bm_aio_ctx
  drbd: Hold a reference to ldev while doing meta-data IO
  drbd: Keep a reference to the bio until the completion handler finished
  drbd: Implemented wait_until_done_or_disk_failure()
  drbd: Replaced md_io_mutex by an atomic: md_io_in_use
  drbd: moved md_io into mdev
  drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk
  drbd: Keep a reference to barrier acked requests

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c   |  75 ++++++++++++++++++------
 drivers/block/drbd/drbd_bitmap.c   | 115 +++++++++++++++++++++++++++----------
 drivers/block/drbd/drbd_int.h      |  12 ++--
 drivers/block/drbd/drbd_main.c     |  77 ++++++++++++++++++++++---
 drivers/block/drbd/drbd_nl.c       |  28 ++++++++-
 drivers/block/drbd/drbd_receiver.c |   2 -
 drivers/block/drbd/drbd_req.c      |  52 +++++++++++------
 drivers/block/drbd/drbd_req.h      |  19 +++---
 drivers/block/drbd/drbd_state.c    |   7 +++
 drivers/block/drbd/drbd_worker.c   |   9 ++-
 include/linux/drbd_genl.h          |   9 ++-
 include/linux/drbd_limits.h        |   6 ++
 12 files changed, 321 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aeb483daea06..58b5b61628fc 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -114,18 +114,44 @@ struct drbd_atodb_wait {
 
 static int w_al_write_transaction(struct drbd_work *, int);
 
+void *drbd_md_get_buffer(struct drbd_conf *mdev)
+{
+	int r;
+
+	wait_event(mdev->misc_wait,
+		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
+		   mdev->state.disk <= D_FAILED);
+
+	return r ? NULL : page_address(mdev->md_io_page);
+}
+
+void drbd_md_put_buffer(struct drbd_conf *mdev)
+{
+	if (atomic_dec_and_test(&mdev->md_io_in_use))
+		wake_up(&mdev->misc_wait);
+}
+
+static bool md_io_allowed(struct drbd_conf *mdev)
+{
+	enum drbd_disk_state ds = mdev->state.disk;
+	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
+}
+
+void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done)
+{
+	wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev));
+}
+
 static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 				 struct drbd_backing_dev *bdev,
 				 struct page *page, sector_t sector,
 				 int rw, int size)
 {
 	struct bio *bio;
-	struct drbd_md_io md_io;
 	int err;
 
-	md_io.mdev = mdev;
-	init_completion(&md_io.event);
-	md_io.error = 0;
+	mdev->md_io.done = 0;
+	mdev->md_io.error = -ENODEV;
 
 	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
 		rw |= REQ_FUA | REQ_FLUSH;
@@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 	err = -EIO;
 	if (bio_add_page(bio, page, size, 0) != size)
 		goto out;
-	bio->bi_private = &md_io;
+	bio->bi_private = &mdev->md_io;
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	bio_get(bio); /* one bio_put() is in the completion handler */
+	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
 	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
-	wait_for_completion(&md_io.event);
+	wait_until_done_or_disk_failure(mdev, &mdev->md_io.done);
 	if (bio_flagged(bio, BIO_UPTODATE))
-		err = md_io.error;
+		err = mdev->md_io.error;
 
  out:
 	bio_put(bio);
@@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 	int err;
 	struct page *iop = mdev->md_io_page;
 
-	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);
 
 	BUG_ON(!bdev->md_bdev);
 
@@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 		return 0;
 	}
 
-	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
-	buffer = page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
+	if (!buffer) {
+		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
+		aw->err = -EIO;
+		complete(&((struct update_al_work *)w)->event);
+		put_ldev(mdev);
+		return 1;
+	}
 
 	memset(buffer, 0, sizeof(*buffer));
 	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
@@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 		mdev->al_tr_number++;
 	}
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
 
@@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	/* lock out all other meta data io for now,
 	 * and make sure the page is mapped.
 	 */
-	mutex_lock(&mdev->md_io_mutex);
-	b = page_address(mdev->md_io_page);
+	b = drbd_md_get_buffer(mdev);
+	if (!b)
+		return 0;
 
 	/* Always use the full ringbuffer space for now.
 	 * possible optimization: read in all of it,
@@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 		/* IO error */
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 
@@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!found_valid) {
 		if (found_initialized != mx)
 			dev_warn(DEV, "No usable activity log found.\n");
-		mutex_unlock(&mdev->md_io_mutex);
+		drbd_md_put_buffer(mdev);
 		return 1;
 	}
 
@@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		if (!expect(rv != 0))
 			goto cancel;
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 
@@ -643,7 +684,7 @@ cancel:
 	mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* ok, we are done with it */
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 
 	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
 	     transactions, active_extents);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 52c48143b22a..706e5220dd4a 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
 struct bm_aio_ctx {
 	struct drbd_conf *mdev;
 	atomic_t in_flight;
-	struct completion done;
+	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
 #define BM_AIO_WRITE_HINTED	2
 	int error;
+	struct kref kref;
 };
 
+static void bm_aio_ctx_destroy(struct kref *kref)
+{
+	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+
+	put_ldev(ctx->mdev);
+	kfree(ctx);
+}
+
 /* bv_page may be a copy, or may be the original */
 static void bm_async_io_complete(struct bio *bio, int error)
 {
@@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error)
 
 	bio_put(bio);
 
-	if (atomic_dec_and_test(&ctx->in_flight))
-		complete(&ctx->done);
+	if (atomic_dec_and_test(&ctx->in_flight)) {
+		ctx->done = 1;
+		wake_up(&mdev->misc_wait);
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	}
 }
 
 static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
 {
-	struct bio *bio = bio_alloc_drbd(GFP_KERNEL);
+	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 	struct drbd_conf *mdev = ctx->mdev;
 	struct drbd_bitmap *b = mdev->bitmap;
 	struct page *page;
@@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
  */
 static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
-		.mdev = mdev,
-		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
-		.flags = flags,
-	};
+	struct bm_aio_ctx *ctx;
 	struct drbd_bitmap *b = mdev->bitmap;
 	int num_pages, i, count = 0;
 	unsigned long now;
@@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
 	 * as we submit copies of pages anyways.
 	 */
-	if (!ctx.flags)
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
+		.mdev = mdev,
+		.in_flight = ATOMIC_INIT(1),
+		.done = 0,
+		.flags = flags,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
+	};
+
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (!ctx->flags)
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
 	num_pages = b->bm_number_of_pages;
@@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 				continue;
 			}
 		}
-		atomic_inc(&ctx.in_flight);
-		bm_page_io_async(&ctx, i, rw);
+		atomic_inc(&ctx->in_flight);
+		bm_page_io_async(ctx, i, rw);
 		++count;
 		cond_resched();
 	}
 
 	/*
-	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
-	 * will not complete() early, and decrement / test it here.  If there
+	 * We initialize ctx->in_flight to one to make sure bm_async_io_complete
+	 * will not set ctx->done early, and decrement / test it here.  If there
 	 * are still some bios in flight, we need to wait for them here.
+	 * If all IO is done already (or nothing had been submitted), there is
+	 * no need to wait.  Still, we need to put the kref associated with the
+	 * "in_flight reached zero, all done" event.
 	 */
-	if (!atomic_dec_and_test(&ctx.in_flight))
-		wait_for_completion(&ctx.done);
+	if (!atomic_dec_and_test(&ctx->in_flight))
+		wait_until_done_or_disk_failure(mdev, &ctx->done);
+	else
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
 	/* summary for global bitmap IO */
 	if (flags == 0)
 		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
-				rw == WRITE ? "WRITE" : "READ",
-				count, jiffies - now);
+			 rw == WRITE ? "WRITE" : "READ",
+			 count, jiffies - now);
 
-	if (ctx.error) {
+	if (ctx->error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
 		drbd_chk_io_error(mdev, 1, true);
-		err = -EIO; /* ctx.error ? */
+		err = -EIO; /* ctx->error ? */
 	}
 
+	if (atomic_read(&ctx->in_flight))
+		err = -EIO; /* Disk failed during IO... */
+
 	now = jiffies;
 	if (rw == WRITE) {
 		drbd_md_flush(mdev);
@@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
+out:
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	return err;
 }
 
@@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
  */
 int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
+	struct bm_aio_ctx *ctx;
+	int err;
+
+	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
+		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
+		return 0;
+	}
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
 		.mdev = mdev,
 		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+		.done = 0,
 		.flags = BM_AIO_COPY_PAGES,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
 	};
 
-	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
-		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
-		return 0;
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
+		err = -ENODEV;
+		goto out;
 	}
 
-	bm_page_io_async(&ctx, idx, WRITE_SYNC);
-	wait_for_completion(&ctx.done);
+	bm_page_io_async(ctx, idx, WRITE_SYNC);
+	wait_until_done_or_disk_failure(mdev, &ctx->done);
 
-	if (ctx.error)
+	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, true);
 		/* that should force detach, so the in memory bitmap will be
 		 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
-	return ctx.error;
+	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
+ out:
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	return err;
 }
 
 /* NOTE
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6035784f0de3..4e582058a7c9 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -780,8 +780,7 @@ struct drbd_backing_dev {
 };
 
 struct drbd_md_io {
-	struct drbd_conf *mdev;
-	struct completion event;
+	unsigned int done;
 	int error;
 };
 
@@ -852,6 +851,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	struct drbd_tl_epoch *newest_tle;
 	struct drbd_tl_epoch *oldest_tle;
 	struct list_head out_of_sequence_requests;
+	struct list_head barrier_acked_requests;
 
 	struct crypto_hash *cram_hmac_tfm;
 	struct crypto_hash *integrity_tfm;  /* checksums we compute, updates protected by tconn->data->mutex */
@@ -978,7 +978,8 @@ struct drbd_conf {
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct mutex md_io_mutex;	/* protects the md_io_buffer */
+	struct drbd_md_io md_io;
+	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
 	struct lru_cache *act_log;	/* activity log */
@@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
 extern int drbd_resync_finished(struct drbd_conf *mdev);
 /* maybe rather drbd_main.c ? */
+extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
+extern void drbd_md_put_buffer(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
 extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int);
+extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done);
 extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
 static inline void ov_out_of_sync_print(struct drbd_conf *mdev)
@@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 	case D_OUTDATED:
 	case D_CONSISTENT:
 	case D_UP_TO_DATE:
+	case D_FAILED:
 		/* disk state is stable as well. */
 		break;
 
 	/* no new io accepted during transitional states */
 	case D_ATTACHING:
-	case D_FAILED:
 	case D_NEGOTIATING:
 	case D_UNKNOWN:
 	case D_MASK:
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 448de7bf8223..15384986e4a4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn)
 	tconn->oldest_tle = b;
 	tconn->newest_tle = b;
 	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);
+	INIT_LIST_HEAD(&tconn->barrier_acked_requests);
 
 	return 1;
 }
@@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
 	   These have been list_move'd to the out_of_sequence_requests list in
 	   _req_mod(, BARRIER_ACKED) above.
 	   */
-	list_del_init(&b->requests);
+	list_splice_init(&b->requests, &tconn->barrier_acked_requests);
 	mdev = b->w.mdev;
 
 	nob = b->next;
@@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 		b = tmp;
 		list_splice(&carry_reads, &b->requests);
 	}
-}
 
+	/* Actions operating on the disk state, also want to work on
+	   requests that got barrier acked. */
+	switch (what) {
+	case FAIL_FROZEN_DISK_IO:
+	case RESTART_FROZEN_DISK_IO:
+		list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			_req_mod(req, what);
+		}
+	case CONNECTION_LOST_WHILE_PENDING:
+	case RESEND:
+		break;
+	default:
+		conn_err(tconn, "what = %d in _tl_restart()\n", what);
+	}
+}
 
 /**
  * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 	spin_unlock_irq(&tconn->req_lock);
 }
 
+/**
+ * tl_apply() - Applies an event to all requests for a certain mdev in the TL
+ * @mdev:	DRBD device.
+ * @what:       The action/event to perform with all request objects
+ *
+ * @what might ony be ABORT_DISK_IO.
+ */
+void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct drbd_tl_epoch *b;
+	struct list_head *le, *tle;
+	struct drbd_request *req;
+
+	D_ASSERT(what == ABORT_DISK_IO);
+
+	spin_lock_irq(&tconn->req_lock);
+	b = tconn->oldest_tle;
+	while (b) {
+		list_for_each_safe(le, tle, &b->requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			if (req->w.mdev == mdev)
+				_req_mod(req, what);
+		}
+		b = b->next;
+	}
+
+	list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+		req = list_entry(le, struct drbd_request, tl_requests);
+		if (req->w.mdev == mdev)
+			_req_mod(req, what);
+	}
+
+	spin_unlock_irq(&tconn->req_lock);
+}
+
 static int drbd_thread_setup(void *arg)
 {
 	struct drbd_thread *thi = (struct drbd_thread *) arg;
@@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
 	atomic_set(&mdev->ap_in_flight, 0);
+	atomic_set(&mdev->md_io_in_use, 0);
 
-	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->own_state_mutex);
 	mdev->state_mutex = &mdev->own_state_mutex;
 
@@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref)
 	struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
 	struct drbd_tconn *tconn = mdev->tconn;
 
+	del_timer_sync(&mdev->request_timer);
+
 	/* paranoia asserts */
 	D_ASSERT(mdev->open_cnt == 0);
 	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
@@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	if (!get_ldev_if_state(mdev, D_FAILED))
 		return;
 
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
 	memset(buffer, 0, 512);
 
 	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	 * since we updated it on metadata. */
 	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+out:
 	put_ldev(mdev);
 }
 
@@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
 		return ERR_IO_MD_DISK;
 
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
 
 	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
 		/* NOTE: can't do normal error processing here as this is
@@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
 
  err:
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+ out:
 	put_ldev(mdev);
 
 	return rv;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 97d1dab045d2..bf8d0b077624 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	synchronize_rcu();
 	kfree(old_disk_conf);
 	kfree(old_plan);
+	mod_timer(&mdev->request_timer, jiffies + HZ);
 	goto success;
 
 fail_unlock:
@@ -1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (rv < SS_SUCCESS)
 		goto force_diskless_dec;
 
+	mod_timer(&mdev->request_timer, jiffies + HZ);
+
 	if (mdev->state.role == R_PRIMARY)
 		mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
 	else
@@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-static int adm_detach(struct drbd_conf *mdev)
+static int adm_detach(struct drbd_conf *mdev, int force)
 {
 	enum drbd_state_rv retcode;
 	int ret;
+
+	if (force) {
+		drbd_force_state(mdev, NS(disk, D_FAILED));
+		retcode = SS_SUCCESS;
+		goto out;
+	}
+
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
 	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
 	/* D_FAILED will transition to DISKLESS. */
@@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev)
 		retcode = SS_NOTHING_TO_DO;
 	if (ret)
 		retcode = ERR_INTR;
+out:
 	return retcode;
 }
 
@@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev)
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
+	struct detach_parms parms = { };
+	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = adm_detach(adm_ctx.mdev);
+	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
+		err = detach_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 
 	/* detach */
 	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
-		retcode = adm_detach(mdev);
+		retcode = adm_detach(mdev, 0);
 		if (retcode < SS_SUCCESS) {
 			drbd_msg_put_info("failed to detach");
 			goto out;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7218750d2937..3a7e54b8f418 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_pending_cnt, 0);
 	wake_up(&mdev->misc_wait);
 
-	del_timer(&mdev->request_timer);
-
 	del_timer_sync(&mdev->resync_timer);
 	resync_timer_fn((unsigned long)mdev);
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c4e4553f5c2c..8fa51cda3b7e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 {
 	const unsigned long s = req->rq_state;
 	struct drbd_conf *mdev = req->w.mdev;
-	/* only WRITES may end up here without a master bio (on barrier ack) */
-	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
 
 	/* we must not complete the master bio, while it is
 	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 	 *	the receiver,
 	 *	the bio_endio completion callbacks.
 	 */
-	if (s & RQ_LOCAL_PENDING)
+	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
 		return;
 	if (req->i.waiting) {
 		/* Retry all conflicting peer requests.  */
@@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 		req->master_bio = NULL;
 	}
 
+	if (s & RQ_LOCAL_PENDING)
+		return;
+
 	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
 		/* this is disconnected (local only) operation,
 		 * or protocol C P_WRITE_ACK,
@@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		break;
 
 	case COMPLETED_OK:
-		if (bio_data_dir(req->master_bio) == WRITE)
+		if (req->rq_state & RQ_WRITE)
 			mdev->writ_cnt += req->i.size >> 9;
 		else
 			mdev->read_cnt += req->i.size >> 9;
@@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		put_ldev(mdev);
 		break;
 
+	case ABORT_DISK_IO:
+		req->rq_state |= RQ_LOCAL_ABORTED;
+		if (req->rq_state & RQ_WRITE)
+			_req_may_be_done_not_susp(req, m);
+		else
+			goto goto_queue_for_net_read;
+		break;
+
 	case WRITE_COMPLETED_WITH_ERROR:
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
@@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		__drbd_chk_io_error(mdev, false);
 		put_ldev(mdev);
 
+	goto_queue_for_net_read:
+
 		/* no point in retrying if there is no good remote data,
 		 * or we have no connection. */
 		if (mdev->state.pdsk != D_UP_TO_DATE) {
@@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data)
 	struct drbd_request *req; /* oldest request */
 	struct list_head *le;
 	struct net_conf *nc;
-	unsigned long et; /* effective timeout = ko_count * timeout */
+	unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */
 
 	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
-	et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+	ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+
+	if (get_ldev(mdev)) {
+		dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
+		put_ldev(mdev);
+	}
 	rcu_read_unlock();
 
-	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+	et = min_not_zero(dt, ent);
+
+	if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
 		return; /* Recurring timer stopped */
 
 	spin_lock_irq(&tconn->req_lock);
@@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data)
 
 	le = le->prev;
 	req = list_entry(le, struct drbd_request, tl_requests);
-	if (time_is_before_eq_jiffies(req->start_time + et)) {
-		if (req->rq_state & RQ_NET_PENDING) {
+	if (ent && req->rq_state & RQ_NET_PENDING) {
+		if (time_is_before_eq_jiffies(req->start_time + ent)) {
 			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
-		} else {
-			dev_warn(DEV, "Local backing block device frozen?\n");
-			mod_timer(&mdev->request_timer, jiffies + et);
+			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
+		}
+	}
+	if (dt && req->rq_state & RQ_LOCAL_PENDING) {
+		if (time_is_before_eq_jiffies(req->start_time + dt)) {
+			dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+			__drbd_chk_io_error(mdev, 1);
 		}
-	} else {
-		mod_timer(&mdev->request_timer, req->start_time + et);
 	}
-
 	spin_unlock_irq(&tconn->req_lock);
+	mod_timer(&mdev->request_timer, req->start_time + et);
 }
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 5135c95fbf85..f6aff150addb 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -106,6 +106,7 @@ enum drbd_req_event {
 	READ_COMPLETED_WITH_ERROR,
 	READ_AHEAD_COMPLETED_WITH_ERROR,
 	WRITE_COMPLETED_WITH_ERROR,
+	ABORT_DISK_IO,
 	COMPLETED_OK,
 	RESEND,
 	FAIL_FROZEN_DISK_IO,
@@ -119,18 +120,21 @@ enum drbd_req_event {
  * same time, so we should hold the request lock anyways.
  */
 enum drbd_req_state_bits {
-	/* 210
-	 * 000: no local possible
-	 * 001: to be submitted
+	/* 3210
+	 * 0000: no local possible
+	 * 0001: to be submitted
 	 *    UNUSED, we could map: 011: submitted, completion still pending
-	 * 110: completed ok
-	 * 010: completed with error
+	 * 0110: completed ok
+	 * 0010: completed with error
+	 * 1001: Aborted (before completion)
+	 * 1x10: Aborted and completed -> free
 	 */
 	__RQ_LOCAL_PENDING,
 	__RQ_LOCAL_COMPLETED,
 	__RQ_LOCAL_OK,
+	__RQ_LOCAL_ABORTED,
 
-	/* 76543
+	/* 87654
 	 * 00000: no network possible
 	 * 00001: to be send
 	 * 00011: to be send, on worker queue
@@ -209,8 +213,9 @@ enum drbd_req_state_bits {
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
 #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
 #define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
+#define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)
 
-#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */
+#define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)
 
 #define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
 #define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 4c13a6f4f184..f51cefdbeff3 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -29,6 +29,9 @@
 #include "drbd_int.h"
 #include "drbd_req.h"
 
+/* in drbd_main.c */
+extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what);
+
 struct after_state_chg_work {
 	struct drbd_work w;
 	union drbd_state os;
@@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		rcu_read_unlock();
 		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
 
+		/* Immediately allow completion of all application IO, that waits
+		   for completion from the local disk. */
+		tl_apply(mdev, ABORT_DISK_IO);
+
 		/* current state still has to be D_FAILED,
 		 * there is only one way out: to D_DISKLESS,
 		 * and that may only happen after our put_ldev below. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 6410c55831e0..dac8d9bc4bec 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,11 +67,18 @@ rwlock_t global_state_lock;
 void drbd_md_io_complete(struct bio *bio, int error)
 {
 	struct drbd_md_io *md_io;
+	struct drbd_conf *mdev;
 
 	md_io = (struct drbd_md_io *)bio->bi_private;
+	mdev = container_of(md_io, struct drbd_conf, md_io);
+
 	md_io->error = error;
 
-	complete(&md_io->event);
+	md_io->done = 1;
+	wake_up(&mdev->misc_wait);
+	bio_put(bio);
+	drbd_md_put_buffer(mdev);
+	put_ldev(mdev);
 }
 
 /* reads on behalf of the partner,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index e879a9324380..2e6cefefe5e5 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
 	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
 	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
 )
 
+GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach)
+)
+
 /*
  * Notifications and commands (genlmsghdr->cmd)
  */
@@ -335,7 +340,9 @@ GENL_op(
 )
 
 GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+
 GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index f1046b13d9f6..ddd332db2a5d 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -50,6 +50,12 @@
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
 
+ /* If backing disk takes longer than disk_timeout, mark the disk as failed */
+#define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
+#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
+#define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */
+#define DRBD_DISK_TIMEOUT_SCALE '1'
+
   /* active connection retries when C_WF_CONNECTION */
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120
-- 
cgit v1.2.3-55-g7522


From d5d7ebd42250620a6da2a8f6943c024391433488 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Tue, 5 Jul 2011 20:59:26 +0200
Subject: drbd: on attach, enforce clean meta data

Detection of unclean shutdown has moved into user space.

The kernel code will, whenever it updates the meta data, mark it as
"unclean", and will refuse to attach to such unclean meta data.

"drbdadm up" now schedules "drbdmeta apply-al", which will apply
the activity log to the bitmap, and/or reinitialize it, if necessary,
as well as set a "clean" indicator flag.

This moves a bit code out of kernel space.
As a side effect, it also prevents some 8.3 module from accidentally
ignoring the 8.4 style activity log, if someone should downgrade,
whether on purpose, or accidentally because he changed kernel versions
without providing an 8.4 for the new kernel, and the new kernel comes
with in-tree 8.3.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c | 259 ---------------------------------------
 drivers/block/drbd/drbd_int.h    |   6 -
 drivers/block/drbd/drbd_main.c   |  26 ++--
 drivers/block/drbd/drbd_nl.c     |  19 +--
 drivers/block/drbd/drbd_state.c  |   1 +
 include/linux/drbd.h             |  10 +-
 6 files changed, 28 insertions(+), 293 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 58b5b61628fc..da8ffd54fc18 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -462,265 +462,6 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 	return 0;
 }
 
-/* FIXME
- * reading of the activity log,
- * and potentially dirtying of the affected bitmap regions,
- * should be done from userland only.
- * DRBD would simply always attach with an empty activity log,
- * and refuse to attach to something that looks like a crashed primary.
- */
-
-/**
- * drbd_al_read_tr() - Read a single transaction from the on disk activity log
- * @mdev:	DRBD device.
- * @bdev:	Block device to read form.
- * @b:		pointer to an al_transaction.
- * @index:	On disk slot of the transaction to read.
- *
- * Returns -1 on IO error, 0 on checksum error and 1 upon success.
- */
-static int drbd_al_read_tr(struct drbd_conf *mdev,
-			   struct drbd_backing_dev *bdev,
-			   int index)
-{
-	struct al_transaction_on_disk *b = page_address(mdev->md_io_page);
-	sector_t sector;
-	u32 crc;
-
-	sector =  bdev->md.md_offset
-		+ bdev->md.al_offset
-		+ index * (MD_BLOCK_SIZE>>9);
-
-	/* Dont process error normally,
-	 * as this is done before disk is attached! */
-	if (drbd_md_sync_page_io(mdev, bdev, sector, READ))
-		return -1;
-
-	if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC)))
-		return 0;
-
-	if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION))
-		return 0;
-
-	if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX))
-		return 0;
-
-	if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX))
-		return 0;
-
-	crc = be32_to_cpu(b->crc32c);
-	b->crc32c = 0;
-	if (!expect(crc == crc32c(0, b, 4096)))
-		return 0;
-
-	return 1;
-}
-
-/**
- * drbd_al_read_log() - Restores the activity log from its on disk representation.
- * @mdev:	DRBD device.
- * @bdev:	Block device to read form.
- *
- * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
- */
-int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
-{
-	struct al_transaction_on_disk *b;
-	int i;
-	int rv;
-	int mx;
-	int active_extents = 0;
-	int transactions = 0;
-	int found_valid = 0;
-	int found_initialized = 0;
-	int from = 0;
-	int to = 0;
-	u32 from_tnr = 0;
-	u32 to_tnr = 0;
-	u32 cnr;
-
-	/* Note that this is expected to be called with a newly created,
-	 * clean and all unused activity log of the "expected size".
-	 */
-
-	/* lock out all other meta data io for now,
-	 * and make sure the page is mapped.
-	 */
-	b = drbd_md_get_buffer(mdev);
-	if (!b)
-		return 0;
-
-	/* Always use the full ringbuffer space for now.
-	 * possible optimization: read in all of it,
-	 * then scan the in-memory pages. */
-
-	mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
-
-	/* Find the valid transaction in the log */
-	for (i = 0; i < mx; i++) {
-		rv = drbd_al_read_tr(mdev, bdev, i);
-		/* invalid data in that block */
-		if (rv == 0)
-			continue;
-		if (be16_to_cpu(b->transaction_type) == AL_TR_INITIALIZED) {
-			++found_initialized;
-			continue;
-		}
-
-		/* IO error */
-		if (rv == -1) {
-			drbd_md_put_buffer(mdev);
-			return 0;
-		}
-
-		cnr = be32_to_cpu(b->tr_number);
-		if (++found_valid == 1) {
-			from = i;
-			to = i;
-			from_tnr = cnr;
-			to_tnr = cnr;
-			continue;
-		}
-
-		D_ASSERT(cnr != to_tnr);
-		D_ASSERT(cnr != from_tnr);
-		if ((int)cnr - (int)from_tnr < 0) {
-			D_ASSERT(from_tnr - cnr + i - from == mx);
-			from = i;
-			from_tnr = cnr;
-		}
-		if ((int)cnr - (int)to_tnr > 0) {
-			D_ASSERT(cnr - to_tnr == i - to);
-			to = i;
-			to_tnr = cnr;
-		}
-	}
-
-	if (!found_valid) {
-		if (found_initialized != mx)
-			dev_warn(DEV, "No usable activity log found.\n");
-		drbd_md_put_buffer(mdev);
-		return 1;
-	}
-
-	/* Read the valid transactions.
-	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
-	i = from;
-	while (1) {
-		struct lc_element *e;
-		unsigned j, n, slot, extent_nr;
-
-		rv = drbd_al_read_tr(mdev, bdev, i);
-		if (!expect(rv != 0))
-			goto cancel;
-		if (rv == -1) {
-			drbd_md_put_buffer(mdev);
-			return 0;
-		}
-
-		/* deal with different transaction types.
-		 * not yet implemented */
-		if (!expect(b->transaction_type == 0))
-			goto cancel;
-
-		/* on the fly re-create/resize activity log?
-		 * will be a special transaction type flag. */
-		if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements))
-			goto cancel;
-		if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements))
-			goto cancel;
-
-		/* We are the only user of the activity log right now,
-		 * don't actually need to take that lock. */
-		spin_lock_irq(&mdev->al_lock);
-
-		/* first, apply the context, ... */
-		for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr);
-		     j < AL_CONTEXT_PER_TRANSACTION &&
-		     slot < mdev->act_log->nr_elements; j++, slot++) {
-			extent_nr = be32_to_cpu(b->context[j]);
-			e = lc_element_by_index(mdev->act_log, slot);
-			if (e->lc_number != extent_nr) {
-				if (extent_nr != LC_FREE)
-					active_extents++;
-				else
-					active_extents--;
-			}
-			lc_set(mdev->act_log, extent_nr, slot);
-		}
-
-		/* ... then apply the updates,
-		 * which override the context information.
-		 * drbd_al_read_tr already did the rangecheck
-		 * on n <= AL_UPDATES_PER_TRANSACTION */
-		n = be16_to_cpu(b->n_updates);
-		for (j = 0; j < n; j++) {
-			slot = be16_to_cpu(b->update_slot_nr[j]);
-			extent_nr = be32_to_cpu(b->update_extent_nr[j]);
-			if (!expect(slot < mdev->act_log->nr_elements))
-				break;
-			e = lc_element_by_index(mdev->act_log, slot);
-			if (e->lc_number != extent_nr) {
-				if (extent_nr != LC_FREE)
-					active_extents++;
-				else
-					active_extents--;
-			}
-			lc_set(mdev->act_log, extent_nr, slot);
-		}
-		spin_unlock_irq(&mdev->al_lock);
-
-		transactions++;
-
-cancel:
-		if (i == to)
-			break;
-		i++;
-		if (i >= mx)
-			i = 0;
-	}
-
-	mdev->al_tr_number = to_tnr+1;
-	mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
-
-	/* ok, we are done with it */
-	drbd_md_put_buffer(mdev);
-
-	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
-	     transactions, active_extents);
-
-	return 1;
-}
-
-/**
- * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents
- * @mdev:	DRBD device.
- */
-void drbd_al_apply_to_bm(struct drbd_conf *mdev)
-{
-	unsigned int enr;
-	unsigned long add = 0;
-	char ppb[10];
-	int i, tmp;
-
-	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-
-	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		tmp = drbd_bm_ALe_set_all(mdev, enr);
-		dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
-		add += tmp;
-	}
-
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-
-	dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
-	     ppsize(ppb, Bit2KB(add)));
-}
-
 static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
 {
 	int rv;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4e582058a7c9..9d0d6d0fb820 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -164,10 +164,6 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
 /* usual integer division */
 #define div_floor(A, B) ((A)/(B))
 
-/* drbd_meta-data.c (still in drbd_main.c) */
-/* 4th incarnation of the disk layout. */
-#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
-
 extern struct ratelimit_state drbd_ratelimit_state;
 extern struct idr minors; /* RCU, updates: genl_lock() */
 extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */
@@ -1560,7 +1556,6 @@ extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
 extern int drbd_rs_del_all(struct drbd_conf *mdev);
 extern void drbd_rs_failed_io(struct drbd_conf *mdev,
 		sector_t sector, int size);
-extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
 extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
 extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
@@ -1570,7 +1565,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
 /* drbd_nl.c */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 15384986e4a4..f1d696ab6e83 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2932,7 +2932,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
 	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
-	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN);
 
 	buffer->md_size_sect  = cpu_to_be32(mdev->ldev->md.md_size_sect);
 	buffer->al_offset     = cpu_to_be32(mdev->ldev->md.al_offset);
@@ -2967,11 +2967,12 @@ out:
  * @bdev:	Device from which the meta data should be read in.
  *
  * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
- * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ * something goes wrong.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
 	struct meta_data_on_disk *buffer;
+	u32 magic, flags;
 	int i, rv = NO_ERROR;
 
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
@@ -2989,8 +2990,20 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		goto err;
 	}
 
-	if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
-		dev_err(DEV, "Error while reading metadata, magic not found.\n");
+	magic = be32_to_cpu(buffer->magic);
+	flags = be32_to_cpu(buffer->flags);
+	if (magic == DRBD_MD_MAGIC_84_UNCLEAN ||
+	    (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) {
+			/* btw: that's Activity Log clean, not "all" clean. */
+		dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n");
+		rv = ERR_MD_UNCLEAN;
+		goto err;
+	}
+	if (magic != DRBD_MD_MAGIC_08) {
+		if (magic == DRBD_MD_MAGIC_07) 
+			dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
+		else
+			dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
 		rv = ERR_MD_INVALID;
 		goto err;
 	}
@@ -3035,11 +3048,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	}
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	/* This blocks wants to be get removed... */
-	bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents);
-	if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
-		bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
-
  err:
 	drbd_md_put_buffer(mdev);
  out:
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index bf8d0b077624..b39f5dc0f47b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1267,7 +1267,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	union drbd_state ns, os;
 	enum drbd_state_rv rv;
 	struct net_conf *nc;
-	int cp_discovered = 0;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1477,11 +1476,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto force_diskless_dec;
 	}
 
-	if (!drbd_al_read_log(mdev, nbc)) {
-		retcode = ERR_IO_MD_DISK;
-		goto force_diskless_dec;
-	}
-
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
 	if (new_disk_conf->md_flushes)
@@ -1511,10 +1505,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
-	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) {
+	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod))
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
-		cp_discovered = 1;
-	}
 
 	mdev->send_cnt = 0;
 	mdev->recv_cnt = 0;
@@ -1566,15 +1558,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	if (cp_discovered) {
-		drbd_al_apply_to_bm(mdev);
-		if (drbd_bitmap_io(mdev, &drbd_bm_write,
-			"crashed primary apply AL", BM_LOCKED_MASK)) {
-			retcode = ERR_IO_MD_DISK;
-			goto force_diskless_dec;
-		}
-	}
-
 	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
 		drbd_suspend_al(mdev); /* IO is still suspended here... */
 
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index f51cefdbeff3..c4d0d96d7906 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1017,6 +1017,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
 						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
 
+		mdf &= ~MDF_AL_CLEAN;
 		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
 			mdf |= MDF_CRASHED_PRIMARY;
 		if (mdev->state.role == R_PRIMARY ||
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 161cd414b036..1e9f754b66ac 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -162,6 +162,7 @@ enum drbd_ret_code {
 	ERR_INVALID_REQUEST	= 162,
 	ERR_NEED_APV_100	= 163,
 	ERR_NEED_ALLOW_TWO_PRI  = 164,
+	ERR_MD_UNCLEAN          = 165,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -321,7 +322,8 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
 #define MDF_FULL_SYNC		(1 << 3)
 #define MDF_WAS_UP_TO_DATE	(1 << 4)
 #define MDF_PEER_OUT_DATED	(1 << 5)
-#define MDF_CRASHED_PRIMARY     (1 << 6)
+#define MDF_CRASHED_PRIMARY	(1 << 6)
+#define MDF_AL_CLEAN		(1 << 7)
 
 enum drbd_uuid_index {
 	UI_CURRENT,
@@ -341,10 +343,16 @@ enum drbd_timeout_flag {
 
 #define UUID_JUST_CREATED ((__u64)4)
 
+/* magic numbers used in meta data and network packets */
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
 #define DRBD_MAGIC_100 0x8620ec20
 
+#define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3)
+#define DRBD_MD_MAGIC_08   (DRBD_MAGIC+4)
+#define DRBD_MD_MAGIC_84_UNCLEAN	(DRBD_MAGIC+5)
+
+
 /* how I came up with this magic?
  * base64 decode "actlog==" ;) */
 #define DRBD_AL_MAGIC 0x69cb65a2
-- 
cgit v1.2.3-55-g7522


From 65d94927e036cd8e8e1406fa7fc387b4ae730159 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 13 Jul 2011 10:24:51 +0200
Subject: drbd: Changed some defaults

* Enabled the resync controller, with a fill target of 50Kib. That gives
  reasonable resync speeds without tuning. A much better default than
  the 250KiB/s fixed.

* Enable bitmap compression. It is save to use, and most people have
  more CPU power than network bandwidth.

* ko-count of 7: Abort a connection if the peer fails to process a
  write request within 42 seconds.

* al-extents of 1237: ~5 GiB seems to be a much more sane default
  these days.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index ddd332db2a5d..defdebfecb72 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,7 +16,7 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX 256
+#define DRBD_MINOR_COUNT_MAX (1U << 20)
 #define DRBD_MINOR_COUNT_DEF 32
 
 #define DRBD_VOLUME_MAX 65535
@@ -99,7 +99,7 @@
    * 200 should be more than enough even for very short timeouts */
 #define DRBD_KO_COUNT_MIN  0
 #define DRBD_KO_COUNT_MAX  200
-#define DRBD_KO_COUNT_DEF  0
+#define DRBD_KO_COUNT_DEF  7
 /* } */
 
 /* syncer { */
@@ -117,7 +117,7 @@
    * 919 * 7 = 6433 */
 #define DRBD_AL_EXTENTS_MIN  7
 #define DRBD_AL_EXTENTS_MAX  6433
-#define DRBD_AL_EXTENTS_DEF  127
+#define DRBD_AL_EXTENTS_DEF  1237
 
 #define DRBD_MINOR_NUMBER_MIN  -1
 #define DRBD_MINOR_NUMBER_MAX  (1<<30)
@@ -151,7 +151,7 @@
 
 #define DRBD_C_PLAN_AHEAD_MIN  0
 #define DRBD_C_PLAN_AHEAD_MAX  300
-#define DRBD_C_PLAN_AHEAD_DEF  0 /* RS rate controller disabled by default */
+#define DRBD_C_PLAN_AHEAD_DEF  20
 
 #define DRBD_C_DELAY_TARGET_MIN 1
 #define DRBD_C_DELAY_TARGET_MAX 100
@@ -159,7 +159,7 @@
 
 #define DRBD_C_FILL_TARGET_MIN 0
 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
-#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
+#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
 
 #define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
 #define DRBD_C_MAX_RATE_MAX     (4 << 20)
@@ -167,7 +167,7 @@
 
 #define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
-#define DRBD_C_MIN_RATE_DEF     4096
+#define DRBD_C_MIN_RATE_DEF     250
 
 #define DRBD_CONG_FILL_MIN	0
 #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
@@ -187,6 +187,6 @@
 
 #define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
 #define DRBD_ALWAYS_ASBP_DEF	0
-#define DRBD_USE_RLE_DEF	0
+#define DRBD_USE_RLE_DEF	1
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 32bdb64038ba3127245912dae2cc8a450bb1d705 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Mon, 9 May 2011 18:26:20 +0200
Subject: drbd: Define scale factors in a single place

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index defdebfecb72..cd3565cfed44 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -18,29 +18,35 @@
 #define DRBD_MINOR_COUNT_MIN 1
 #define DRBD_MINOR_COUNT_MAX (1U << 20)
 #define DRBD_MINOR_COUNT_DEF 32
+#define DRBD_MINOR_COUNT_SCALE '1'
 
 #define DRBD_VOLUME_MAX 65535
 
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
+#define DRBD_DIALOG_REFRESH_SCALE '1'
 
 /* valid port number */
 #define DRBD_PORT_MIN 1
 #define DRBD_PORT_MAX 0xffff
+#define DRBD_PORT_SCALE '1'
 
 /* startup { */
   /* if you want more than 3.4 days, disable */
 #define DRBD_WFC_TIMEOUT_MIN 0
 #define DRBD_WFC_TIMEOUT_MAX 300000
 #define DRBD_WFC_TIMEOUT_DEF 0
+#define DRBD_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_DEGR_WFC_TIMEOUT_MIN 0
 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
 /* }*/
 
 /* net { */
@@ -49,6 +55,7 @@
 #define DRBD_TIMEOUT_MIN 1
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
+#define DRBD_TIMEOUT_SCALE '1'
 
  /* If backing disk takes longer than disk_timeout, mark the disk as failed */
 #define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
@@ -60,46 +67,55 @@
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120
 #define DRBD_CONNECT_INT_DEF 10   /* seconds */
+#define DRBD_CONNECT_INT_SCALE '1'
 
   /* keep-alive probes when idle */
 #define DRBD_PING_INT_MIN 1
 #define DRBD_PING_INT_MAX 120
 #define DRBD_PING_INT_DEF 10
+#define DRBD_PING_INT_SCALE '1'
 
  /* timeout for the ping packets.*/
 #define DRBD_PING_TIMEO_MIN  1
 #define DRBD_PING_TIMEO_MAX  300
 #define DRBD_PING_TIMEO_DEF  5
+#define DRBD_PING_TIMEO_SCALE '1'
 
   /* max number of write requests between write barriers */
 #define DRBD_MAX_EPOCH_SIZE_MIN 1
 #define DRBD_MAX_EPOCH_SIZE_MAX 20000
 #define DRBD_MAX_EPOCH_SIZE_DEF 2048
+#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
 
   /* I don't think that a tcp send buffer of more than 10M is useful */
 #define DRBD_SNDBUF_SIZE_MIN  0
 #define DRBD_SNDBUF_SIZE_MAX  (10<<20)
 #define DRBD_SNDBUF_SIZE_DEF  0
+#define DRBD_SNDBUF_SIZE_SCALE '1'
 
 #define DRBD_RCVBUF_SIZE_MIN  0
 #define DRBD_RCVBUF_SIZE_MAX  (10<<20)
 #define DRBD_RCVBUF_SIZE_DEF  0
+#define DRBD_RCVBUF_SIZE_SCALE '1'
 
   /* @4k PageSize -> 128kB - 512MB */
 #define DRBD_MAX_BUFFERS_MIN  32
 #define DRBD_MAX_BUFFERS_MAX  131072
 #define DRBD_MAX_BUFFERS_DEF  2048
+#define DRBD_MAX_BUFFERS_SCALE '1'
 
   /* @4k PageSize -> 4kB - 512MB */
 #define DRBD_UNPLUG_WATERMARK_MIN  1
 #define DRBD_UNPLUG_WATERMARK_MAX  131072
 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+#define DRBD_UNPLUG_WATERMARK_SCALE '1'
 
   /* 0 is disabled.
    * 200 should be more than enough even for very short timeouts */
 #define DRBD_KO_COUNT_MIN  0
 #define DRBD_KO_COUNT_MAX  200
 #define DRBD_KO_COUNT_DEF  7
+#define DRBD_KO_COUNT_SCALE '1'
 /* } */
 
 /* syncer { */
@@ -118,6 +134,7 @@
 #define DRBD_AL_EXTENTS_MIN  7
 #define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  1237
+#define DRBD_AL_EXTENTS_SCALE '1'
 
 #define DRBD_MINOR_NUMBER_MIN  -1
 #define DRBD_MINOR_NUMBER_MAX  (1<<30)
@@ -148,34 +165,42 @@
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
 #define DRBD_MAX_BIO_BVECS_DEF 0
+#define DRBD_MAX_BIO_BVECS_SCALE '1'
 
 #define DRBD_C_PLAN_AHEAD_MIN  0
 #define DRBD_C_PLAN_AHEAD_MAX  300
 #define DRBD_C_PLAN_AHEAD_DEF  20
+#define DRBD_C_PLAN_AHEAD_SCALE '1'
 
 #define DRBD_C_DELAY_TARGET_MIN 1
 #define DRBD_C_DELAY_TARGET_MAX 100
 #define DRBD_C_DELAY_TARGET_DEF 10
+#define DRBD_C_DELAY_TARGET_SCALE '1'
 
 #define DRBD_C_FILL_TARGET_MIN 0
 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
 #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
+#define DRBD_C_FILL_TARGET_SCALE 's'  /* sectors */
 
-#define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
+#define DRBD_C_MAX_RATE_MIN     250
 #define DRBD_C_MAX_RATE_MAX     (4 << 20)
 #define DRBD_C_MAX_RATE_DEF     102400
+#define DRBD_C_MAX_RATE_SCALE	'k'  /* kilobytes */
 
-#define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MIN     0
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
 #define DRBD_C_MIN_RATE_DEF     250
+#define DRBD_C_MIN_RATE_SCALE	'k'  /* kilobytes */
 
 #define DRBD_CONG_FILL_MIN	0
 #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
 #define DRBD_CONG_FILL_DEF	0
+#define DRBD_CONG_FILL_SCALE	's'  /* sectors */
 
 #define DRBD_CONG_EXTENTS_MIN	DRBD_AL_EXTENTS_MIN
 #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
 #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
+#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
 
 #define DRBD_PROTOCOL_DEF DRBD_PROT_C
 
-- 
cgit v1.2.3-55-g7522


From 0317d9ecbc9bac43642b4aa70e3e1106f4fd26a1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 13 Jul 2011 13:40:30 +0200
Subject: drbd: Fix the maximum accepted minor device number

The maximum minor device number allowed by the kernel is (1<<20 - 1).  Reject
device numbers higher than that to earlier catch possible errors.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index cd3565cfed44..7d956e91ae7b 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -137,7 +137,7 @@
 #define DRBD_AL_EXTENTS_SCALE '1'
 
 #define DRBD_MINOR_NUMBER_MIN  -1
-#define DRBD_MINOR_NUMBER_MAX  (1<<30)
+#define DRBD_MINOR_NUMBER_MAX  ((1 << 20) - 1)
 #define DRBD_MINOR_NUMBER_DEF  -1
 #define DRBD_MINOR_NUMBER_SCALE '1'
 
-- 
cgit v1.2.3-55-g7522


From b80c043327ea4faac62a329a1d35f16c47a5128e Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 18 Jul 2011 11:09:17 +0200
Subject: drbd: The minor_count module parameter is only a hint nowadays

* The max of minor_count is 255
* In drbdadm count the number of minors, instead of finding
  the highest minor number
* No longer us the magic in the init script

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 7d956e91ae7b..6d0a24331ed2 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,7 +16,7 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX (1U << 20)
+#define DRBD_MINOR_COUNT_MAX 255
 #define DRBD_MINOR_COUNT_DEF 32
 #define DRBD_MINOR_COUNT_SCALE '1'
 
-- 
cgit v1.2.3-55-g7522


From 380207d08e7c4d1b19c0323777278992b4fbf9d6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Fri, 11 Nov 2011 12:31:20 +0100
Subject: drbd: Load balancing of read requests

New config option for the disk secition "read-balancing", with
the values: prefer-local, prefer-remote, round-robin, when-congested-remote.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  1 +
 drivers/block/drbd/drbd_receiver.c |  2 +-
 drivers/block/drbd/drbd_req.c      | 57 +++++++++++++++++++++++++++++++++++++-
 include/linux/drbd.h               |  8 ++++++
 include/linux/drbd_genl.h          |  1 +
 include/linux/drbd_limits.h        |  1 +
 6 files changed, 68 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d397681fb7aa..e2cccb40f5af 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -698,6 +698,7 @@ enum {
 	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
 	B_RS_H_DONE,		/* Before resync handler done (already executed) */
 	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
+	READ_BALANCE_RR,
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e546dd3fab8a..733b8bd663d5 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
 
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
-	dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
+	dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
 
 	return validate_req_change_req_state(mdev, p->block_id, sector,
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index ceb04a94aace..98251e2a7fb7 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
 			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
 
+		if (!(req->rq_state & RQ_WRITE) &&
+		    mdev->state.disk == D_UP_TO_DATE &&
+		    !IS_ERR_OR_NULL(req->private_bio))
+			goto goto_read_retry_local;
+
 		/* if it is still queued, we may not complete it here.
 		 * it will be canceled soon. */
 		if (!(req->rq_state & RQ_NET_QUEUED))
@@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
 
 		req->rq_state |= RQ_NET_DONE;
+
+		if (!(req->rq_state & RQ_WRITE) &&
+		    mdev->state.disk == D_UP_TO_DATE &&
+		    !IS_ERR_OR_NULL(req->private_bio))
+			goto goto_read_retry_local;
+
 		_req_may_be_done_not_susp(req, m);
 		/* else: done by HANDED_OVER_TO_NETWORK */
 		break;
 
+	goto_read_retry_local:
+		req->rq_state |= RQ_LOCAL_PENDING;
+		req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+		generic_make_request(req->private_bio);
+		break;
+
 	case FAIL_FROZEN_DISK_IO:
 		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
 			break;
@@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		dec_ap_pending(mdev);
 		req->rq_state &= ~RQ_NET_PENDING;
 		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
+		if (!IS_ERR_OR_NULL(req->private_bio)) {
+			bio_put(req->private_bio);
+			req->private_bio = NULL;
+			put_ldev(mdev);
+		}
 		_req_may_be_done_not_susp(req, m);
 		break;
 	};
@@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int
 	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
 }
 
+static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
+{
+	enum drbd_read_balancing rbm;
+	struct backing_dev_info *bdi;
+
+	if (mdev->state.pdsk < D_UP_TO_DATE)
+		return false;
+
+	rcu_read_lock();
+	rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing;
+	rcu_read_unlock();
+
+	switch (rbm) {
+	case RB_CONGESTED_REMOTE:
+		bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
+		return bdi_read_congested(bdi);
+	case RB_LEAST_PENDING:
+		return atomic_read(&mdev->local_cnt) >
+			atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
+	case RB_ROUND_ROBIN:
+		return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
+	case RB_PREFER_REMOTE:
+		return true;
+	case RB_PREFER_LOCAL:
+	default:
+		return false;
+	}
+}
+
 /*
  * complete_conflicting_writes  -  wait for any conflicting write requests
  *
@@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 				bio_put(req->private_bio);
 				req->private_bio = NULL;
 				put_ldev(mdev);
+			} else if (remote_due_to_read_balancing(mdev)) {
+				/* Keep the private bio in case we need it
+				   for a local retry */
+				local = 0;
 			}
 		}
 		remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
@@ -1017,7 +1072,7 @@ fail_free_complete:
 	if (req->rq_state & RQ_IN_ACT_LOG)
 		drbd_al_complete_io(mdev, &req->i);
 fail_and_free_req:
-	if (local) {
+	if (!IS_ERR_OR_NULL(req->private_bio)) {
 		bio_put(req->private_bio);
 		req->private_bio = NULL;
 		put_ldev(mdev);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1e9f754b66ac..157ba3d74dc7 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -102,6 +102,14 @@ enum drbd_on_congestion {
 	OC_DISCONNECT,
 };
 
+enum drbd_read_balancing {
+	RB_PREFER_LOCAL,
+	RB_PREFER_REMOTE,
+	RB_ROUND_ROBIN,
+	RB_LEAST_PENDING,
+	RB_CONGESTED_REMOTE,
+};
+
 /* KEEP the order, do not delete or insert. Only append. */
 enum drbd_ret_code {
 	ERR_CODE_BASE		= 100,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 2e6cefefe5e5..826008f297fe 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
+	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 6d0a24331ed2..17ef66a5c114 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -161,6 +161,7 @@
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
 #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
 #define DRBD_ON_CONGESTION_DEF OC_BLOCK
+#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
 
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
-- 
cgit v1.2.3-55-g7522


From d60de03a6694302b691bdf858ede9cbdfb7112d6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Thu, 17 Nov 2011 10:12:31 +0100
Subject: drbd: Load balancing method: striping

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_req.c | 13 +++++++++++--
 include/linux/drbd.h          |  6 ++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 98251e2a7fb7..5b28de0c5960 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -745,10 +745,11 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int
 	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
 }
 
-static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
+static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector)
 {
 	enum drbd_read_balancing rbm;
 	struct backing_dev_info *bdi;
+	int stripe_shift;
 
 	if (mdev->state.pdsk < D_UP_TO_DATE)
 		return false;
@@ -764,6 +765,14 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
 	case RB_LEAST_PENDING:
 		return atomic_read(&mdev->local_cnt) >
 			atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
+	case RB_32K_STRIPING:  /* stripe_shift = 15 */
+	case RB_64K_STRIPING:
+	case RB_128K_STRIPING:
+	case RB_256K_STRIPING:
+	case RB_512K_STRIPING:
+	case RB_1M_STRIPING:   /* stripe_shift = 20 */
+		stripe_shift = (rbm - RB_32K_STRIPING + 15);
+		return (sector >> (stripe_shift - 9)) & 1;
 	case RB_ROUND_ROBIN:
 		return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
 	case RB_PREFER_REMOTE:
@@ -841,7 +850,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 				bio_put(req->private_bio);
 				req->private_bio = NULL;
 				put_ldev(mdev);
-			} else if (remote_due_to_read_balancing(mdev)) {
+			} else if (remote_due_to_read_balancing(mdev, sector)) {
 				/* Keep the private bio in case we need it
 				   for a local retry */
 				local = 0;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 157ba3d74dc7..1e86156c10f7 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -108,6 +108,12 @@ enum drbd_read_balancing {
 	RB_ROUND_ROBIN,
 	RB_LEAST_PENDING,
 	RB_CONGESTED_REMOTE,
+	RB_32K_STRIPING,
+	RB_64K_STRIPING,
+	RB_128K_STRIPING,
+	RB_256K_STRIPING,
+	RB_512K_STRIPING,
+	RB_1M_STRIPING,
 };
 
 /* KEEP the order, do not delete or insert. Only append. */
-- 
cgit v1.2.3-55-g7522


From 26ec92871be1e6bd48d0be9ab38ee1ebbeea49f1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Wed, 11 Jul 2012 20:36:03 +0200
Subject: drbd: Stop using NLA_PUT*().

These macros no longer exist in kernel version v3.5-rc1.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c      | 54 +++++++++++++++++++++++----------------
 include/linux/genl_magic_func.h   |  8 +++---
 include/linux/genl_magic_struct.h | 16 ++++++------
 3 files changed, 45 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index cbd45de533cb..dc5bd6bbb280 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2554,13 +2554,17 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsi
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
 	if (!nla)
 		goto nla_put_failure;
-	if (vnr != VOLUME_UNSPECIFIED)
-		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name);
-	if (tconn->my_addr_len)
-		NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr);
-	if (tconn->peer_addr_len)
-		NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr);
+	if (vnr != VOLUME_UNSPECIFIED &&
+	    nla_put_u32(skb, T_ctx_volume, vnr))
+		goto nla_put_failure;
+	if (nla_put_string(skb, T_ctx_resource_name, tconn->name))
+		goto nla_put_failure;
+	if (tconn->my_addr_len &&
+	    nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr))
+		goto nla_put_failure;
+	if (tconn->peer_addr_len &&
+	    nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr))
+		goto nla_put_failure;
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2618,20 +2622,23 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
 	if (!nla)
 		goto nla_put_failure;
-	NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
-	NLA_PUT_U32(skb, T_current_state, mdev->state.i);
-	NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
-	NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
+	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
+	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
+	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)))
+		goto nla_put_failure;
 
 	if (got_ldev) {
-		NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
-		NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
-		NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
-		NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
+		if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) ||
+		    nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) ||
+		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) ||
+		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev)))
+			goto nla_put_failure;
 		if (C_SYNC_SOURCE <= mdev->state.conn &&
 		    C_PAUSED_SYNC_T >= mdev->state.conn) {
-			NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
-			NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
+			if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) ||
+			    nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed))
+				goto nla_put_failure;
 		}
 	}
 
@@ -2641,15 +2648,18 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		case SIB_GET_STATUS_REPLY:
 			break;
 		case SIB_STATE_CHANGE:
-			NLA_PUT_U32(skb, T_prev_state, sib->os.i);
-			NLA_PUT_U32(skb, T_new_state, sib->ns.i);
+			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
+			    nla_put_u32(skb, T_new_state, sib->ns.i))
+				goto nla_put_failure;
 			break;
 		case SIB_HELPER_POST:
-			NLA_PUT_U32(skb,
-				T_helper_exit_code, sib->helper_exit_code);
+			if (nla_put_u32(skb, T_helper_exit_code,
+					sib->helper_exit_code))
+				goto nla_put_failure;
 			/* fall through */
 		case SIB_HELPER_PRE:
-			NLA_PUT_STRING(skb, T_helper, sib->helper_name);
+			if (nla_put_string(skb, T_helper, sib->helper_name))
+				goto nla_put_failure;
 			break;
 		}
 	}
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 0b8a88e2e83e..023bc346b877 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -367,7 +367,8 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 		__is_signed)						\
 	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
-		__put(skb, attr_nr, s->name);				\
+		if (__put(skb, attr_nr, s->name))			\
+			goto nla_put_failure;				\
 	}
 
 #undef __array
@@ -375,9 +376,10 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 		__get, __put, __is_signed)				\
 	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
-		__put(skb, attr_nr, min_t(int, maxlen,			\
+		if (__put(skb, attr_nr, min_t(int, maxlen,		\
 			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
-						s->name);		\
+						s->name))		\
+			goto nla_put_failure;				\
 	}
 
 #include GENL_MAGIC_INCLUDE_FILE
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 1d0bd79e27b3..eecd19b37001 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -65,28 +65,28 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
 /* possible field types */
 #define __flg_field(attr_nr, attr_flag, name) \
 	__field(attr_nr, attr_flag, name, NLA_U8, char, \
-			nla_get_u8, NLA_PUT_U8, false)
+			nla_get_u8, nla_put_u8, false)
 #define __u8_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
-			nla_get_u8, NLA_PUT_U8, false)
+			nla_get_u8, nla_put_u8, false)
 #define __u16_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
-			nla_get_u16, NLA_PUT_U16, false)
+			nla_get_u16, nla_put_u16, false)
 #define __u32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
-			nla_get_u32, NLA_PUT_U32, false)
+			nla_get_u32, nla_put_u32, false)
 #define __s32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
-			nla_get_u32, NLA_PUT_U32, true)
+			nla_get_u32, nla_put_u32, true)
 #define __u64_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
-			nla_get_u64, NLA_PUT_U64, false)
+			nla_get_u64, nla_put_u64, false)
 #define __str_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
-			nla_strlcpy, NLA_PUT, false)
+			nla_strlcpy, nla_put, false)
 #define __bin_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
-			nla_memcpy, NLA_PUT, false)
+			nla_memcpy, nla_put, false)
 
 /* fields with default values */
 #define __flg_field_def(attr_nr, attr_flag, name, default) \
-- 
cgit v1.2.3-55-g7522


From 9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 20 Feb 2012 21:53:28 +0100
Subject: drbd: New disk option al-updates

By disabling al-updates one might increase performace. The price for
that is that in case a crashed primary (that had al-updates disabled)
is reintegraded, it will receive a full-resync instead of a bitmap
based resync.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c | 12 ++++++++++--
 drivers/block/drbd/drbd_nl.c     | 17 +++++++++++++++--
 include/linux/drbd.h             |  1 +
 include/linux/drbd_genl.h        |  3 +++
 include/linux/drbd_limits.h      |  1 +
 5 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 9eae28944312..83d48d210b69 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -276,8 +276,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
 		/* Double check: it may have been committed by someone else,
 		 * while we have been waiting for the lock. */
 		if (mdev->act_log->pending_changes) {
-			al_write_transaction(mdev);
-			mdev->al_writ_cnt++;
+			bool write_al_updates;
+
+			rcu_read_lock();
+			write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates;
+			rcu_read_unlock();
+
+			if (write_al_updates) {
+				al_write_transaction(mdev);
+				mdev->al_writ_cnt++;
+			}
 
 			spin_lock_irq(&mdev->al_lock);
 			/* FIXME
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index dc5bd6bbb280..c5d4fac1a111 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 
 	mutex_unlock(&mdev->tconn->conf_update);
 
+	if (new_disk_conf->al_updates)
+		mdev->ldev->md.flags &= MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
 	drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
 
 	drbd_md_sync(mdev);
@@ -1545,7 +1550,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	} else if (dd == grew)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
-	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
+	    (test_bit(CRASHED_PRIMARY, &mdev->flags) &&
+	     drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) {
 		dev_info(DEV, "Assuming that all blocks are out of sync "
 		     "(aka FullSync)\n");
 		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
@@ -1588,13 +1595,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (ns.disk == D_CONSISTENT &&
 	    (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE))
 		ns.disk = D_UP_TO_DATE;
-	rcu_read_unlock();
 
 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
 	   this point, because drbd_request_state() modifies these
 	   flags. */
 
+	if (rcu_dereference(mdev->ldev->disk_conf)->al_updates)
+		mdev->ldev->md.flags &= MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
+	rcu_read_unlock();
+
 	/* In case we are C_CONNECTED postpone any decision on the new disk
 	   state after the negotiation phase. */
 	if (mdev->state.conn == C_CONNECTED) {
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1e86156c10f7..36ae7dd28d90 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -338,6 +338,7 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
 #define MDF_PEER_OUT_DATED	(1 << 5)
 #define MDF_CRASHED_PRIMARY	(1 << 6)
 #define MDF_AL_CLEAN		(1 << 7)
+#define MDF_AL_DISABLED		(1 << 8)
 
 enum drbd_uuid_index {
 	UI_CURRENT,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 826008f297fe..92ec4b50a885 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -130,6 +130,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
 	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
+	/* 9: __u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
+	__flg_field_def(23,     0 /* OPTIONAL */,	al_updates, DRBD_AL_UPDATES_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -168,6 +170,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
 	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
 	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 17ef66a5c114..1fa19c5f5e64 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -210,6 +210,7 @@
 #define DRBD_DISK_DRAIN_DEF	1
 #define DRBD_MD_FLUSHES_DEF	1
 #define DRBD_TCP_CORK_DEF	1
+#define DRBD_AL_UPDATES_DEF     1
 
 #define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
 #define DRBD_ALWAYS_ASBP_DEF	0
-- 
cgit v1.2.3-55-g7522


From 37be2f59d3149b95afaeeeff94edde2c07f165d2 Mon Sep 17 00:00:00 2001
From: Eric Sandeen
Date: Thu, 8 Nov 2012 11:22:46 -0500
Subject: ext4: remove ext4_handle_release_buffer()

ext4_handle_release_buffer() was intended to remove journal
write access from a buffer, but it doesn't actually do anything
at all other than add a BUFFER_TRACE point, but it's not reliably
used for that either.  Remove all the associated dead code.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/ext4/ext4_jbd2.h   |  7 -------
 fs/ext4/resize.c      | 17 +++--------------
 fs/ext4/xattr.c       |  1 -
 fs/jbd2/journal.c     |  1 -
 fs/jbd2/transaction.c | 11 -----------
 include/linux/jbd2.h  |  1 -
 6 files changed, 3 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 56d258c18303..7177f9b21cb2 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -254,13 +254,6 @@ static inline void ext4_handle_sync(handle_t *handle)
 		handle->h_sync = 1;
 }
 
-static inline void ext4_handle_release_buffer(handle_t *handle,
-						struct buffer_head *bh)
-{
-	if (ext4_handle_valid(handle))
-		jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline int ext4_handle_is_aborted(handle_t *handle)
 {
 	if (ext4_handle_valid(handle))
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 47bf06a2765d..d99387b89edd 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -783,7 +783,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
-		goto exit_sbh;
+		goto exit_dind;
 
 	err = ext4_journal_get_write_access(handle, dind);
 	if (unlikely(err))
@@ -792,7 +792,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	/* ext4_reserve_inode_write() gets a reference on the iloc */
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (unlikely(err))
-		goto exit_dindj;
+		goto exit_dind;
 
 	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
 				     sizeof(struct buffer_head *),
@@ -846,12 +846,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 
 exit_inode:
 	ext4_kvfree(n_group_desc);
-	/* ext4_handle_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
-exit_dindj:
-	/* ext4_handle_release_buffer(handle, dind); */
-exit_sbh:
-	/* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
 exit_dind:
 	brelse(dind);
 exit_bh:
@@ -969,14 +964,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	for (i = 0; i < reserved_gdb; i++) {
-		if ((err = ext4_journal_get_write_access(handle, primary[i]))) {
-			/*
-			int j;
-			for (j = 0; j < i; j++)
-				ext4_handle_release_buffer(handle, primary[j]);
-			 */
+		if ((err = ext4_journal_get_write_access(handle, primary[i])))
 			goto exit_bh;
-		}
 	}
 
 	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2cdb98d62980..b1adda1b750d 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -794,7 +794,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			int offset = (char *)s->here - bs->bh->b_data;
 
 			unlock_buffer(bs->bh);
-			ext4_handle_release_buffer(handle, bs->bh);
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 484b8d1c6cb6..dbf41f9452db 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -60,7 +60,6 @@ EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
 EXPORT_SYMBOL(jbd2_journal_set_triggers);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
-EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
 #if 0
 EXPORT_SYMBOL(journal_sync_buffer);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a74ba4659549..deffd945c8e2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1207,17 +1207,6 @@ out:
 	return ret;
 }
 
-/*
- * jbd2_journal_release_buffer: undo a get_write_access without any buffer
- * updates, if the update decided in the end that it didn't need access.
- *
- */
-void
-jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
-{
-	BUFFER_TRACE(bh, "entry");
-}
-
 /**
  * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
  * @handle: transaction handle
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 3efc43f3f162..de7f55682088 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1096,7 +1096,6 @@ extern int	 jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
 void		 jbd2_journal_set_triggers(struct buffer_head *,
 					   struct jbd2_buffer_trigger_type *type);
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
-extern void	 jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
 extern void	 jbd2_journal_invalidatepage(journal_t *,
-- 
cgit v1.2.3-55-g7522


From fd47d3e1c2949838e858379aaf2bc20647be2912 Mon Sep 17 00:00:00 2001
From: Behan Webster
Date: Thu, 8 Nov 2012 11:24:46 -0500
Subject: jbd2: remove VLAIS usage from JBD2 code

The use of variable length arrays in structs (VLAIS) in the Linux Kernel code
precludes the use of compilers which don't implement VLAIS (for instance the
Clang compiler). Since ctx is always a 32-bit CRC, hard coding a size of 4
bytes accomplishes the same thing without the use of VLAIS. This is the same
technique already employed in fs/ext4/ext4.h

Signed-off-by: Mark Charlebois <charlebm@gmail.com>
Signed-off-by: Behan Webster <behanw@converseincode.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index de7f55682088..1be23d9fdacb 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1302,15 +1302,21 @@ static inline int jbd_space_needed(journal_t *journal)
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
+/* JBD uses a CRC32 checksum */
+#define JBD_MAX_CHECKSUM_SIZE 4
+
 static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
 			      const void *address, unsigned int length)
 {
 	struct {
 		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
+		char ctx[JBD_MAX_CHECKSUM_SIZE];
 	} desc;
 	int err;
 
+	BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
+		JBD_MAX_CHECKSUM_SIZE);
+
 	desc.shash.tfm = journal->j_chksum_driver;
 	desc.shash.flags = 0;
 	*(u32 *)desc.ctx = crc;
-- 
cgit v1.2.3-55-g7522


From 2be975c6d920de989ff5e4bc09ffe87e59d94662 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov
Date: Sat, 3 Nov 2012 12:16:12 -0700
Subject: Input: introduce managed input devices (add devres support)

There is a demand from driver's writers to use managed devices framework
for their drivers. Unfortunately up to this moment input devices did not
provide support for managed devices and that lead to mixing two styles
of resource management which usually introduced more bugs, such as
manually unregistering input device but relying in devres to free
interrupt handler which (unless device is properly shut off) can cause
ISR to reference already freed memory.

This change introduces devm_input_allocate_device() that will allocate
managed instance of input device so that driver writers who prefer
using devm_* framework do not have to mix 2 styles.

Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/input.c | 176 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/input.h |   7 +-
 2 files changed, 155 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index f1be1a77edf3..ce01332f7b3a 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1726,7 +1726,7 @@ EXPORT_SYMBOL_GPL(input_class);
 /**
  * input_allocate_device - allocate memory for new input device
  *
- * Returns prepared struct input_dev or NULL.
+ * Returns prepared struct input_dev or %NULL.
  *
  * NOTE: Use input_free_device() to free devices that have not been
  * registered; input_unregister_device() should be used for already
@@ -1753,6 +1753,70 @@ struct input_dev *input_allocate_device(void)
 }
 EXPORT_SYMBOL(input_allocate_device);
 
+struct input_devres {
+	struct input_dev *input;
+};
+
+static int devm_input_device_match(struct device *dev, void *res, void *data)
+{
+	struct input_devres *devres = res;
+
+	return devres->input == data;
+}
+
+static void devm_input_device_release(struct device *dev, void *res)
+{
+	struct input_devres *devres = res;
+	struct input_dev *input = devres->input;
+
+	dev_dbg(dev, "%s: dropping reference to %s\n",
+		__func__, dev_name(&input->dev));
+	input_put_device(input);
+}
+
+/**
+ * devm_input_allocate_device - allocate managed input device
+ * @dev: device owning the input device being created
+ *
+ * Returns prepared struct input_dev or %NULL.
+ *
+ * Managed input devices do not need to be explicitly unregistered or
+ * freed as it will be done automatically when owner device unbinds from
+ * its driver (or binding fails). Once managed input device is allocated,
+ * it is ready to be set up and registered in the same fashion as regular
+ * input device. There are no special devm_input_device_[un]register()
+ * variants, regular ones work with both managed and unmanaged devices.
+ *
+ * NOTE: the owner device is set up as parent of input device and users
+ * should not override it.
+ */
+
+struct input_dev *devm_input_allocate_device(struct device *dev)
+{
+	struct input_dev *input;
+	struct input_devres *devres;
+
+	devres = devres_alloc(devm_input_device_release,
+			      sizeof(struct input_devres), GFP_KERNEL);
+	if (!devres)
+		return NULL;
+
+	input = input_allocate_device();
+	if (!input) {
+		devres_free(devres);
+		return NULL;
+	}
+
+	input->dev.parent = dev;
+	input->devres_managed = true;
+
+	devres->input = input;
+	devres_add(dev, devres);
+
+	return input;
+}
+EXPORT_SYMBOL(devm_input_allocate_device);
+
 /**
  * input_free_device - free memory occupied by input_dev structure
  * @dev: input device to free
@@ -1769,8 +1833,14 @@ EXPORT_SYMBOL(input_allocate_device);
  */
 void input_free_device(struct input_dev *dev)
 {
-	if (dev)
+	if (dev) {
+		if (dev->devres_managed)
+			WARN_ON(devres_destroy(dev->dev.parent,
+						devm_input_device_release,
+						devm_input_device_match,
+						dev));
 		input_put_device(dev);
+	}
 }
 EXPORT_SYMBOL(input_free_device);
 
@@ -1891,6 +1961,38 @@ static void input_cleanse_bitmasks(struct input_dev *dev)
 	INPUT_CLEANSE_BITMASK(dev, SW, sw);
 }
 
+static void __input_unregister_device(struct input_dev *dev)
+{
+	struct input_handle *handle, *next;
+
+	input_disconnect_device(dev);
+
+	mutex_lock(&input_mutex);
+
+	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
+		handle->handler->disconnect(handle);
+	WARN_ON(!list_empty(&dev->h_list));
+
+	del_timer_sync(&dev->timer);
+	list_del_init(&dev->node);
+
+	input_wakeup_procfs_readers();
+
+	mutex_unlock(&input_mutex);
+
+	device_del(&dev->dev);
+}
+
+static void devm_input_device_unregister(struct device *dev, void *res)
+{
+	struct input_devres *devres = res;
+	struct input_dev *input = devres->input;
+
+	dev_dbg(dev, "%s: unregistering device %s\n",
+		__func__, dev_name(&input->dev));
+	__input_unregister_device(input);
+}
+
 /**
  * input_register_device - register device with input core
  * @dev: device to be registered
@@ -1906,11 +2008,21 @@ static void input_cleanse_bitmasks(struct input_dev *dev)
 int input_register_device(struct input_dev *dev)
 {
 	static atomic_t input_no = ATOMIC_INIT(0);
+	struct input_devres *devres = NULL;
 	struct input_handler *handler;
 	unsigned int packet_size;
 	const char *path;
 	int error;
 
+	if (dev->devres_managed) {
+		devres = devres_alloc(devm_input_device_unregister,
+				      sizeof(struct input_devres), GFP_KERNEL);
+		if (!devres)
+			return -ENOMEM;
+
+		devres->input = dev;
+	}
+
 	/* Every input device generates EV_SYN/SYN_REPORT events. */
 	__set_bit(EV_SYN, dev->evbit);
 
@@ -1926,8 +2038,10 @@ int input_register_device(struct input_dev *dev)
 
 	dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2;
 	dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL);
-	if (!dev->vals)
-		return -ENOMEM;
+	if (!dev->vals) {
+		error = -ENOMEM;
+		goto err_devres_free;
+	}
 
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
@@ -1952,7 +2066,7 @@ int input_register_device(struct input_dev *dev)
 
 	error = device_add(&dev->dev);
 	if (error)
-		return error;
+		goto err_free_vals;
 
 	path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
 	pr_info("%s as %s\n",
@@ -1961,10 +2075,8 @@ int input_register_device(struct input_dev *dev)
 	kfree(path);
 
 	error = mutex_lock_interruptible(&input_mutex);
-	if (error) {
-		device_del(&dev->dev);
-		return error;
-	}
+	if (error)
+		goto err_device_del;
 
 	list_add_tail(&dev->node, &input_dev_list);
 
@@ -1975,7 +2087,21 @@ int input_register_device(struct input_dev *dev)
 
 	mutex_unlock(&input_mutex);
 
+	if (dev->devres_managed) {
+		dev_dbg(dev->dev.parent, "%s: registering %s with devres.\n",
+			__func__, dev_name(&dev->dev));
+		devres_add(dev->dev.parent, devres);
+	}
 	return 0;
+
+err_device_del:
+	device_del(&dev->dev);
+err_free_vals:
+	kfree(dev->vals);
+	dev->vals = NULL;
+err_devres_free:
+	devres_free(devres);
+	return error;
 }
 EXPORT_SYMBOL(input_register_device);
 
@@ -1988,24 +2114,20 @@ EXPORT_SYMBOL(input_register_device);
  */
 void input_unregister_device(struct input_dev *dev)
 {
-	struct input_handle *handle, *next;
-
-	input_disconnect_device(dev);
-
-	mutex_lock(&input_mutex);
-
-	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
-		handle->handler->disconnect(handle);
-	WARN_ON(!list_empty(&dev->h_list));
-
-	del_timer_sync(&dev->timer);
-	list_del_init(&dev->node);
-
-	input_wakeup_procfs_readers();
-
-	mutex_unlock(&input_mutex);
-
-	device_unregister(&dev->dev);
+	if (dev->devres_managed) {
+		WARN_ON(devres_destroy(dev->dev.parent,
+					devm_input_device_unregister,
+					devm_input_device_match,
+					dev));
+		__input_unregister_device(dev);
+		/*
+		 * We do not do input_put_device() here because it will be done
+		 * when 2nd devres fires up.
+		 */
+	} else {
+		__input_unregister_device(dev);
+		input_put_device(dev);
+	}
 }
 EXPORT_SYMBOL(input_unregister_device);
 
diff --git a/include/linux/input.h b/include/linux/input.h
index cab994ba6d91..5538cc09a4f5 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,8 @@ struct input_value {
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @devres_managed: indicates that devices is managed with devres framework
+ *	and needs not be explicitly unregistered or freed.
  */
 struct input_dev {
 	const char *name;
@@ -180,6 +182,8 @@ struct input_dev {
 	unsigned int num_vals;
 	unsigned int max_vals;
 	struct input_value *vals;
+
+	bool devres_managed;
 };
 #define to_input_dev(d) container_of(d, struct input_dev, dev)
 
@@ -323,7 +327,8 @@ struct input_handle {
 	struct list_head	h_node;
 };
 
-struct input_dev *input_allocate_device(void);
+struct input_dev __must_check *input_allocate_device(void);
+struct input_dev __must_check *devm_input_allocate_device(struct device *);
 void input_free_device(struct input_dev *dev);
 
 static inline struct input_dev *input_get_device(struct input_dev *dev)
-- 
cgit v1.2.3-55-g7522


From 58ffa580a748dd16b1e5ab260bea39cdbd1e94ef Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Thu, 26 Jul 2012 14:09:49 +0200
Subject: drbd: introduce stop-sector to online verify

We now can schedule only a specific range of sectors for online verify,
or interrupt a running verify without interrupting the connection.

Had to bump the protocol version differently, we are now 101.
Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; }

Also, the return value convention for worker callbacks has changed,
we returned "true/false" for "keep the connection up" in 8.3,
we return 0 for success and <= for failure in 8.4.
Affected: receive_state()

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  7 +++++++
 drivers/block/drbd/drbd_nl.c       | 14 +++++++++-----
 drivers/block/drbd/drbd_proc.c     | 12 +++++++++---
 drivers/block/drbd/drbd_receiver.c | 10 +++++++++-
 drivers/block/drbd/drbd_state.c    | 17 +++++++++++++----
 drivers/block/drbd/drbd_worker.c   | 33 +++++++++++++++++++++++++++------
 include/linux/drbd.h               |  2 +-
 include/linux/drbd_genl.h          |  1 +
 8 files changed, 76 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 22adfc7189de..eddc4388a1b1 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -971,6 +971,7 @@ struct drbd_conf {
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
+	sector_t ov_stop_sector;
 	/* where are we now? (sector) */
 	sector_t ov_position;
 	/* Start sector of out of sync range (to merge printk reporting). */
@@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
 		wake_up(&mdev->misc_wait);
 }
 
+static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev)
+{
+	return mdev->tconn->agreed_pro_version >= 97 &&
+		mdev->tconn->agreed_pro_version != 100;
+}
+
 static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
 {
 	int changed = mdev->ed_uuid != val;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 4afd626ca3dc..eefb56308aea 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
+	struct start_ov_parms parms;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 
 	mdev = adm_ctx.mdev;
+
+	/* resume from last known position, if possible */
+	parms.ov_start_sector = mdev->ov_start_sector;
+	parms.ov_stop_sector = ULLONG_MAX;
 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
-		/* resume from last known position, if possible */
-		struct start_ov_parms parms =
-			{ .ov_start_sector = mdev->ov_start_sector };
 		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
 			goto out;
 		}
-		/* w_make_ov_request expects position to be aligned */
-		mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
 	}
+	/* w_make_ov_request expects position to be aligned */
+	mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = parms.ov_stop_sector;
+
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
 	drbd_suspend_io(mdev);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index e0f0d2a6d538..56672a61eb94 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 		 * we convert to sectors in the display below. */
 		unsigned long bm_bits = drbd_bm_bits(mdev);
 		unsigned long bit_pos;
+		unsigned long long stop_sector = 0;
 		if (mdev->state.conn == C_VERIFY_S ||
-		    mdev->state.conn == C_VERIFY_T)
+		    mdev->state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - mdev->ov_left;
-		else
+			if (verify_can_do_stop_sector(mdev))
+				stop_sector = mdev->ov_stop_sector;
+		} else
 			bit_pos = mdev->bm_resync_fo;
 		/* Total sectors may be slightly off for oddly
 		 * sized devices. So what. */
 		seq_printf(seq,
-			"\t%3d%% sector pos: %llu/%llu\n",
+			"\t%3d%% sector pos: %llu/%llu",
 			(int)(bit_pos / (bm_bits/100+1)),
 			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
 			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
+			seq_printf(seq, " stop sector: %llu", stop_sector);
+		seq_printf(seq, "\n");
 	}
 }
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7fe6b01618d4..8fddec96dfbe 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 	 * already decided to close the connection again,
 	 * we must not "re-establish" it here. */
 	if (os.conn <= C_TEAR_DOWN)
-		return false;
+		return -ECONNRESET;
 
 	/* If this is the "end of sync" confirmation, usually the peer disk
 	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
@@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 		}
 	}
 
+	/* explicit verify finished notification, stop sector reached. */
+	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
+	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
+		ov_out_of_sync_print(mdev);
+		drbd_resync_finished(mdev);
+		return 0;
+	}
+
 	/* peer says his disk is inconsistent, while we think it is uptodate,
 	 * and this happens while the peer still thinks we have a sync going on,
 	 * but we think we are already done with the sync.
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 444581828d70..12f2b4fbe559 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 	wake_up(&mdev->state_wait);
 	wake_up(&mdev->tconn->ping_wait);
 
-	/* aborted verify run. log the last position */
+	/* Aborted verify run, or we reached the stop sector.
+	 * Log the last position, unless end-of-device. */
 	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
-	    ns.conn < C_CONNECTED) {
+	    ns.conn <= C_CONNECTED) {
 		mdev->ov_start_sector =
 			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
-		dev_info(DEV, "Online Verify reached sector %llu\n",
-			(unsigned long long)mdev->ov_start_sector);
+		if (mdev->ov_left)
+			dev_info(DEV, "Online Verify reached sector %llu\n",
+				(unsigned long long)mdev->ov_start_sector);
 	}
 
 	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
@@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
 		drbd_send_state(mdev, ns);
 
+	/* Verify finished, or reached stop sector.  Peer did not know about
+	 * the stop sector, and we may even have changed the stop sector during
+	 * verify to interrupt/stop early.  Send the new state. */
+	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
+	&& verify_can_do_stop_sector(mdev))
+		drbd_send_state(mdev, ns);
+
 	/* Wake up role changes, that were delayed because of connection establishing */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
 		if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags))
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 9d7e1fb0f431..1c9c6fd332c3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
 	int number, i, size;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel))
 		return 1;
@@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
 
 	sector = mdev->ov_position;
 	for (i = 0; i < number; i++) {
-		if (sector >= capacity) {
+		if (sector >= capacity)
 			return 1;
-		}
+
+		/* We check for "finished" only in the reply path:
+		 * w_e_end_ov_reply().
+		 * We need to send at least one request out. */
+		stop_sector_reached = i > 0
+			&& verify_can_do_stop_sector(mdev)
+			&& sector >= mdev->ov_stop_sector;
+		if (stop_sector_reached)
+			break;
 
 		size = BM_BLOCK_SIZE;
 
@@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
 
  requeue:
 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
-	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	if (i == 0 || !stop_sector_reached)
+		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	return 1;
 }
 
@@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	if (dt <= 0)
 		dt = 1;
+	
 	db = mdev->rs_total;
+	/* adjust for verify start and stop sectors, respective reached position */
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		db -= mdev->ov_left;
+
 	dbdt = Bit2KB(db/dt);
 	mdev->rs_paused /= HZ;
 
@@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	ns.conn = C_CONNECTED;
 
 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-	     verify_done ? "Online verify " : "Resync",
+	     verify_done ? "Online verify" : "Resync",
 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
 	n_oos = drbd_bm_total_weight(mdev);
@@ -896,7 +911,9 @@ out:
 	mdev->rs_total  = 0;
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
-	if (verify_done)
+
+	/* reset start sector, if we reached end of device */
+	if (verify_done && mdev->ov_left == 0)
 		mdev->ov_start_sector = 0;
 
 	drbd_md_sync(mdev);
@@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 	unsigned int size = peer_req->i.size;
 	int digest_size;
 	int err, eq = 0;
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel)) {
 		drbd_free_peer_req(mdev, peer_req);
@@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 	if ((mdev->ov_left & 0x200) == 0x200)
 		drbd_advance_rs_marks(mdev, mdev->ov_left);
 
-	if (mdev->ov_left == 0) {
+	stop_sector_reached = verify_can_do_stop_sector(mdev) &&
+		(sector + (size>>9)) >= mdev->ov_stop_sector;
+
+	if (mdev->ov_left == 0 || stop_sector_reached) {
 		ov_out_of_sync_print(mdev);
 		drbd_resync_finished(mdev);
 	}
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 36ae7dd28d90..5171c3530886 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -55,7 +55,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.11"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 100
+#define PRO_VERSION_MAX 101
 
 
 enum drbd_io_error_p {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 92ec4b50a885..9430e9ab37a8 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -215,6 +215,7 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
+	__u64_field(2, DRBD_GENLA_F_MANDATORY,	ov_stop_sector)
 )
 
 GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
-- 
cgit v1.2.3-55-g7522


From 3174f8c5045ad247563434c4b4897bd89313eafc Mon Sep 17 00:00:00 2001
From: Philipp Marek
Date: Sat, 3 Mar 2012 21:04:30 +0100
Subject: drbd: pass some more information to userspace.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 11 ++++++++++-
 include/linux/drbd_genl.h    | 10 ++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index eefb56308aea..466d6b1d9309 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2666,7 +2666,16 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
 	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
 	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
-	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)))
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) ||
+	    nla_put_u64(skb, T_send_cnt, mdev->send_cnt) ||
+	    nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) ||
+	    nla_put_u64(skb, T_read_cnt, mdev->read_cnt) ||
+	    nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) ||
+	    nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) ||
+	    nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) ||
+	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) ||
+	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) ||
+	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt)))
 		goto nla_put_failure;
 
 	if (got_ldev) {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 9430e9ab37a8..d0d8fac8a6e4 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -211,6 +211,16 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 	/* for pre and post notifications of helper execution */
 	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
 	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
+
+	__u64_field(15,                      0, send_cnt)
+	__u64_field(16,                      0, recv_cnt)
+	__u64_field(17,                      0, read_cnt)
+	__u64_field(18,                      0, writ_cnt)
+	__u64_field(19,                      0, al_writ_cnt)
+	__u64_field(20,                      0, bm_writ_cnt)
+	__u32_field(21,                      0, ap_bio_cnt)
+	__u32_field(22,                      0, ap_pending_cnt)
+	__u32_field(23,                      0, rs_pending_cnt)
 )
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
-- 
cgit v1.2.3-55-g7522


From eb12010e9af119c84e6b2214064a98681027e0e3 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Wed, 1 Aug 2012 12:46:20 +0200
Subject: drbd: disambiguation, s/ERR_DISCARD/ERR_DISCARD_IMPOSSIBLE/

If for some reason (typically "split-brained" cluster manager)
drbd replica data has diverged, we can chose a victim,
and reconnect using "--discard-my-data", causing the victim
to become sync-target, fetching all changed blocks from the peer.

If we are Primary, we are potentially in use, and we refuse to
"roll back" changes to the data below the page cache and other users.

Rename the error symbol for this to ERR_DISCARD_IMPOSSIBLE.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 2 +-
 include/linux/drbd.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 35bb572a2076..d1073705bf1f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1829,7 +1829,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 				return ERR_STONITH_AND_PROT_A;
 		}
 		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
-			return ERR_DISCARD;
+			return ERR_DISCARD_IMPOSSIBLE;
 	}
 
 	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 5171c3530886..0b93e5e2e064 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -136,7 +136,7 @@ enum drbd_ret_code {
 	ERR_AUTH_ALG		= 120,
 	ERR_AUTH_ALG_ND		= 121,
 	ERR_NOMEM		= 122,
-	ERR_DISCARD		= 123,
+	ERR_DISCARD_IMPOSSIBLE	= 123,
 	ERR_DISK_CONFIGURED	= 124,
 	ERR_NET_CONFIGURED	= 125,
 	ERR_MANDATORY_TAG	= 126,
-- 
cgit v1.2.3-55-g7522


From 328e0f125bf41f4f33f684db22015f92cb44fe56 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Fri, 19 Oct 2012 14:37:47 +0200
Subject: drbd: Broadcast sync progress no more often than once per second

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h    | 1 +
 drivers/block/drbd/drbd_nl.c     | 6 ++++++
 drivers/block/drbd/drbd_worker.c | 4 ++++
 include/linux/drbd.h             | 4 ++--
 4 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 057ffed6eb7e..784f4eb2ed61 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -965,6 +965,7 @@ struct drbd_conf {
 	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
 	/* current index into rs_mark_{left,time} */
 	int rs_last_mark;
+	unsigned long rs_last_bcast; /* [unit jiffies] */
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 298dd3e35e02..d339a2754a85 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -3295,6 +3295,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
 	unsigned seq;
 	int err = -ENOMEM;
 
+	if (sib->sib_reason == SIB_SYNC_PROGRESS &&
+	    time_after(jiffies, mdev->rs_last_bcast + HZ))
+		mdev->rs_last_bcast = jiffies;
+	else
+		return;
+
 	seq = atomic_inc_return(&drbd_genl_seq);
 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
 	if (!msg)
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 64a7305c678a..424dc7bdf9b7 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1696,6 +1696,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 	write_unlock_irq(&global_state_lock);
 
 	if (r == SS_SUCCESS) {
+		/* reset rs_last_bcast when a resync or verify is started,
+		 * to deal with potential jiffies wrap. */
+		mdev->rs_last_bcast = jiffies - HZ;
+
 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 0b93e5e2e064..0c5a18ec322c 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,8 +52,8 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.11"
-#define API_VERSION 88
+#define REL_VERSION "8.4.2"
+#define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
 
-- 
cgit v1.2.3-55-g7522


From a8638030f668884720b8f4456448d0ce33952b05 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 9 Nov 2012 09:12:29 -0800
Subject: cgroup: add cgroup_subsys->post_create()

Currently, there's no way for a controller to find out whether a new
cgroup finished all ->create() allocatinos successfully and is
considered "live" by cgroup.

This becomes a problem later when we add generic descendants walking
to cgroup which can be used by controllers as controllers don't have a
synchronization point where it can synchronize against new cgroups
appearing in such walks.

This patch adds ->post_create().  It's called after all ->create()
succeeded and the cgroup is linked into the generic cgroup hierarchy.
This plays the counterpart of ->pre_destroy().

When used in combination with the to-be-added generic descendant
iterators, ->post_create() can be used to implement reliable state
inheritance.  It will be explained with the descendant iterators.

v2: Added a paragraph about its future use w/ descendant iterators per
    Michal.

v3: Forgot to add ->post_create() invocation to cgroup_load_subsys().
    Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Glauber Costa <glommer@parallels.com>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index fe876a77031a..b4421221111d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -438,6 +438,7 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
 
 struct cgroup_subsys {
 	struct cgroup_subsys_state *(*create)(struct cgroup *cgrp);
+	void (*post_create)(struct cgroup *cgrp);
 	void (*pre_destroy)(struct cgroup *cgrp);
 	void (*destroy)(struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 998ab5957c6a..26f2edbaf4f5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4059,10 +4059,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	if (err < 0)
 		goto err_remove;
 
-	/* each css holds a ref to the cgroup's dentry */
-	for_each_subsys(root, ss)
+	for_each_subsys(root, ss) {
+		/* each css holds a ref to the cgroup's dentry */
 		dget(dentry);
 
+		/* creation succeeded, notify subsystems */
+		if (ss->post_create)
+			ss->post_create(cgrp);
+	}
+
 	/* The cgroup directory was pre-locked for us */
 	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
 
@@ -4280,6 +4285,9 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 
 	ss->active = 1;
 
+	if (ss->post_create)
+		ss->post_create(&ss->root->top_cgroup);
+
 	/* this function shouldn't be used with modular subsystems, since they
 	 * need to register a subsys_id, among other things */
 	BUG_ON(ss->module);
@@ -4389,6 +4397,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 
 	ss->active = 1;
 
+	if (ss->post_create)
+		ss->post_create(&ss->root->top_cgroup);
+
 	/* success! */
 	mutex_unlock(&cgroup_mutex);
 	return 0;
-- 
cgit v1.2.3-55-g7522


From eb6fd5040ee799009173daa49c3e7aa0362167c9 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 9 Nov 2012 09:12:29 -0800
Subject: cgroup: use rculist ops for cgroup->children

Use RCU safe list operations for cgroup->children.  This will be used
to implement cgroup children / descendant walking which can be used by
controllers.

Note that cgroup_create() now puts a new cgroup at the end of the
->children list instead of head.  This isn't strictly necessary but is
done so that the iteration order is more conventional.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 1 +
 kernel/cgroup.c        | 8 +++-----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b4421221111d..90c33ebdd6bc 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -12,6 +12,7 @@
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
 #include <linux/rwsem.h>
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 26f2edbaf4f5..2ed5968a04e7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1650,7 +1650,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 
 		free_cg_links(&tmp_cg_links);
 
-		BUG_ON(!list_empty(&root_cgrp->sibling));
 		BUG_ON(!list_empty(&root_cgrp->children));
 		BUG_ON(root->number_of_cgroups != 1);
 
@@ -1699,7 +1698,6 @@ static void cgroup_kill_sb(struct super_block *sb) {
 
 	BUG_ON(root->number_of_cgroups != 1);
 	BUG_ON(!list_empty(&cgrp->children));
-	BUG_ON(!list_empty(&cgrp->sibling));
 
 	mutex_lock(&cgroup_mutex);
 	mutex_lock(&cgroup_root_mutex);
@@ -4052,7 +4050,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 		}
 	}
 
-	list_add(&cgrp->sibling, &cgrp->parent->children);
+	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
 	root->number_of_cgroups++;
 
 	err = cgroup_create_dir(cgrp, dentry, mode);
@@ -4083,7 +4081,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
  err_remove:
 
-	list_del(&cgrp->sibling);
+	list_del_rcu(&cgrp->sibling);
 	root->number_of_cgroups--;
 
  err_destroy:
@@ -4209,7 +4207,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	raw_spin_unlock(&release_list_lock);
 
 	/* delete this cgroup from parent->children */
-	list_del_init(&cgrp->sibling);
+	list_del_rcu(&cgrp->sibling);
 
 	list_del_init(&cgrp->allcg_node);
 
-- 
cgit v1.2.3-55-g7522


From 574bd9f7c7c1d32f52dea5488018a6ff79031e22 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 9 Nov 2012 09:12:29 -0800
Subject: cgroup: implement generic child / descendant walk macros

Currently, cgroup doesn't provide any generic helper for walking a
given cgroup's children or descendants.  This patch adds the following
three macros.

* cgroup_for_each_child() - walk immediate children of a cgroup.

* cgroup_for_each_descendant_pre() - visit all descendants of a cgroup
  in pre-order tree traversal.

* cgroup_for_each_descendant_post() - visit all descendants of a
  cgroup in post-order tree traversal.

All three only require the user to hold RCU read lock during
traversal.  Verifying that each iterated cgroup is online is the
responsibility of the user.  When used with proper synchronization,
cgroup_for_each_descendant_pre() can be used to propagate state
updates to descendants in reliable way.  See comments for details.

v2: s/config/state/ in commit message and comments per Michal.  More
    documentation on synchronization rules.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujisu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup.c        | 86 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 180 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 90c33ebdd6bc..8f64b459fbd4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -534,6 +534,100 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 	return task_subsys_state(task, subsys_id)->cgroup;
 }
 
+/**
+ * cgroup_for_each_child - iterate through children of a cgroup
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose children to walk
+ *
+ * Walk @cgroup's children.  Must be called under rcu_read_lock().  A child
+ * cgroup which hasn't finished ->post_create() or already has finished
+ * ->pre_destroy() may show up during traversal and it's each subsystem's
+ * responsibility to verify that each @pos is alive.
+ *
+ * If a subsystem synchronizes against the parent in its ->post_create()
+ * and before starting iterating, a cgroup which finished ->post_create()
+ * is guaranteed to be visible in the future iterations.
+ */
+#define cgroup_for_each_child(pos, cgroup)				\
+	list_for_each_entry_rcu(pos, &(cgroup)->children, sibling)
+
+struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
+					  struct cgroup *cgroup);
+
+/**
+ * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * Walk @cgroup's descendants.  Must be called under rcu_read_lock().  A
+ * descendant cgroup which hasn't finished ->post_create() or already has
+ * finished ->pre_destroy() may show up during traversal and it's each
+ * subsystem's responsibility to verify that each @pos is alive.
+ *
+ * If a subsystem synchronizes against the parent in its ->post_create()
+ * and before starting iterating, and synchronizes against @pos on each
+ * iteration, any descendant cgroup which finished ->post_create() is
+ * guaranteed to be visible in the future iterations.
+ *
+ * In other words, the following guarantees that a descendant can't escape
+ * state updates of its ancestors.
+ *
+ * my_post_create(@cgrp)
+ * {
+ *	Lock @cgrp->parent and @cgrp;
+ *	Inherit state from @cgrp->parent;
+ *	Unlock both.
+ * }
+ *
+ * my_update_state(@cgrp)
+ * {
+ *	Lock @cgrp;
+ *	Update @cgrp's state;
+ *	Unlock @cgrp;
+ *
+ *	cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ *		Lock @pos;
+ *		Verify @pos is alive and inherit state from @pos->parent;
+ *		Unlock @pos;
+ *	}
+ * }
+ *
+ * As long as the inheriting step, including checking the parent state, is
+ * enclosed inside @pos locking, double-locking the parent isn't necessary
+ * while inheriting.  The state update to the parent is guaranteed to be
+ * visible by walking order and, as long as inheriting operations to the
+ * same @pos are atomic to each other, multiple updates racing each other
+ * still result in the correct state.  It's guaranateed that at least one
+ * inheritance happens for any cgroup after the latest update to its
+ * parent.
+ *
+ * If checking parent's state requires locking the parent, each inheriting
+ * iteration should lock and unlock both @pos->parent and @pos.
+ *
+ * Alternatively, a subsystem may choose to use a single global lock to
+ * synchronize ->post_create() and ->pre_destroy() against tree-walking
+ * operations.
+ */
+#define cgroup_for_each_descendant_pre(pos, cgroup)			\
+	for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos);	\
+	     pos = cgroup_next_descendant_pre((pos), (cgroup)))
+
+struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
+					   struct cgroup *cgroup);
+
+/**
+ * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * Similar to cgroup_for_each_descendant_pre() but performs post-order
+ * traversal instead.  Note that the walk visibility guarantee described in
+ * pre-order walk doesn't apply the same to post-order walks.
+ */
+#define cgroup_for_each_descendant_post(pos, cgroup)			\
+	for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos);	\
+	     pos = cgroup_next_descendant_post((pos), (cgroup)))
+
 /* A cgroup_iter should be treated as an opaque object */
 struct cgroup_iter {
 	struct list_head *cg_link;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2ed5968a04e7..0f8fa6aa371b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2984,6 +2984,92 @@ static void cgroup_enable_task_cg_lists(void)
 	write_unlock(&css_set_lock);
 }
 
+/**
+ * cgroup_next_descendant_pre - find the next descendant for pre-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * To be used by cgroup_for_each_descendant_pre().  Find the next
+ * descendant to visit for pre-order traversal of @cgroup's descendants.
+ */
+struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
+					  struct cgroup *cgroup)
+{
+	struct cgroup *next;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* if first iteration, pretend we just visited @cgroup */
+	if (!pos) {
+		if (list_empty(&cgroup->children))
+			return NULL;
+		pos = cgroup;
+	}
+
+	/* visit the first child if exists */
+	next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
+	if (next)
+		return next;
+
+	/* no child, visit my or the closest ancestor's next sibling */
+	do {
+		next = list_entry_rcu(pos->sibling.next, struct cgroup,
+				      sibling);
+		if (&next->sibling != &pos->parent->children)
+			return next;
+
+		pos = pos->parent;
+	} while (pos != cgroup);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
+
+static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
+{
+	struct cgroup *last;
+
+	do {
+		last = pos;
+		pos = list_first_or_null_rcu(&pos->children, struct cgroup,
+					     sibling);
+	} while (pos);
+
+	return last;
+}
+
+/**
+ * cgroup_next_descendant_post - find the next descendant for post-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * To be used by cgroup_for_each_descendant_post().  Find the next
+ * descendant to visit for post-order traversal of @cgroup's descendants.
+ */
+struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
+					   struct cgroup *cgroup)
+{
+	struct cgroup *next;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* if first iteration, visit the leftmost descendant */
+	if (!pos) {
+		next = cgroup_leftmost_descendant(cgroup);
+		return next != cgroup ? next : NULL;
+	}
+
+	/* if there's an unvisited sibling, visit its leftmost descendant */
+	next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
+	if (&next->sibling != &pos->parent->children)
+		return cgroup_leftmost_descendant(next);
+
+	/* no sibling left, visit parent */
+	next = pos->parent;
+	return next != cgroup ? next : NULL;
+}
+EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
+
 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
 	__acquires(css_set_lock)
 {
-- 
cgit v1.2.3-55-g7522


From d77807230e1ef30dbdee85aa24d27073a14dd168 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 7 Nov 2012 02:37:17 +0000
Subject: UAPI: (Scripted) Disintegrate include/linux/hdlc

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
Acked-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hdlc/Kbuild       |  1 -
 include/linux/hdlc/ioctl.h      | 81 -----------------------------------------
 include/uapi/linux/hdlc/Kbuild  |  1 +
 include/uapi/linux/hdlc/ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 82 deletions(-)
 delete mode 100644 include/linux/hdlc/ioctl.h
 create mode 100644 include/uapi/linux/hdlc/ioctl.h

(limited to 'include/linux')

diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild
index 1fb26448faa9..e69de29bb2d1 100644
--- a/include/linux/hdlc/Kbuild
+++ b/include/linux/hdlc/Kbuild
@@ -1 +0,0 @@
-header-y += ioctl.h
diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h
deleted file mode 100644
index 583972364357..000000000000
--- a/include/linux/hdlc/ioctl.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef __HDLC_IOCTL_H__
-#define __HDLC_IOCTL_H__
-
-
-#define GENERIC_HDLC_VERSION 4	/* For synchronization with sethdlc utility */
-
-#define CLOCK_DEFAULT   0	/* Default setting */
-#define CLOCK_EXT	1	/* External TX and RX clock - DTE */
-#define CLOCK_INT	2	/* Internal TX and RX clock - DCE */
-#define CLOCK_TXINT	3	/* Internal TX and external RX clock */
-#define CLOCK_TXFROMRX	4	/* TX clock derived from external RX clock */
-
-
-#define ENCODING_DEFAULT	0 /* Default setting */
-#define ENCODING_NRZ		1
-#define ENCODING_NRZI		2
-#define ENCODING_FM_MARK	3
-#define ENCODING_FM_SPACE	4
-#define ENCODING_MANCHESTER	5
-
-
-#define PARITY_DEFAULT		0 /* Default setting */
-#define PARITY_NONE		1 /* No parity */
-#define PARITY_CRC16_PR0	2 /* CRC16, initial value 0x0000 */
-#define PARITY_CRC16_PR1	3 /* CRC16, initial value 0xFFFF */
-#define PARITY_CRC16_PR0_CCITT	4 /* CRC16, initial 0x0000, ITU-T version */
-#define PARITY_CRC16_PR1_CCITT	5 /* CRC16, initial 0xFFFF, ITU-T version */
-#define PARITY_CRC32_PR0_CCITT	6 /* CRC32, initial value 0x00000000 */
-#define PARITY_CRC32_PR1_CCITT	7 /* CRC32, initial value 0xFFFFFFFF */
-
-#define LMI_DEFAULT		0 /* Default setting */
-#define LMI_NONE		1 /* No LMI, all PVCs are static */
-#define LMI_ANSI		2 /* ANSI Annex D */
-#define LMI_CCITT		3 /* ITU-T Annex A */
-#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
-
-typedef struct { 
-	unsigned int clock_rate; /* bits per second */
-	unsigned int clock_type; /* internal, external, TX-internal etc. */
-	unsigned short loopback;
-} sync_serial_settings;          /* V.35, V.24, X.21 */
-
-typedef struct { 
-	unsigned int clock_rate; /* bits per second */
-	unsigned int clock_type; /* internal, external, TX-internal etc. */
-	unsigned short loopback;
-	unsigned int slot_map;
-} te1_settings;                  /* T1, E1 */
-
-typedef struct {
-	unsigned short encoding;
-	unsigned short parity;
-} raw_hdlc_proto;
-
-typedef struct {
-	unsigned int t391;
-	unsigned int t392;
-	unsigned int n391;
-	unsigned int n392;
-	unsigned int n393;
-	unsigned short lmi;
-	unsigned short dce; /* 1 for DCE (network side) operation */
-} fr_proto;
-
-typedef struct {
-	unsigned int dlci;
-} fr_proto_pvc;          /* for creating/deleting FR PVCs */
-
-typedef struct {
-	unsigned int dlci;
-	char master[IFNAMSIZ];	/* Name of master FRAD device */
-}fr_proto_pvc_info;		/* for returning PVC information only */
-
-typedef struct {
-    unsigned int interval;
-    unsigned int timeout;
-} cisco_proto;
-
-/* PPP doesn't need any info now - supply length = 0 to ioctl */
-
-#endif /* __HDLC_IOCTL_H__ */
diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild
index aafaa5aa54d4..8c1d2cb75e33 100644
--- a/include/uapi/linux/hdlc/Kbuild
+++ b/include/uapi/linux/hdlc/Kbuild
@@ -1 +1,2 @@
 # UAPI Header export list
+header-y += ioctl.h
diff --git a/include/uapi/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h
new file mode 100644
index 000000000000..46939b24d612
--- /dev/null
+++ b/include/uapi/linux/hdlc/ioctl.h
@@ -0,0 +1,81 @@
+#ifndef __HDLC_IOCTL_H__
+#define __HDLC_IOCTL_H__
+
+
+#define GENERIC_HDLC_VERSION 4	/* For synchronization with sethdlc utility */
+
+#define CLOCK_DEFAULT   0	/* Default setting */
+#define CLOCK_EXT	1	/* External TX and RX clock - DTE */
+#define CLOCK_INT	2	/* Internal TX and RX clock - DCE */
+#define CLOCK_TXINT	3	/* Internal TX and external RX clock */
+#define CLOCK_TXFROMRX	4	/* TX clock derived from external RX clock */
+
+
+#define ENCODING_DEFAULT	0 /* Default setting */
+#define ENCODING_NRZ		1
+#define ENCODING_NRZI		2
+#define ENCODING_FM_MARK	3
+#define ENCODING_FM_SPACE	4
+#define ENCODING_MANCHESTER	5
+
+
+#define PARITY_DEFAULT		0 /* Default setting */
+#define PARITY_NONE		1 /* No parity */
+#define PARITY_CRC16_PR0	2 /* CRC16, initial value 0x0000 */
+#define PARITY_CRC16_PR1	3 /* CRC16, initial value 0xFFFF */
+#define PARITY_CRC16_PR0_CCITT	4 /* CRC16, initial 0x0000, ITU-T version */
+#define PARITY_CRC16_PR1_CCITT	5 /* CRC16, initial 0xFFFF, ITU-T version */
+#define PARITY_CRC32_PR0_CCITT	6 /* CRC32, initial value 0x00000000 */
+#define PARITY_CRC32_PR1_CCITT	7 /* CRC32, initial value 0xFFFFFFFF */
+
+#define LMI_DEFAULT		0 /* Default setting */
+#define LMI_NONE		1 /* No LMI, all PVCs are static */
+#define LMI_ANSI		2 /* ANSI Annex D */
+#define LMI_CCITT		3 /* ITU-T Annex A */
+#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
+
+typedef struct {
+	unsigned int clock_rate; /* bits per second */
+	unsigned int clock_type; /* internal, external, TX-internal etc. */
+	unsigned short loopback;
+} sync_serial_settings;          /* V.35, V.24, X.21 */
+
+typedef struct {
+	unsigned int clock_rate; /* bits per second */
+	unsigned int clock_type; /* internal, external, TX-internal etc. */
+	unsigned short loopback;
+	unsigned int slot_map;
+} te1_settings;                  /* T1, E1 */
+
+typedef struct {
+	unsigned short encoding;
+	unsigned short parity;
+} raw_hdlc_proto;
+
+typedef struct {
+	unsigned int t391;
+	unsigned int t392;
+	unsigned int n391;
+	unsigned int n392;
+	unsigned int n393;
+	unsigned short lmi;
+	unsigned short dce; /* 1 for DCE (network side) operation */
+} fr_proto;
+
+typedef struct {
+	unsigned int dlci;
+} fr_proto_pvc;          /* for creating/deleting FR PVCs */
+
+typedef struct {
+	unsigned int dlci;
+	char master[IFNAMSIZ];	/* Name of master FRAD device */
+}fr_proto_pvc_info;		/* for returning PVC information only */
+
+typedef struct {
+    unsigned int interval;
+    unsigned int timeout;
+} cisco_proto;
+
+/* PPP doesn't need any info now - supply length = 0 to ioctl */
+
+#endif /* __HDLC_IOCTL_H__ */
-- 
cgit v1.2.3-55-g7522


From f8f626754ebeca613cf1af2e6f890cfde0e74d5b Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Fri, 9 Nov 2012 17:05:07 -0800
Subject: ipv6: Move ipv6_find_hdr() out of Netfilter code.

Open vSwitch will soon also use ipv6_find_hdr() so this moves it
out of Netfilter-specific code into a more common location.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/netfilter_ipv6/ip6_tables.h |   9 ---
 include/net/ipv6.h                        |   9 +++
 net/ipv6/exthdrs_core.c                   | 103 ++++++++++++++++++++++++++++++
 net/ipv6/netfilter/ip6_tables.c           | 103 ------------------------------
 net/netfilter/xt_HMARK.c                  |   8 +--
 5 files changed, 116 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 5f84c6229dc6..610208b18c05 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -47,15 +47,6 @@ ip6t_ext_hdr(u8 nexthdr)
 	       (nexthdr == IPPROTO_DSTOPTS);
 }
 
-enum {
-	IP6T_FH_F_FRAG	= (1 << 0),
-	IP6T_FH_F_AUTH	= (1 << 1),
-};
-
-/* find specified header and get offset to it */
-extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
-			 int target, unsigned short *fragoff, int *fragflg);
-
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 979bf6c13141..b2f0cfb0a381 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -630,6 +630,15 @@ extern int			ipv6_skip_exthdr(const struct sk_buff *, int start,
 
 extern bool			ipv6_ext_hdr(u8 nexthdr);
 
+enum {
+	IP6_FH_F_FRAG	= (1 << 0),
+	IP6_FH_F_AUTH	= (1 << 1),
+};
+
+/* find specified header and get offset to it */
+extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+			 int target, unsigned short *fragoff, int *fragflg);
+
 extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
 
 extern struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index f73d59a14131..8ea253ad35b1 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -111,3 +111,106 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
 	return start;
 }
 EXPORT_SYMBOL(ipv6_skip_exthdr);
+
+/*
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * Note that *offset is used as input/output parameter. an if it is not zero,
+ * then it must be a valid offset to an inner IPv6 header. This can be used
+ * to explore inner IPv6 header, eg. ICMPv6 error messages.
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
+ *
+ * if flags is not NULL and it's a fragment, then the frag flag IP6_FH_F_FRAG
+ * will be set. If it's an AH header, the IP6_FH_F_AUTH flag is set and
+ * target < 0, then this function will stop at the AH header.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+		  int target, unsigned short *fragoff, int *flags)
+{
+	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	unsigned int len;
+
+	if (fragoff)
+		*fragoff = 0;
+
+	if (*offset) {
+		struct ipv6hdr _ip6, *ip6;
+
+		ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
+		if (!ip6 || (ip6->version != 6)) {
+			printk(KERN_ERR "IPv6 header not found\n");
+			return -EBADMSG;
+		}
+		start = *offset + sizeof(struct ipv6hdr);
+		nexthdr = ip6->nexthdr;
+	}
+	len = skb->len - start;
+
+	while (nexthdr != target) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		unsigned int hdrlen;
+
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+			if (target < 0)
+				break;
+			return -ENOENT;
+		}
+
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return -EBADMSG;
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off;
+			__be16 *fp;
+
+			if (flags)	/* Indicate that this is a fragment */
+				*flags |= IP6_FH_F_FRAG;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -EBADMSG;
+
+			_frag_off = ntohs(*fp) & ~0x7;
+			if (_frag_off) {
+				if (target < 0 &&
+				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
+				     hp->nexthdr == NEXTHDR_NONE)) {
+					if (fragoff)
+						*fragoff = _frag_off;
+					return hp->nexthdr;
+				}
+				return -ENOENT;
+			}
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH) {
+			if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
+				break;
+			hdrlen = (hp->hdrlen + 2) << 2;
+		} else
+			hdrlen = ipv6_optlen(hp);
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	*offset = start;
+	return nexthdr;
+}
+EXPORT_SYMBOL(ipv6_find_hdr);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d7cb04506c3d..1ce4f157ce4f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2273,112 +2273,9 @@ static void __exit ip6_tables_fini(void)
 	unregister_pernet_subsys(&ip6_tables_net_ops);
 }
 
-/*
- * find the offset to specified header or the protocol number of last header
- * if target < 0. "last header" is transport protocol header, ESP, or
- * "No next header".
- *
- * Note that *offset is used as input/output parameter. an if it is not zero,
- * then it must be a valid offset to an inner IPv6 header. This can be used
- * to explore inner IPv6 header, eg. ICMPv6 error messages.
- *
- * If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
- *
- * If the first fragment doesn't contain the final protocol header or
- * NEXTHDR_NONE it is considered invalid.
- *
- * Note that non-1st fragment is special case that "the protocol number
- * of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
- *
- * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG
- * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and
- * target < 0, then this function will stop at the AH header.
- */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
-		  int target, unsigned short *fragoff, int *flags)
-{
-	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
-	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-	unsigned int len;
-
-	if (fragoff)
-		*fragoff = 0;
-
-	if (*offset) {
-		struct ipv6hdr _ip6, *ip6;
-
-		ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
-		if (!ip6 || (ip6->version != 6)) {
-			printk(KERN_ERR "IPv6 header not found\n");
-			return -EBADMSG;
-		}
-		start = *offset + sizeof(struct ipv6hdr);
-		nexthdr = ip6->nexthdr;
-	}
-	len = skb->len - start;
-
-	while (nexthdr != target) {
-		struct ipv6_opt_hdr _hdr, *hp;
-		unsigned int hdrlen;
-
-		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
-			if (target < 0)
-				break;
-			return -ENOENT;
-		}
-
-		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
-		if (hp == NULL)
-			return -EBADMSG;
-		if (nexthdr == NEXTHDR_FRAGMENT) {
-			unsigned short _frag_off;
-			__be16 *fp;
-
-			if (flags)	/* Indicate that this is a fragment */
-				*flags |= IP6T_FH_F_FRAG;
-			fp = skb_header_pointer(skb,
-						start+offsetof(struct frag_hdr,
-							       frag_off),
-						sizeof(_frag_off),
-						&_frag_off);
-			if (fp == NULL)
-				return -EBADMSG;
-
-			_frag_off = ntohs(*fp) & ~0x7;
-			if (_frag_off) {
-				if (target < 0 &&
-				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
-				     hp->nexthdr == NEXTHDR_NONE)) {
-					if (fragoff)
-						*fragoff = _frag_off;
-					return hp->nexthdr;
-				}
-				return -ENOENT;
-			}
-			hdrlen = 8;
-		} else if (nexthdr == NEXTHDR_AUTH) {
-			if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0))
-				break;
-			hdrlen = (hp->hdrlen + 2) << 2;
-		} else
-			hdrlen = ipv6_optlen(hp);
-
-		nexthdr = hp->nexthdr;
-		len -= hdrlen;
-		start += hdrlen;
-	}
-
-	*offset = start;
-	return nexthdr;
-}
-
 EXPORT_SYMBOL(ip6t_register_table);
 EXPORT_SYMBOL(ip6t_unregister_table);
 EXPORT_SYMBOL(ip6t_do_table);
-EXPORT_SYMBOL(ipv6_find_hdr);
 
 module_init(ip6_tables_init);
 module_exit(ip6_tables_fini);
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 1686ca1b53a1..73b73f687c58 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -167,7 +167,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
 			  const struct xt_hmark_info *info)
 {
 	struct ipv6hdr *ip6, _ip6;
-	int flag = IP6T_FH_F_AUTH;
+	int flag = IP6_FH_F_AUTH;
 	unsigned int nhoff = 0;
 	u16 fragoff = 0;
 	int nexthdr;
@@ -177,7 +177,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
 	if (nexthdr < 0)
 		return 0;
 	/* No need to check for icmp errors on fragments */
-	if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
+	if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
 		goto noicmp;
 	/* Use inner header in case of ICMP errors */
 	if (get_inner6_hdr(skb, &nhoff)) {
@@ -185,7 +185,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
 		if (ip6 == NULL)
 			return -1;
 		/* If AH present, use SPI like in ESP. */
-		flag = IP6T_FH_F_AUTH;
+		flag = IP6_FH_F_AUTH;
 		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
 		if (nexthdr < 0)
 			return -1;
@@ -201,7 +201,7 @@ noicmp:
 	if (t->proto == IPPROTO_ICMPV6)
 		return 0;
 
-	if (flag & IP6T_FH_F_FRAG)
+	if (flag & IP6_FH_F_FRAG)
 		return 0;
 
 	hmark_set_tuple_ports(skb, nhoff, t, info);
-- 
cgit v1.2.3-55-g7522


From 1789382a72a537447d65ea4131d8bcc1ad85ce7b Mon Sep 17 00:00:00 2001
From: Donald Dutile
Date: Mon, 5 Nov 2012 15:20:36 -0500
Subject: PCI: SRIOV control and status via sysfs

Provide files under sysfs to determine the maximum number of VFs
an SR-IOV-capable PCIe device supports, and methods to enable and
disable the VFs on a per-device basis.

Currently, VF enablement by SR-IOV-capable PCIe devices is done
via driver-specific module parameters.  If not setup in modprobe files,
it requires admin to unload & reload PF drivers with number of desired
VFs to enable.  Additionally, the enablement is system wide: all
devices controlled by the same driver have the same number of VFs
enabled.  Although the latter is probably desired, there are PCI
configurations setup by system BIOS that may not enable that to occur.

Two files are created for the PF of PCIe devices with SR-IOV support:

    sriov_totalvfs	Contains the maximum number of VFs the device
			could support as reported by the TotalVFs register
			in the SR-IOV extended capability.

    sriov_numvfs	Contains the number of VFs currently enabled on
			this device as reported by the NumVFs register in
			the SR-IOV extended capability.

			Writing zero to this file disables all VFs.

			Writing a positive number to this file enables that
			number of VFs.

These files are readable for all SR-IOV PF devices.  Writes to the
sriov_numvfs file are effective only if a driver that supports the
sriov_configure() method is attached.

Signed-off-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>---
 drivers/pci/pci-sysfs.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h     |   1 +
 2 files changed, 128 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index fbbb97f28259..74508c63bd47 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -404,6 +404,106 @@ static ssize_t d3cold_allowed_show(struct device *dev,
 }
 #endif
 
+#ifdef CONFIG_PCI_IOV
+static ssize_t sriov_totalvfs_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%u\n", pdev->sriov->total);
+}
+
+
+static ssize_t sriov_numvfs_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%u\n", pdev->sriov->nr_virtfn);
+}
+
+/*
+ * num_vfs > 0; number of vfs to enable
+ * num_vfs = 0; disable all vfs
+ *
+ * Note: SRIOV spec doesn't allow partial VF
+ *       disable, so its all or none.
+ */
+static ssize_t sriov_numvfs_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int num_vfs_enabled = 0;
+	int num_vfs;
+	int ret = 0;
+	u16 total;
+
+	if (kstrtoint(buf, 0, &num_vfs) < 0)
+		return -EINVAL;
+
+	/* is PF driver loaded w/callback */
+	if (!pdev->driver || !pdev->driver->sriov_configure) {
+		dev_info(&pdev->dev,
+			 "Driver doesn't support SRIOV configuration via sysfs\n");
+		return -ENOSYS;
+	}
+
+	/* if enabling vf's ... */
+	total = pdev->sriov->total;
+	/* Requested VFs to enable < totalvfs and none enabled already */
+	if ((num_vfs > 0) && (num_vfs <= total)) {
+		if (pdev->sriov->nr_virtfn == 0) {
+			num_vfs_enabled =
+				pdev->driver->sriov_configure(pdev, num_vfs);
+			if ((num_vfs_enabled >= 0) &&
+			    (num_vfs_enabled != num_vfs)) {
+				dev_warn(&pdev->dev,
+					 "Only %d VFs enabled\n",
+					 num_vfs_enabled);
+				return count;
+			} else if (num_vfs_enabled < 0)
+				/* error code from driver callback */
+				return num_vfs_enabled;
+		} else if (num_vfs == pdev->sriov->nr_virtfn) {
+			dev_warn(&pdev->dev,
+				 "%d VFs already enabled; no enable action taken\n",
+				 num_vfs);
+			return count;
+		} else {
+			dev_warn(&pdev->dev,
+				 "%d VFs already enabled. Disable before enabling %d VFs\n",
+				 pdev->sriov->nr_virtfn, num_vfs);
+			return -EINVAL;
+		}
+	}
+
+	/* disable vfs */
+	if (num_vfs == 0) {
+		if (pdev->sriov->nr_virtfn != 0) {
+			ret = pdev->driver->sriov_configure(pdev, 0);
+			return ret ? ret : count;
+		} else {
+			dev_warn(&pdev->dev,
+				 "All VFs disabled; no disable action taken\n");
+			return count;
+		}
+	}
+
+	dev_err(&pdev->dev,
+		"Invalid value for number of VFs to enable: %d\n", num_vfs);
+
+	return -EINVAL;
+}
+
+static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
+static struct device_attribute sriov_numvfs_attr =
+		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
+		       sriov_numvfs_show, sriov_numvfs_store);
+#endif /* CONFIG_PCI_IOV */
+
 struct device_attribute pci_dev_attrs[] = {
 	__ATTR_RO(resource),
 	__ATTR_RO(vendor),
@@ -1421,6 +1521,30 @@ static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
 	return a->mode;
 }
 
+#ifdef CONFIG_PCI_IOV
+static struct attribute *sriov_dev_attrs[] = {
+	&sriov_totalvfs_attr.attr,
+	&sriov_numvfs_attr.attr,
+	NULL,
+};
+
+static umode_t sriov_attrs_are_visible(struct kobject *kobj,
+					 struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (!dev_is_pf(dev))
+		return 0;
+
+	return a->mode;
+}
+
+static struct attribute_group sriov_dev_attr_group = {
+	.attrs = sriov_dev_attrs,
+	.is_visible = sriov_attrs_are_visible,
+};
+#endif /* CONFIG_PCI_IOV */
+
 static struct attribute_group pci_dev_attr_group = {
 	.attrs = pci_dev_dev_attrs,
 	.is_visible = pci_dev_attrs_are_visible,
@@ -1428,6 +1552,9 @@ static struct attribute_group pci_dev_attr_group = {
 
 static const struct attribute_group *pci_dev_attr_groups[] = {
 	&pci_dev_attr_group,
+#ifdef CONFIG_PCI_IOV
+	&sriov_dev_attr_group,
+#endif
 	NULL,
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee2179546c63..7ef8fba319da 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -573,6 +573,7 @@ struct pci_driver {
 	int  (*resume_early) (struct pci_dev *dev);
 	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 	void (*shutdown) (struct pci_dev *dev);
+	int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
 	const struct pci_error_handlers *err_handler;
 	struct device_driver	driver;
 	struct pci_dynids dynids;
-- 
cgit v1.2.3-55-g7522


From bff73156d3ad661655e6d9ef04c2284cf3abb0f1 Mon Sep 17 00:00:00 2001
From: Donald Dutile
Date: Mon, 5 Nov 2012 15:20:37 -0500
Subject: PCI: Provide method to reduce the number of total VFs supported

Some implementations of SRIOV provide a capability structure
value of TotalVFs that is greater than what the software can support.
Provide a method to reduce the capability structure reported value
to the value the driver can support.
This ensures sysfs reports the current capability of the system,
hardware and software.
Example for its use: igb & ixgbe -- report 8 & 64 as TotalVFs,
but drivers only support 7 & 63 maximum.

Signed-off-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>---
 drivers/pci/iov.c       | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci-sysfs.c |  4 ++--
 drivers/pci/pci.h       |  1 +
 include/linux/pci.h     | 10 ++++++++++
 4 files changed, 60 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index aeccc911abb8..f6781e42d00d 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -735,3 +735,50 @@ int pci_num_vf(struct pci_dev *dev)
 		return dev->sriov->nr_virtfn;
 }
 EXPORT_SYMBOL_GPL(pci_num_vf);
+
+/**
+ * pci_sriov_set_totalvfs -- reduce the TotalVFs available
+ * @dev: the PCI PF device
+ * numvfs: number that should be used for TotalVFs supported
+ *
+ * Should be called from PF driver's probe routine with
+ * device's mutex held.
+ *
+ * Returns 0 if PF is an SRIOV-capable device and
+ * value of numvfs valid. If not a PF with VFS, return -EINVAL;
+ * if VFs already enabled, return -EBUSY.
+ */
+int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
+{
+	if (!dev || !dev->is_physfn || (numvfs > dev->sriov->total))
+		return -EINVAL;
+
+	/* Shouldn't change if VFs already enabled */
+	if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
+		return -EBUSY;
+	else
+		dev->sriov->drvttl = numvfs;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
+
+/**
+ * pci_sriov_get_totalvfs -- get total VFs supported on this devic3
+ * @dev: the PCI PF device
+ *
+ * For a PCIe device with SRIOV support, return the PCIe
+ * SRIOV capability value of TotalVFs or the value of drvttl
+ * if the driver reduced it.  Otherwise, -EINVAL.
+ */
+int pci_sriov_get_totalvfs(struct pci_dev *dev)
+{
+	if (!dev || !dev->is_physfn)
+		return -EINVAL;
+
+	if (dev->sriov->drvttl)
+		return dev->sriov->drvttl;
+	else
+		return dev->sriov->total;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 74508c63bd47..99b5f83d2468 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -411,7 +411,7 @@ static ssize_t sriov_totalvfs_show(struct device *dev,
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 
-	return sprintf(buf, "%u\n", pdev->sriov->total);
+	return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
 }
 
 
@@ -452,7 +452,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 	}
 
 	/* if enabling vf's ... */
-	total = pdev->sriov->total;
+	total = pci_sriov_get_totalvfs(pdev);
 	/* Requested VFs to enable < totalvfs and none enabled already */
 	if ((num_vfs > 0) && (num_vfs <= total)) {
 		if (pdev->sriov->nr_virtfn == 0) {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6f6cd145bb7e..553bbba76eec 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -240,6 +240,7 @@ struct pci_sriov {
 	u16 stride;		/* following VF stride */
 	u32 pgsz;		/* page size for BAR alignment */
 	u8 link;		/* Function Dependency Link */
+	u16 drvttl;		/* max num VFs driver supports */
 	struct pci_dev *dev;	/* lowest numbered PF */
 	struct pci_dev *self;	/* this PF */
 	struct mutex lock;	/* lock for VF bus */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7ef8fba319da..1ad824966e64 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1611,6 +1611,8 @@ extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
 extern void pci_disable_sriov(struct pci_dev *dev);
 extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
 extern int pci_num_vf(struct pci_dev *dev);
+extern int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
+extern int pci_sriov_get_totalvfs(struct pci_dev *dev);
 #else
 static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
 {
@@ -1627,6 +1629,14 @@ static inline int pci_num_vf(struct pci_dev *dev)
 {
 	return 0;
 }
+static inline int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
+{
+	return 0;
+}
+static inline int pci_sriov_get_totalvfs(struct pci_dev *dev)
+{
+	return 0;
+}
 #endif
 
 #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
-- 
cgit v1.2.3-55-g7522


From 800963fd598e2acbcd3a21a17e3ab3c185ad0d6a Mon Sep 17 00:00:00 2001
From: Henrik Rydberg
Date: Sat, 10 Nov 2012 00:32:36 -0800
Subject: Input: document new members of struct input_dev

Fixes kernel-doc warnings for the members added in 3.7-rc1.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 5538cc09a4f5..82ce323b9986 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,9 @@ struct input_value {
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @num_vals: number of values queued in the current frame
+ * @max_vals: maximum number of values queued in a frame
+ * @vals: array of values queued in the current frame
  * @devres_managed: indicates that devices is managed with devres framework
  *	and needs not be explicitly unregistered or freed.
  */
-- 
cgit v1.2.3-55-g7522


From 84b36ce5f79c01f792c623f14e92ed86cdccb42f Mon Sep 17 00:00:00 2001
From: Jonathan Cameron
Date: Sat, 30 Jun 2012 20:06:00 +0100
Subject: staging:iio: Add support for multiple buffers

Route all buffer writes through the demux.
Addition or removal of a buffer results in tear down and
setup of all the buffers for a given device.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Tested-by: srinivas pandruvada <srinivas.pandruvada@intel.com>
---
 drivers/iio/accel/hid-sensor-accel-3d.c         |  15 +-
 drivers/iio/adc/ad7266.c                        |   3 +-
 drivers/iio/adc/ad7476.c                        |   2 +-
 drivers/iio/adc/ad7887.c                        |   2 +-
 drivers/iio/adc/ad_sigma_delta.c                |   2 +-
 drivers/iio/adc/at91_adc.c                      |   3 +-
 drivers/iio/gyro/hid-sensor-gyro-3d.c           |  15 +-
 drivers/iio/industrialio-buffer.c               | 380 ++++++++++++++++--------
 drivers/iio/industrialio-core.c                 |   1 +
 drivers/iio/light/adjd_s311.c                   |   3 +-
 drivers/iio/light/hid-sensor-als.c              |  15 +-
 drivers/iio/magnetometer/hid-sensor-magn-3d.c   |  15 +-
 drivers/staging/iio/accel/adis16201_ring.c      |   2 +-
 drivers/staging/iio/accel/adis16203_ring.c      |   2 +-
 drivers/staging/iio/accel/adis16204_ring.c      |   2 +-
 drivers/staging/iio/accel/adis16209_ring.c      |   2 +-
 drivers/staging/iio/accel/adis16240_ring.c      |   2 +-
 drivers/staging/iio/accel/lis3l02dq_ring.c      |   2 +-
 drivers/staging/iio/adc/ad7298_ring.c           |   2 +-
 drivers/staging/iio/adc/ad7606_ring.c           |   2 +-
 drivers/staging/iio/adc/ad799x_ring.c           |   2 +-
 drivers/staging/iio/adc/max1363_ring.c          |   2 +-
 drivers/staging/iio/adc/mxs-lradc.c             |   3 +-
 drivers/staging/iio/gyro/adis16260_ring.c       |   2 +-
 drivers/staging/iio/iio_simple_dummy_buffer.c   |   5 +-
 drivers/staging/iio/impedance-analyzer/ad5933.c |   4 +-
 drivers/staging/iio/imu/adis16400_ring.c        |   5 +-
 drivers/staging/iio/meter/ade7758_ring.c        |   2 +-
 include/linux/iio/buffer.h                      |  24 +-
 include/linux/iio/iio.h                         |   2 +
 30 files changed, 313 insertions(+), 210 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index 314a4057879e..a95cda0e387f 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -197,21 +197,8 @@ static const struct iio_info accel_3d_info = {
 /* Function to push data to buffer */
 static void hid_sensor_push_data(struct iio_dev *indio_dev, u8 *data, int len)
 {
-	struct iio_buffer *buffer = indio_dev->buffer;
-	int datum_sz;
-
 	dev_dbg(&indio_dev->dev, "hid_sensor_push_data\n");
-	if (!buffer) {
-		dev_err(&indio_dev->dev, "Buffer == NULL\n");
-		return;
-	}
-	datum_sz = buffer->access->get_bytes_per_datum(buffer);
-	if (len > datum_sz) {
-		dev_err(&indio_dev->dev, "Datum size mismatch %d:%d\n", len,
-				datum_sz);
-		return;
-	}
-	iio_push_to_buffer(buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 }
 
 /* Callback handler to send event after all samples are received and captured */
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index b11f214779a2..a6f4fc5f8201 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -91,7 +91,6 @@ static irqreturn_t ad7266_trigger_handler(int irq, void *p)
 {
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
-	struct iio_buffer *buffer = indio_dev->buffer;
 	struct ad7266_state *st = iio_priv(indio_dev);
 	int ret;
 
@@ -99,7 +98,7 @@ static irqreturn_t ad7266_trigger_handler(int irq, void *p)
 	if (ret == 0) {
 		if (indio_dev->scan_timestamp)
 			((s64 *)st->data)[1] = pf->timestamp;
-		iio_push_to_buffer(buffer, (u8 *)st->data);
+		iio_push_to_buffers(indio_dev, (u8 *)st->data);
 	}
 
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/iio/adc/ad7476.c b/drivers/iio/adc/ad7476.c
index 7f2f45a0a48d..330248bfebae 100644
--- a/drivers/iio/adc/ad7476.c
+++ b/drivers/iio/adc/ad7476.c
@@ -76,7 +76,7 @@ static irqreturn_t ad7476_trigger_handler(int irq, void  *p)
 	if (indio_dev->scan_timestamp)
 		((s64 *)st->data)[1] = time_ns;
 
-	iio_push_to_buffer(indio_dev->buffer, st->data);
+	iio_push_to_buffers(indio_dev, st->data);
 done:
 	iio_trigger_notify_done(indio_dev->trig);
 
diff --git a/drivers/iio/adc/ad7887.c b/drivers/iio/adc/ad7887.c
index fd62309b4d3d..81153fafac7a 100644
--- a/drivers/iio/adc/ad7887.c
+++ b/drivers/iio/adc/ad7887.c
@@ -134,7 +134,7 @@ static irqreturn_t ad7887_trigger_handler(int irq, void *p)
 		memcpy(st->data + indio_dev->scan_bytes - sizeof(s64),
 		       &time_ns, sizeof(time_ns));
 
-	iio_push_to_buffer(indio_dev->buffer, st->data);
+	iio_push_to_buffers(indio_dev, st->data);
 done:
 	iio_trigger_notify_done(indio_dev->trig);
 
diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index 67baa1363d7a..afe6d78c8ff0 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -391,7 +391,7 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p)
 		break;
 	}
 
-	iio_push_to_buffer(indio_dev->buffer, (uint8_t *)data);
+	iio_push_to_buffers(indio_dev, (uint8_t *)data);
 
 	iio_trigger_notify_done(indio_dev->trig);
 	sigma_delta->irq_dis = false;
diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
index 2e2c9a80aa37..03b85940f4ba 100644
--- a/drivers/iio/adc/at91_adc.c
+++ b/drivers/iio/adc/at91_adc.c
@@ -65,7 +65,6 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *idev = pf->indio_dev;
 	struct at91_adc_state *st = iio_priv(idev);
-	struct iio_buffer *buffer = idev->buffer;
 	int i, j = 0;
 
 	for (i = 0; i < idev->masklength; i++) {
@@ -81,7 +80,7 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p)
 		*timestamp = pf->timestamp;
 	}
 
-	iio_push_to_buffer(buffer, st->buffer);
+	iio_push_to_buffers(indio_dev, (u8 *)st->buffer);
 
 	iio_trigger_notify_done(idev->trig);
 
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 4c56ada51c39..02ef989b830d 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -197,21 +197,8 @@ static const struct iio_info gyro_3d_info = {
 /* Function to push data to buffer */
 static void hid_sensor_push_data(struct iio_dev *indio_dev, u8 *data, int len)
 {
-	struct iio_buffer *buffer = indio_dev->buffer;
-	int datum_sz;
-
 	dev_dbg(&indio_dev->dev, "hid_sensor_push_data\n");
-	if (!buffer) {
-		dev_err(&indio_dev->dev, "Buffer == NULL\n");
-		return;
-	}
-	datum_sz = buffer->access->get_bytes_per_datum(buffer);
-	if (len > datum_sz) {
-		dev_err(&indio_dev->dev, "Datum size mismatch %d:%d\n", len,
-				datum_sz);
-		return;
-	}
-	iio_push_to_buffer(buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 }
 
 /* Callback handler to send event after all samples are received and captured */
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 722a83fd8d85..aaadd32f9f0d 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -31,6 +31,18 @@ static const char * const iio_endian_prefix[] = {
 	[IIO_LE] = "le",
 };
 
+static bool iio_buffer_is_active(struct iio_dev *indio_dev,
+				 struct iio_buffer *buf)
+{
+	struct list_head *p;
+
+	list_for_each(p, &indio_dev->buffer_list)
+		if (p == &buf->buffer_list)
+			return true;
+
+	return false;
+}
+
 /**
  * iio_buffer_read_first_n_outer() - chrdev read for buffer access
  *
@@ -134,7 +146,7 @@ static ssize_t iio_scan_el_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 	mutex_lock(&indio_dev->mlock);
-	if (iio_buffer_enabled(indio_dev)) {
+	if (iio_buffer_is_active(indio_dev, indio_dev->buffer)) {
 		ret = -EBUSY;
 		goto error_ret;
 	}
@@ -180,12 +192,11 @@ static ssize_t iio_scan_el_ts_store(struct device *dev,
 		return ret;
 
 	mutex_lock(&indio_dev->mlock);
-	if (iio_buffer_enabled(indio_dev)) {
+	if (iio_buffer_is_active(indio_dev, indio_dev->buffer)) {
 		ret = -EBUSY;
 		goto error_ret;
 	}
 	indio_dev->buffer->scan_timestamp = state;
-	indio_dev->scan_timestamp = state;
 error_ret:
 	mutex_unlock(&indio_dev->mlock);
 
@@ -385,7 +396,7 @@ ssize_t iio_buffer_write_length(struct device *dev,
 			return len;
 
 	mutex_lock(&indio_dev->mlock);
-	if (iio_buffer_enabled(indio_dev)) {
+	if (iio_buffer_is_active(indio_dev, indio_dev->buffer)) {
 		ret = -EBUSY;
 	} else {
 		if (buffer->access->set_length)
@@ -398,102 +409,14 @@ ssize_t iio_buffer_write_length(struct device *dev,
 }
 EXPORT_SYMBOL(iio_buffer_write_length);
 
-ssize_t iio_buffer_store_enable(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf,
-				size_t len)
-{
-	int ret;
-	bool requested_state, current_state;
-	int previous_mode;
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	struct iio_buffer *buffer = indio_dev->buffer;
-
-	mutex_lock(&indio_dev->mlock);
-	previous_mode = indio_dev->currentmode;
-	requested_state = !(buf[0] == '0');
-	current_state = iio_buffer_enabled(indio_dev);
-	if (current_state == requested_state) {
-		printk(KERN_INFO "iio-buffer, current state requested again\n");
-		goto done;
-	}
-	if (requested_state) {
-		if (indio_dev->setup_ops->preenable) {
-			ret = indio_dev->setup_ops->preenable(indio_dev);
-			if (ret) {
-				printk(KERN_ERR
-				       "Buffer not started: "
-				       "buffer preenable failed\n");
-				goto error_ret;
-			}
-		}
-		if (buffer->access->request_update) {
-			ret = buffer->access->request_update(buffer);
-			if (ret) {
-				printk(KERN_INFO
-				       "Buffer not started: "
-				       "buffer parameter update failed\n");
-				goto error_ret;
-			}
-		}
-		/* Definitely possible for devices to support both of these. */
-		if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) {
-			if (!indio_dev->trig) {
-				printk(KERN_INFO
-				       "Buffer not started: no trigger\n");
-				ret = -EINVAL;
-				goto error_ret;
-			}
-			indio_dev->currentmode = INDIO_BUFFER_TRIGGERED;
-		} else if (indio_dev->modes & INDIO_BUFFER_HARDWARE)
-			indio_dev->currentmode = INDIO_BUFFER_HARDWARE;
-		else { /* should never be reached */
-			ret = -EINVAL;
-			goto error_ret;
-		}
-
-		if (indio_dev->setup_ops->postenable) {
-			ret = indio_dev->setup_ops->postenable(indio_dev);
-			if (ret) {
-				printk(KERN_INFO
-				       "Buffer not started: "
-				       "postenable failed\n");
-				indio_dev->currentmode = previous_mode;
-				if (indio_dev->setup_ops->postdisable)
-					indio_dev->setup_ops->
-						postdisable(indio_dev);
-				goto error_ret;
-			}
-		}
-	} else {
-		if (indio_dev->setup_ops->predisable) {
-			ret = indio_dev->setup_ops->predisable(indio_dev);
-			if (ret)
-				goto error_ret;
-		}
-		indio_dev->currentmode = INDIO_DIRECT_MODE;
-		if (indio_dev->setup_ops->postdisable) {
-			ret = indio_dev->setup_ops->postdisable(indio_dev);
-			if (ret)
-				goto error_ret;
-		}
-	}
-done:
-	mutex_unlock(&indio_dev->mlock);
-	return len;
-
-error_ret:
-	mutex_unlock(&indio_dev->mlock);
-	return ret;
-}
-EXPORT_SYMBOL(iio_buffer_store_enable);
-
 ssize_t iio_buffer_show_enable(struct device *dev,
 			       struct device_attribute *attr,
 			       char *buf)
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	return sprintf(buf, "%d\n", iio_buffer_enabled(indio_dev));
+	return sprintf(buf, "%d\n",
+		       iio_buffer_is_active(indio_dev,
+					    indio_dev->buffer));
 }
 EXPORT_SYMBOL(iio_buffer_show_enable);
 
@@ -537,35 +460,220 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, const long *mask,
 	return bytes;
 }
 
-int iio_sw_buffer_preenable(struct iio_dev *indio_dev)
+int iio_update_buffers(struct iio_dev *indio_dev,
+		       struct iio_buffer *insert_buffer,
+		       struct iio_buffer *remove_buffer)
 {
-	struct iio_buffer *buffer = indio_dev->buffer;
-	dev_dbg(&indio_dev->dev, "%s\n", __func__);
+	int ret;
+	int success = 0;
+	struct iio_buffer *buffer;
+	unsigned long *compound_mask;
+	const unsigned long *old_mask;
 
-	/* How much space will the demuxed element take? */
-	indio_dev->scan_bytes =
-		iio_compute_scan_bytes(indio_dev, buffer->scan_mask,
-				       buffer->scan_timestamp);
-	buffer->access->set_bytes_per_datum(buffer, indio_dev->scan_bytes);
+	/* Wind down existing buffers - iff there are any */
+	if (!list_empty(&indio_dev->buffer_list)) {
+		if (indio_dev->setup_ops->predisable) {
+			ret = indio_dev->setup_ops->predisable(indio_dev);
+			if (ret)
+				goto error_ret;
+		}
+		indio_dev->currentmode = INDIO_DIRECT_MODE;
+		if (indio_dev->setup_ops->postdisable) {
+			ret = indio_dev->setup_ops->postdisable(indio_dev);
+			if (ret)
+				goto error_ret;
+		}
+	}
+	/* Keep a copy of current setup to allow roll back */
+	old_mask = indio_dev->active_scan_mask;
+	if (!indio_dev->available_scan_masks)
+		indio_dev->active_scan_mask = NULL;
+
+	if (remove_buffer)
+		list_del(&remove_buffer->buffer_list);
+	if (insert_buffer)
+		list_add(&insert_buffer->buffer_list, &indio_dev->buffer_list);
+
+	/* If no buffers in list, we are done */
+	if (list_empty(&indio_dev->buffer_list)) {
+		indio_dev->currentmode = INDIO_DIRECT_MODE;
+		if (indio_dev->available_scan_masks == NULL)
+			kfree(old_mask);
+		return 0;
+	}
 
 	/* What scan mask do we actually have ?*/
-	if (indio_dev->available_scan_masks)
+	compound_mask = kcalloc(BITS_TO_LONGS(indio_dev->masklength),
+				sizeof(long), GFP_KERNEL);
+	if (compound_mask == NULL) {
+		if (indio_dev->available_scan_masks == NULL)
+			kfree(old_mask);
+		return -ENOMEM;
+	}
+	indio_dev->scan_timestamp = 0;
+
+	list_for_each_entry(buffer, &indio_dev->buffer_list, buffer_list) {
+		bitmap_or(compound_mask, compound_mask, buffer->scan_mask,
+			  indio_dev->masklength);
+		indio_dev->scan_timestamp |= buffer->scan_timestamp;
+	}
+	if (indio_dev->available_scan_masks) {
 		indio_dev->active_scan_mask =
 			iio_scan_mask_match(indio_dev->available_scan_masks,
 					    indio_dev->masklength,
-					    buffer->scan_mask);
-	else
-		indio_dev->active_scan_mask = buffer->scan_mask;
-
-	if (indio_dev->active_scan_mask == NULL)
-		return -EINVAL;
+					    compound_mask);
+		if (indio_dev->active_scan_mask == NULL) {
+			/*
+			 * Roll back.
+			 * Note can only occur when adding a buffer.
+			 */
+			list_del(&insert_buffer->buffer_list);
+			indio_dev->active_scan_mask = old_mask;
+			success = -EINVAL;
+		}
+	} else {
+		indio_dev->active_scan_mask = compound_mask;
+	}
 
 	iio_update_demux(indio_dev);
 
-	if (indio_dev->info->update_scan_mode)
-		return indio_dev->info
+	/* Wind up again */
+	if (indio_dev->setup_ops->preenable) {
+		ret = indio_dev->setup_ops->preenable(indio_dev);
+		if (ret) {
+			printk(KERN_ERR
+			       "Buffer not started:"
+			       "buffer preenable failed\n");
+			goto error_remove_inserted;
+		}
+	}
+	indio_dev->scan_bytes =
+		iio_compute_scan_bytes(indio_dev,
+				       indio_dev->active_scan_mask,
+				       indio_dev->scan_timestamp);
+	list_for_each_entry(buffer, &indio_dev->buffer_list, buffer_list)
+		if (buffer->access->request_update) {
+			ret = buffer->access->request_update(buffer);
+			if (ret) {
+				printk(KERN_INFO
+				       "Buffer not started:"
+				       "buffer parameter update failed\n");
+				goto error_run_postdisable;
+			}
+		}
+	if (indio_dev->info->update_scan_mode) {
+		ret = indio_dev->info
 			->update_scan_mode(indio_dev,
 					   indio_dev->active_scan_mask);
+		if (ret < 0) {
+			printk(KERN_INFO "update scan mode failed\n");
+			goto error_run_postdisable;
+		}
+	}
+	/* Definitely possible for devices to support both of these.*/
+	if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) {
+		if (!indio_dev->trig) {
+			printk(KERN_INFO "Buffer not started: no trigger\n");
+			ret = -EINVAL;
+			/* Can only occur on first buffer */
+			goto error_run_postdisable;
+		}
+		indio_dev->currentmode = INDIO_BUFFER_TRIGGERED;
+	} else if (indio_dev->modes & INDIO_BUFFER_HARDWARE) {
+		indio_dev->currentmode = INDIO_BUFFER_HARDWARE;
+	} else { /* should never be reached */
+		ret = -EINVAL;
+		goto error_run_postdisable;
+	}
+
+	if (indio_dev->setup_ops->postenable) {
+		ret = indio_dev->setup_ops->postenable(indio_dev);
+		if (ret) {
+			printk(KERN_INFO
+			       "Buffer not started: postenable failed\n");
+			indio_dev->currentmode = INDIO_DIRECT_MODE;
+			if (indio_dev->setup_ops->postdisable)
+				indio_dev->setup_ops->postdisable(indio_dev);
+			goto error_disable_all_buffers;
+		}
+	}
+
+	if (indio_dev->available_scan_masks)
+		kfree(compound_mask);
+	else
+		kfree(old_mask);
+
+	return success;
+
+error_disable_all_buffers:
+	indio_dev->currentmode = INDIO_DIRECT_MODE;
+error_run_postdisable:
+	if (indio_dev->setup_ops->postdisable)
+		indio_dev->setup_ops->postdisable(indio_dev);
+error_remove_inserted:
+
+	if (insert_buffer)
+		list_del(&insert_buffer->buffer_list);
+	indio_dev->active_scan_mask = old_mask;
+	kfree(compound_mask);
+error_ret:
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iio_update_buffers);
+
+ssize_t iio_buffer_store_enable(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf,
+				size_t len)
+{
+	int ret;
+	bool requested_state;
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_buffer *pbuf = indio_dev->buffer;
+	bool inlist;
+
+	ret = strtobool(buf, &requested_state);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&indio_dev->mlock);
+
+	/* Find out if it is in the list */
+	inlist = iio_buffer_is_active(indio_dev, pbuf);
+	/* Already in desired state */
+	if (inlist == requested_state)
+		goto done;
+
+	if (requested_state)
+		ret = iio_update_buffers(indio_dev,
+					 indio_dev->buffer, NULL);
+	else
+		ret = iio_update_buffers(indio_dev,
+					 NULL, indio_dev->buffer);
+
+	if (ret < 0)
+		goto done;
+done:
+	mutex_unlock(&indio_dev->mlock);
+	return (ret < 0) ? ret : len;
+}
+EXPORT_SYMBOL(iio_buffer_store_enable);
+
+int iio_sw_buffer_preenable(struct iio_dev *indio_dev)
+{
+	struct iio_buffer *buffer;
+	unsigned bytes;
+	dev_dbg(&indio_dev->dev, "%s\n", __func__);
+
+	list_for_each_entry(buffer, &indio_dev->buffer_list, buffer_list)
+		if (buffer->access->set_bytes_per_datum) {
+			bytes = iio_compute_scan_bytes(indio_dev,
+						       buffer->scan_mask,
+						       buffer->scan_timestamp);
+
+			buffer->access->set_bytes_per_datum(buffer, bytes);
+		}
 	return 0;
 }
 EXPORT_SYMBOL(iio_sw_buffer_preenable);
@@ -599,7 +707,11 @@ static bool iio_validate_scan_mask(struct iio_dev *indio_dev,
  * iio_scan_mask_set() - set particular bit in the scan mask
  * @buffer: the buffer whose scan mask we are interested in
  * @bit: the bit to be set.
- **/
+ *
+ * Note that at this point we have no way of knowing what other
+ * buffers might request, hence this code only verifies that the
+ * individual buffers request is plausible.
+ */
 int iio_scan_mask_set(struct iio_dev *indio_dev,
 		      struct iio_buffer *buffer, int bit)
 {
@@ -682,13 +794,12 @@ static unsigned char *iio_demux(struct iio_buffer *buffer,
 	return buffer->demux_bounce;
 }
 
-int iio_push_to_buffer(struct iio_buffer *buffer, unsigned char *data)
+static int iio_push_to_buffer(struct iio_buffer *buffer, unsigned char *data)
 {
 	unsigned char *dataout = iio_demux(buffer, data);
 
 	return buffer->access->store_to(buffer, dataout);
 }
-EXPORT_SYMBOL_GPL(iio_push_to_buffer);
 
 static void iio_buffer_demux_free(struct iio_buffer *buffer)
 {
@@ -699,10 +810,26 @@ static void iio_buffer_demux_free(struct iio_buffer *buffer)
 	}
 }
 
-int iio_update_demux(struct iio_dev *indio_dev)
+
+int iio_push_to_buffers(struct iio_dev *indio_dev, unsigned char *data)
+{
+	int ret;
+	struct iio_buffer *buf;
+
+	list_for_each_entry(buf, &indio_dev->buffer_list, buffer_list) {
+		ret = iio_push_to_buffer(buf, data);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iio_push_to_buffers);
+
+static int iio_buffer_update_demux(struct iio_dev *indio_dev,
+				   struct iio_buffer *buffer)
 {
 	const struct iio_chan_spec *ch;
-	struct iio_buffer *buffer = indio_dev->buffer;
 	int ret, in_ind = -1, out_ind, length;
 	unsigned in_loc = 0, out_loc = 0;
 	struct iio_demux_table *p;
@@ -787,4 +914,23 @@ error_clear_mux_table:
 
 	return ret;
 }
+
+int iio_update_demux(struct iio_dev *indio_dev)
+{
+	struct iio_buffer *buffer;
+	int ret;
+
+	list_for_each_entry(buffer, &indio_dev->buffer_list, buffer_list) {
+		ret = iio_buffer_update_demux(indio_dev, buffer);
+		if (ret < 0)
+			goto error_clear_mux_table;
+	}
+	return 0;
+
+error_clear_mux_table:
+	list_for_each_entry(buffer, &indio_dev->buffer_list, buffer_list)
+		iio_buffer_demux_free(buffer);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(iio_update_demux);
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index cd700368eed0..060a4045be85 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -856,6 +856,7 @@ struct iio_dev *iio_device_alloc(int sizeof_priv)
 			return NULL;
 		}
 		dev_set_name(&dev->dev, "iio:device%d", dev->id);
+		INIT_LIST_HEAD(&dev->buffer_list);
 	}
 
 	return dev;
diff --git a/drivers/iio/light/adjd_s311.c b/drivers/iio/light/adjd_s311.c
index 164b62b91a4b..36d210a06b28 100644
--- a/drivers/iio/light/adjd_s311.c
+++ b/drivers/iio/light/adjd_s311.c
@@ -164,7 +164,6 @@ static irqreturn_t adjd_s311_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
 	struct adjd_s311_data *data = iio_priv(indio_dev);
-	struct iio_buffer *buffer = indio_dev->buffer;
 	s64 time_ns = iio_get_time_ns();
 	int len = 0;
 	int i, j = 0;
@@ -187,7 +186,7 @@ static irqreturn_t adjd_s311_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*(s64 *)((u8 *)data->buffer + ALIGN(len, sizeof(s64)))
 			= time_ns;
-	iio_push_to_buffer(buffer, (u8 *)data->buffer);
+	iio_push_to_buffers(indio_dev, (u8 *)data->buffer);
 
 done:
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 96e3691e42c4..8e1f69844eea 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -176,21 +176,8 @@ static const struct iio_info als_info = {
 /* Function to push data to buffer */
 static void hid_sensor_push_data(struct iio_dev *indio_dev, u8 *data, int len)
 {
-	struct iio_buffer *buffer = indio_dev->buffer;
-	int datum_sz;
-
 	dev_dbg(&indio_dev->dev, "hid_sensor_push_data\n");
-	if (!buffer) {
-		dev_err(&indio_dev->dev, "Buffer == NULL\n");
-		return;
-	}
-	datum_sz = buffer->access->get_bytes_per_datum(buffer);
-	if (len > datum_sz) {
-		dev_err(&indio_dev->dev, "Datum size mismatch %d:%d\n", len,
-				datum_sz);
-		return;
-	}
-	iio_push_to_buffer(buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 }
 
 /* Callback handler to send event after all samples are received and captured */
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index c4f0d274f577..d1b5fb74b9bf 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -198,21 +198,8 @@ static const struct iio_info magn_3d_info = {
 /* Function to push data to buffer */
 static void hid_sensor_push_data(struct iio_dev *indio_dev, u8 *data, int len)
 {
-	struct iio_buffer *buffer = indio_dev->buffer;
-	int datum_sz;
-
 	dev_dbg(&indio_dev->dev, "hid_sensor_push_data\n");
-	if (!buffer) {
-		dev_err(&indio_dev->dev, "Buffer == NULL\n");
-		return;
-	}
-	datum_sz = buffer->access->get_bytes_per_datum(buffer);
-	if (len > datum_sz) {
-		dev_err(&indio_dev->dev, "Datum size mismatch %d:%d\n", len,
-				datum_sz);
-		return;
-	}
-	iio_push_to_buffer(buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 }
 
 /* Callback handler to send event after all samples are received and captured */
diff --git a/drivers/staging/iio/accel/adis16201_ring.c b/drivers/staging/iio/accel/adis16201_ring.c
index 97c09f0c26ae..e14ca60092a2 100644
--- a/drivers/staging/iio/accel/adis16201_ring.c
+++ b/drivers/staging/iio/accel/adis16201_ring.c
@@ -82,7 +82,7 @@ static irqreturn_t adis16201_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/accel/adis16203_ring.c b/drivers/staging/iio/accel/adis16203_ring.c
index 7507e1a04591..eba2e285c84a 100644
--- a/drivers/staging/iio/accel/adis16203_ring.c
+++ b/drivers/staging/iio/accel/adis16203_ring.c
@@ -81,7 +81,7 @@ static irqreturn_t adis16203_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/accel/adis16204_ring.c b/drivers/staging/iio/accel/adis16204_ring.c
index 4c976bec986b..3611a13836c9 100644
--- a/drivers/staging/iio/accel/adis16204_ring.c
+++ b/drivers/staging/iio/accel/adis16204_ring.c
@@ -78,7 +78,7 @@ static irqreturn_t adis16204_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/accel/adis16209_ring.c b/drivers/staging/iio/accel/adis16209_ring.c
index f939e29d6c82..6af9a5dbc709 100644
--- a/drivers/staging/iio/accel/adis16209_ring.c
+++ b/drivers/staging/iio/accel/adis16209_ring.c
@@ -78,7 +78,7 @@ static irqreturn_t adis16209_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/accel/adis16240_ring.c b/drivers/staging/iio/accel/adis16240_ring.c
index caff8e25e0a2..e2ac8a8c8107 100644
--- a/drivers/staging/iio/accel/adis16240_ring.c
+++ b/drivers/staging/iio/accel/adis16240_ring.c
@@ -76,7 +76,7 @@ static irqreturn_t adis16240_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/accel/lis3l02dq_ring.c b/drivers/staging/iio/accel/lis3l02dq_ring.c
index 246352716537..bc38651c315e 100644
--- a/drivers/staging/iio/accel/lis3l02dq_ring.c
+++ b/drivers/staging/iio/accel/lis3l02dq_ring.c
@@ -154,7 +154,7 @@ static irqreturn_t lis3l02dq_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*(s64 *)((u8 *)data + ALIGN(len, sizeof(s64)))
 			= pf->timestamp;
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/adc/ad7298_ring.c b/drivers/staging/iio/adc/ad7298_ring.c
index c2906a85fedb..b3dd514b9627 100644
--- a/drivers/staging/iio/adc/ad7298_ring.c
+++ b/drivers/staging/iio/adc/ad7298_ring.c
@@ -93,7 +93,7 @@ static irqreturn_t ad7298_trigger_handler(int irq, void *p)
 						 indio_dev->masklength); i++)
 		buf[i] = be16_to_cpu(st->rx_buf[i]);
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)buf);
+	iio_push_to_buffers(indio_dev, (u8 *)buf);
 
 done:
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/staging/iio/adc/ad7606_ring.c b/drivers/staging/iio/adc/ad7606_ring.c
index ba04d0ffd4f4..2b25cb07fe41 100644
--- a/drivers/staging/iio/adc/ad7606_ring.c
+++ b/drivers/staging/iio/adc/ad7606_ring.c
@@ -83,7 +83,7 @@ static void ad7606_poll_bh_to_ring(struct work_struct *work_s)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(buf + indio_dev->scan_bytes - sizeof(s64))) = time_ns;
 
-	iio_push_to_buffer(indio_dev->buffer, buf);
+	iio_push_to_buffers(indio_dev, buf);
 done:
 	gpio_set_value(st->pdata->gpio_convst, 0);
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/staging/iio/adc/ad799x_ring.c b/drivers/staging/iio/adc/ad799x_ring.c
index 86026d9b20bc..2c5f38475a8e 100644
--- a/drivers/staging/iio/adc/ad799x_ring.c
+++ b/drivers/staging/iio/adc/ad799x_ring.c
@@ -77,7 +77,7 @@ static irqreturn_t ad799x_trigger_handler(int irq, void *p)
 		memcpy(rxbuf + indio_dev->scan_bytes - sizeof(s64),
 			&time_ns, sizeof(time_ns));
 
-	iio_push_to_buffer(indio_dev->buffer, rxbuf);
+	iio_push_to_buffers(indio_dev, rxbuf);
 done:
 	kfree(rxbuf);
 out:
diff --git a/drivers/staging/iio/adc/max1363_ring.c b/drivers/staging/iio/adc/max1363_ring.c
index 5f74f3b7671a..688304bdbaf0 100644
--- a/drivers/staging/iio/adc/max1363_ring.c
+++ b/drivers/staging/iio/adc/max1363_ring.c
@@ -80,7 +80,7 @@ static irqreturn_t max1363_trigger_handler(int irq, void *p)
 
 	if (indio_dev->scan_timestamp)
 		memcpy(rxbuf + d_size - sizeof(s64), &time_ns, sizeof(time_ns));
-	iio_push_to_buffer(indio_dev->buffer, rxbuf);
+	iio_push_to_buffers(indio_dev, rxbuf);
 
 done_free:
 	kfree(rxbuf);
diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c
index df5bba284b73..3b467d8c5882 100644
--- a/drivers/staging/iio/adc/mxs-lradc.c
+++ b/drivers/staging/iio/adc/mxs-lradc.c
@@ -237,7 +237,6 @@ static irqreturn_t mxs_lradc_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *iio = pf->indio_dev;
 	struct mxs_lradc *lradc = iio_priv(iio);
-	struct iio_buffer *buffer = iio->buffer;
 	const uint32_t chan_value = LRADC_CH_ACCUMULATE |
 		((LRADC_DELAY_TIMER_LOOP - 1) << LRADC_CH_NUM_SAMPLES_OFFSET);
 	int i, j = 0;
@@ -256,7 +255,7 @@ static irqreturn_t mxs_lradc_trigger_handler(int irq, void *p)
 		*timestamp = pf->timestamp;
 	}
 
-	iio_push_to_buffer(buffer, (u8 *)lradc->buffer);
+	iio_push_to_buffers(iio, (u8 *)lradc->buffer);
 
 	iio_trigger_notify_done(iio->trig);
 
diff --git a/drivers/staging/iio/gyro/adis16260_ring.c b/drivers/staging/iio/gyro/adis16260_ring.c
index e294cb49736d..d6c48f850a9e 100644
--- a/drivers/staging/iio/gyro/adis16260_ring.c
+++ b/drivers/staging/iio/gyro/adis16260_ring.c
@@ -81,7 +81,7 @@ static irqreturn_t adis16260_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 done:
diff --git a/drivers/staging/iio/iio_simple_dummy_buffer.c b/drivers/staging/iio/iio_simple_dummy_buffer.c
index 697d9700db2f..dee16f0e7570 100644
--- a/drivers/staging/iio/iio_simple_dummy_buffer.c
+++ b/drivers/staging/iio/iio_simple_dummy_buffer.c
@@ -46,7 +46,6 @@ static irqreturn_t iio_simple_dummy_trigger_h(int irq, void *p)
 {
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
-	struct iio_buffer *buffer = indio_dev->buffer;
 	int len = 0;
 	u16 *data;
 
@@ -76,7 +75,7 @@ static irqreturn_t iio_simple_dummy_trigger_h(int irq, void *p)
 		     i < bitmap_weight(indio_dev->active_scan_mask,
 				       indio_dev->masklength);
 		     i++, j++) {
-			j = find_next_bit(buffer->scan_mask,
+			j = find_next_bit(indio_dev->active_scan_mask,
 					  indio_dev->masklength, j);
 			/* random access read from the 'device' */
 			data[i] = fakedata[j];
@@ -87,7 +86,7 @@ static irqreturn_t iio_simple_dummy_trigger_h(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		*(s64 *)((u8 *)data + ALIGN(len, sizeof(s64)))
 			= iio_get_time_ns();
-	iio_push_to_buffer(buffer, (u8 *)data);
+	iio_push_to_buffers(indio_dev, (u8 *)data);
 
 	kfree(data);
 
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index de21d47f33e9..b1fef147d5b5 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -647,7 +647,6 @@ static void ad5933_work(struct work_struct *work)
 	struct ad5933_state *st = container_of(work,
 		struct ad5933_state, work.work);
 	struct iio_dev *indio_dev = i2c_get_clientdata(st->client);
-	struct iio_buffer *ring = indio_dev->buffer;
 	signed short buf[2];
 	unsigned char status;
 
@@ -677,8 +676,7 @@ static void ad5933_work(struct work_struct *work)
 		} else {
 			buf[0] = be16_to_cpu(buf[0]);
 		}
-		/* save datum to the ring */
-		iio_push_to_buffer(ring, (u8 *)buf);
+		iio_push_to_buffers(indio_dev, (u8 *)buf);
 	} else {
 		/* no data available - try again later */
 		schedule_delayed_work(&st->work, st->poll_time_jiffies);
diff --git a/drivers/staging/iio/imu/adis16400_ring.c b/drivers/staging/iio/imu/adis16400_ring.c
index 260bdd1a4681..d46c1e38cf7b 100644
--- a/drivers/staging/iio/imu/adis16400_ring.c
+++ b/drivers/staging/iio/imu/adis16400_ring.c
@@ -114,7 +114,6 @@ static irqreturn_t adis16400_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
 	struct adis16400_state *st = iio_priv(indio_dev);
-	struct iio_buffer *ring = indio_dev->buffer;
 	int i = 0, j, ret = 0;
 	s16 *data;
 
@@ -148,9 +147,9 @@ static irqreturn_t adis16400_trigger_handler(int irq, void *p)
 		}
 	}
 	/* Guaranteed to be aligned with 8 byte boundary */
-	if (ring->scan_timestamp)
+	if (indio_dev->scan_timestamp)
 		*((s64 *)(data + ((i + 3)/4)*4)) = pf->timestamp;
-	iio_push_to_buffer(ring, (u8 *) data);
+	iio_push_to_buffers(indio_dev, (u8 *) data);
 
 done:
 	kfree(data);
diff --git a/drivers/staging/iio/meter/ade7758_ring.c b/drivers/staging/iio/meter/ade7758_ring.c
index 9e49baccf660..4552a4c7fe33 100644
--- a/drivers/staging/iio/meter/ade7758_ring.c
+++ b/drivers/staging/iio/meter/ade7758_ring.c
@@ -73,7 +73,7 @@ static irqreturn_t ade7758_trigger_handler(int irq, void *p)
 	if (indio_dev->scan_timestamp)
 		dat64[1] = pf->timestamp;
 
-	iio_push_to_buffer(indio_dev->buffer, (u8 *)dat64);
+	iio_push_to_buffers(indio_dev, (u8 *)dat64);
 
 	iio_trigger_notify_done(indio_dev->trig);
 
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index c629b3a1d9a9..027040569180 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -66,7 +66,8 @@ struct iio_buffer_access_funcs {
  * @stufftoread:	[INTERN] flag to indicate new data.
  * @demux_list:		[INTERN] list of operations required to demux the scan.
  * @demux_bounce:	[INTERN] buffer for doing gather from incoming scan.
- **/
+ * @buffer_list:	[INTERN] entry in the devices list of current buffers.
+ */
 struct iio_buffer {
 	int					length;
 	int					bytes_per_datum;
@@ -81,8 +82,21 @@ struct iio_buffer {
 	const struct attribute_group *attrs;
 	struct list_head			demux_list;
 	unsigned char				*demux_bounce;
+	struct list_head			buffer_list;
 };
 
+/**
+ * iio_update_buffers() - add or remove buffer from active list
+ * @indio_dev:		device to add buffer to
+ * @insert_buffer:	buffer to insert
+ * @remove_buffer:	buffer_to_remove
+ *
+ * Note this will tear down the all buffering and build it up again
+ */
+int iio_update_buffers(struct iio_dev *indio_dev,
+		       struct iio_buffer *insert_buffer,
+		       struct iio_buffer *remove_buffer);
+
 /**
  * iio_buffer_init() - Initialize the buffer structure
  * @buffer:		buffer to be initialized
@@ -115,11 +129,11 @@ int iio_scan_mask_set(struct iio_dev *indio_dev,
 		      struct iio_buffer *buffer, int bit);
 
 /**
- * iio_push_to_buffer() - push to a registered buffer.
- * @buffer:		IIO buffer structure for device
- * @data:		the data to push to the buffer
+ * iio_push_to_buffers() - push to a registered buffer.
+ * @indio_dev:		iio_dev structure for device.
+ * @data:		Full scan.
  */
-int iio_push_to_buffer(struct iio_buffer *buffer, unsigned char *data);
+int iio_push_to_buffers(struct iio_dev *indio_dev, unsigned char *data);
 
 int iio_update_demux(struct iio_dev *indio_dev);
 
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 7806c24e5bc8..adca93a999a7 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -410,6 +410,7 @@ struct iio_buffer_setup_ops {
  *			and owner
  * @event_interface:	[INTERN] event chrdevs associated with interrupt lines
  * @buffer:		[DRIVER] any buffer present
+ * @buffer_list:	[INTERN] list of all buffers currently attached
  * @scan_bytes:		[INTERN] num bytes captured to be fed to buffer demux
  * @mlock:		[INTERN] lock used to prevent simultaneous device state
  *			changes
@@ -448,6 +449,7 @@ struct iio_dev {
 	struct iio_event_interface	*event_interface;
 
 	struct iio_buffer		*buffer;
+	struct list_head		buffer_list;
 	int				scan_bytes;
 	struct mutex			mlock;
 
-- 
cgit v1.2.3-55-g7522


From 0464415dd21785aa8e8b12dbc939fcb5ca52f464 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron
Date: Sat, 30 Jun 2012 20:06:00 +0100
Subject: staging:iio:in kernel users: Add a data field for channel specific
 info.

Used to allow information about a given channel mapping to be passed
through from board files to the consumer drivers.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/inkern.c         | 1 +
 include/linux/iio/consumer.h | 2 ++
 include/linux/iio/machine.h  | 2 ++
 3 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index b394621d362c..d55e98fb300e 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -203,6 +203,7 @@ struct iio_channel *iio_channel_get_all(const char *name)
 		if (name && strcmp(name, c->map->consumer_dev_name) != 0)
 			continue;
 		chans[mapind].indio_dev = c->indio_dev;
+		chans[mapind].data = c->map->consumer_data;
 		chans[mapind].channel =
 			iio_chan_spec_from_name(chans[mapind].indio_dev,
 						c->map->adc_channel_label);
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index e875bcf0478f..57efee63a6da 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -18,10 +18,12 @@ struct iio_chan_spec;
  * struct iio_channel - everything needed for a consumer to use a channel
  * @indio_dev:		Device on which the channel exists.
  * @channel:		Full description of the channel.
+ * @data:		Data about the channel used by consumer.
  */
 struct iio_channel {
 	struct iio_dev *indio_dev;
 	const struct iio_chan_spec *channel;
+	void *data;
 };
 
 /**
diff --git a/include/linux/iio/machine.h b/include/linux/iio/machine.h
index 809a3f08d5a5..1601a2a63a72 100644
--- a/include/linux/iio/machine.h
+++ b/include/linux/iio/machine.h
@@ -19,11 +19,13 @@
  * @consumer_dev_name:	Name to uniquely identify the consumer device.
  * @consumer_channel:	Unique name used to identify the channel on the
  *			consumer side.
+ * @consumer_data:	Data about the channel for use by the consumer driver.
  */
 struct iio_map {
 	const char *adc_channel_label;
 	const char *consumer_dev_name;
 	const char *consumer_channel;
+	void *consumer_data;
 };
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 92d1079b281f89f1c65c6aece3cfab4fb422c797 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron
Date: Sat, 30 Jun 2012 20:06:00 +0100
Subject: staging:iio: add a callback buffer for in kernel push interface

This callback buffer is meant to be opaque to users, but basically
adds a very simple pass through buffer to which data may be
pushed when it is inserted into the buffer list.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/Kconfig          |   6 +++
 drivers/iio/Makefile         |   1 +
 drivers/iio/buffer_cb.c      | 113 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iio/consumer.h |  46 ++++++++++++++++++
 4 files changed, 166 insertions(+)
 create mode 100644 drivers/iio/buffer_cb.c

(limited to 'include/linux')

diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index fc937aca71fb..65ae734c607d 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -20,6 +20,12 @@ config IIO_BUFFER
 
 if IIO_BUFFER
 
+config IIO_BUFFER_CB
+boolean "IIO callback buffer used for push in-kernel interfaces"
+	help
+	  Should be selected by any drivers that do-inkernel push
+	  usage.  That is, those where the data is pushed to the consumer.
+
 config IIO_KFIFO_BUF
 	select IIO_TRIGGER
 	tristate "Industrial I/O buffering based on kfifo"
diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile
index 761f2b65ac52..31d76a07ec65 100644
--- a/drivers/iio/Makefile
+++ b/drivers/iio/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_IIO) += industrialio.o
 industrialio-y := industrialio-core.o industrialio-event.o inkern.o
 industrialio-$(CONFIG_IIO_BUFFER) += industrialio-buffer.o
 industrialio-$(CONFIG_IIO_TRIGGER) += industrialio-trigger.o
+industrialio-$(CONFIG_IIO_BUFFER_CB) += buffer_cb.o
 
 obj-$(CONFIG_IIO_TRIGGERED_BUFFER) += industrialio-triggered-buffer.o
 obj-$(CONFIG_IIO_KFIFO_BUF) += kfifo_buf.o
diff --git a/drivers/iio/buffer_cb.c b/drivers/iio/buffer_cb.c
new file mode 100644
index 000000000000..4d40e24f3721
--- /dev/null
+++ b/drivers/iio/buffer_cb.c
@@ -0,0 +1,113 @@
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/consumer.h>
+
+struct iio_cb_buffer {
+	struct iio_buffer buffer;
+	int (*cb)(u8 *data, void *private);
+	void *private;
+	struct iio_channel *channels;
+};
+
+static int iio_buffer_cb_store_to(struct iio_buffer *buffer, u8 *data)
+{
+	struct iio_cb_buffer *cb_buff = container_of(buffer,
+						     struct iio_cb_buffer,
+						     buffer);
+
+	return cb_buff->cb(data, cb_buff->private);
+}
+
+static struct iio_buffer_access_funcs iio_cb_access = {
+	.store_to = &iio_buffer_cb_store_to,
+};
+
+struct iio_cb_buffer *iio_channel_get_all_cb(const char *name,
+					     int (*cb)(u8 *data,
+						       void *private),
+					     void *private)
+{
+	int ret;
+	struct iio_cb_buffer *cb_buff;
+	struct iio_dev *indio_dev;
+	struct iio_channel *chan;
+
+	cb_buff = kzalloc(sizeof(*cb_buff), GFP_KERNEL);
+	if (cb_buff == NULL) {
+		ret = -ENOMEM;
+		goto error_ret;
+	}
+
+	cb_buff->private = private;
+	cb_buff->cb = cb;
+	cb_buff->buffer.access = &iio_cb_access;
+	INIT_LIST_HEAD(&cb_buff->buffer.demux_list);
+
+	cb_buff->channels = iio_channel_get_all(name);
+	if (IS_ERR(cb_buff->channels)) {
+		ret = PTR_ERR(cb_buff->channels);
+		goto error_free_cb_buff;
+	}
+
+	indio_dev = cb_buff->channels[0].indio_dev;
+	cb_buff->buffer.scan_mask
+		= kcalloc(BITS_TO_LONGS(indio_dev->masklength), sizeof(long),
+			  GFP_KERNEL);
+	if (cb_buff->buffer.scan_mask == NULL) {
+		ret = -ENOMEM;
+		goto error_release_channels;
+	}
+	chan = &cb_buff->channels[0];
+	while (chan->indio_dev) {
+		if (chan->indio_dev != indio_dev) {
+			ret = -EINVAL;
+			goto error_release_channels;
+		}
+		set_bit(chan->channel->scan_index,
+			cb_buff->buffer.scan_mask);
+		chan++;
+	}
+
+	return cb_buff;
+
+error_release_channels:
+	iio_channel_release_all(cb_buff->channels);
+error_free_cb_buff:
+	kfree(cb_buff);
+error_ret:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(iio_channel_get_all_cb);
+
+int iio_channel_start_all_cb(struct iio_cb_buffer *cb_buff)
+{
+	return iio_update_buffers(cb_buff->channels[0].indio_dev,
+				  &cb_buff->buffer,
+				  NULL);
+}
+EXPORT_SYMBOL_GPL(iio_channel_start_all_cb);
+
+void iio_channel_stop_all_cb(struct iio_cb_buffer *cb_buff)
+{
+	iio_update_buffers(cb_buff->channels[0].indio_dev,
+			   NULL,
+			   &cb_buff->buffer);
+}
+EXPORT_SYMBOL_GPL(iio_channel_stop_all_cb);
+
+void iio_channel_release_all_cb(struct iio_cb_buffer *cb_buff)
+{
+	iio_channel_release_all(cb_buff->channels);
+	kfree(cb_buff);
+}
+EXPORT_SYMBOL_GPL(iio_channel_release_all_cb);
+
+struct iio_channel
+*iio_channel_cb_get_channels(const struct iio_cb_buffer *cb_buffer)
+{
+	return cb_buffer->channels;
+}
+EXPORT_SYMBOL_GPL(iio_channel_cb_get_channels);
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 57efee63a6da..126c0a9375f2 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -61,6 +61,52 @@ struct iio_channel *iio_channel_get_all(const char *name);
  */
 void iio_channel_release_all(struct iio_channel *chan);
 
+struct iio_cb_buffer;
+/**
+ * iio_channel_get_all_cb() - register callback for triggered capture
+ * @name:		Name of client device.
+ * @cb:			Callback function.
+ * @private:		Private data passed to callback.
+ *
+ * NB right now we have no ability to mux data from multiple devices.
+ * So if the channels requested come from different devices this will
+ * fail.
+ */
+struct iio_cb_buffer *iio_channel_get_all_cb(const char *name,
+					     int (*cb)(u8 *data,
+						       void *private),
+					     void *private);
+/**
+ * iio_channel_release_all_cb() - release and unregister the callback.
+ * @cb_buffer:		The callback buffer that was allocated.
+ */
+void iio_channel_release_all_cb(struct iio_cb_buffer *cb_buffer);
+
+/**
+ * iio_channel_start_all_cb() - start the flow of data through callback.
+ * @cb_buff:		The callback buffer we are starting.
+ */
+int iio_channel_start_all_cb(struct iio_cb_buffer *cb_buff);
+
+/**
+ * iio_channel_stop_all_cb() - stop the flow of data through the callback.
+ * @cb_buff:		The callback buffer we are stopping.
+ */
+void iio_channel_stop_all_cb(struct iio_cb_buffer *cb_buff);
+
+/**
+ * iio_channel_cb_get_channels() - get access to the underlying channels.
+ * @cb_buff:		The callback buffer from whom we want the channel
+ *			information.
+ *
+ * This function allows one to obtain information about the channels.
+ * Whilst this may allow direct reading if all buffers are disabled, the
+ * primary aim is to allow drivers that are consuming a channel to query
+ * things like scaling of the channel.
+ */
+struct iio_channel
+*iio_channel_cb_get_channels(const struct iio_cb_buffer *cb_buffer);
+
 /**
  * iio_read_channel_raw() - read from a given channel
  * @chan:		The channel being queried.
-- 
cgit v1.2.3-55-g7522


From 76f93992e4c44f30be797d5c99d6f369ed001747 Mon Sep 17 00:00:00 2001
From: Lee Jones
Date: Mon, 5 Nov 2012 16:10:31 +0100
Subject: mfd: Provide the STMPE driver with its own IRQ domain

The STMPE driver is yet another IRQ controller which requires its
own IRQ domain. So, we provide it with one.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe.c       | 82 +++++++++++++++++++++++++++++------------------
 include/linux/mfd/stmpe.h |  2 ++
 2 files changed, 52 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index ad13cb00a749..5c8d8f260df2 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mfd/core.h>
@@ -757,7 +758,9 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 	int i;
 
 	if (variant->id_val == STMPE801_ID) {
-		handle_nested_irq(stmpe->irq_base);
+		int base = irq_create_mapping(stmpe->domain, 0);
+
+		handle_nested_irq(base);
 		return IRQ_HANDLED;
 	}
 
@@ -778,8 +781,9 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 		while (status) {
 			int bit = __ffs(status);
 			int line = bank * 8 + bit;
+			int nestedirq = irq_create_mapping(stmpe->domain, line);
 
-			handle_nested_irq(stmpe->irq_base + line);
+			handle_nested_irq(nestedirq);
 			status &= ~(1 << bit);
 		}
 
@@ -820,7 +824,7 @@ static void stmpe_irq_sync_unlock(struct irq_data *data)
 static void stmpe_irq_mask(struct irq_data *data)
 {
 	struct stmpe *stmpe = irq_data_get_irq_chip_data(data);
-	int offset = data->irq - stmpe->irq_base;
+	int offset = data->hwirq;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
@@ -830,7 +834,7 @@ static void stmpe_irq_mask(struct irq_data *data)
 static void stmpe_irq_unmask(struct irq_data *data)
 {
 	struct stmpe *stmpe = irq_data_get_irq_chip_data(data);
-	int offset = data->irq - stmpe->irq_base;
+	int offset = data->hwirq;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
@@ -845,43 +849,62 @@ static struct irq_chip stmpe_irq_chip = {
 	.irq_unmask		= stmpe_irq_unmask,
 };
 
-static int __devinit stmpe_irq_init(struct stmpe *stmpe)
+static int stmpe_irq_map(struct irq_domain *d, unsigned int virq,
+                                irq_hw_number_t hwirq)
 {
+	struct stmpe *stmpe = d->host_data;
 	struct irq_chip *chip = NULL;
-	int num_irqs = stmpe->variant->num_irqs;
-	int base = stmpe->irq_base;
-	int irq;
 
 	if (stmpe->variant->id_val != STMPE801_ID)
 		chip = &stmpe_irq_chip;
 
-	for (irq = base; irq < base + num_irqs; irq++) {
-		irq_set_chip_data(irq, stmpe);
-		irq_set_chip_and_handler(irq, chip, handle_edge_irq);
-		irq_set_nested_thread(irq, 1);
+	irq_set_chip_data(virq, stmpe);
+	irq_set_chip_and_handler(virq, chip, handle_edge_irq);
+	irq_set_nested_thread(virq, 1);
 #ifdef CONFIG_ARM
-		set_irq_flags(irq, IRQF_VALID);
+	set_irq_flags(virq, IRQF_VALID);
 #else
-		irq_set_noprobe(irq);
+	irq_set_noprobe(virq);
 #endif
-	}
 
 	return 0;
 }
 
-static void stmpe_irq_remove(struct stmpe *stmpe)
+static void stmpe_irq_unmap(struct irq_domain *d, unsigned int virq)
 {
-	int num_irqs = stmpe->variant->num_irqs;
-	int base = stmpe->irq_base;
-	int irq;
-
-	for (irq = base; irq < base + num_irqs; irq++) {
 #ifdef CONFIG_ARM
-		set_irq_flags(irq, 0);
+		set_irq_flags(virq, 0);
 #endif
-		irq_set_chip_and_handler(irq, NULL, NULL);
-		irq_set_chip_data(irq, NULL);
+		irq_set_chip_and_handler(virq, NULL, NULL);
+		irq_set_chip_data(virq, NULL);
+}
+
+static struct irq_domain_ops stmpe_irq_ops = {
+        .map    = stmpe_irq_map,
+        .unmap  = stmpe_irq_unmap,
+        .xlate  = irq_domain_xlate_twocell,
+};
+
+static int __devinit stmpe_irq_init(struct stmpe *stmpe)
+{
+	int base = stmpe->irq_base;
+	int num_irqs = stmpe->variant->num_irqs;
+
+	if (base) {
+		stmpe->domain = irq_domain_add_legacy(
+			NULL, num_irqs, base, 0, &stmpe_irq_ops, stmpe);
+	}
+	else {
+		stmpe->domain = irq_domain_add_linear(
+			NULL, num_irqs, &stmpe_irq_ops, stmpe);
+	}
+
+	if (!stmpe->domain) {
+		dev_err(stmpe->dev, "Failed to create irqdomain\n");
+		return -ENOSYS;
 	}
+
+	return 0;
 }
 
 static int __devinit stmpe_chip_init(struct stmpe *stmpe)
@@ -954,7 +977,7 @@ static int __devinit stmpe_add_device(struct stmpe *stmpe,
 				      struct mfd_cell *cell)
 {
 	return mfd_add_devices(stmpe->dev, stmpe->pdata->id, cell, 1,
-			       NULL, stmpe->irq_base, NULL);
+			       NULL, stmpe->irq_base, stmpe->domain);
 }
 
 static int __devinit stmpe_devices_init(struct stmpe *stmpe)
@@ -1067,7 +1090,7 @@ int __devinit stmpe_probe(struct stmpe_client_info *ci, int partnum)
 		if (ret) {
 			dev_err(stmpe->dev, "failed to request IRQ: %d\n",
 					ret);
-			goto out_removeirq;
+			goto free_gpio;
 		}
 	}
 
@@ -1083,9 +1106,6 @@ out_removedevs:
 	mfd_remove_devices(stmpe->dev);
 	if (stmpe->irq >= 0)
 		free_irq(stmpe->irq, stmpe);
-out_removeirq:
-	if (stmpe->irq >= 0)
-		stmpe_irq_remove(stmpe);
 free_gpio:
 	if (pdata->irq_over_gpio)
 		gpio_free(pdata->irq_gpio);
@@ -1098,10 +1118,8 @@ int stmpe_remove(struct stmpe *stmpe)
 {
 	mfd_remove_devices(stmpe->dev);
 
-	if (stmpe->irq >= 0) {
+	if (stmpe->irq >= 0)
 		free_irq(stmpe->irq, stmpe);
-		stmpe_irq_remove(stmpe);
-	}
 
 	if (stmpe->pdata->irq_over_gpio)
 		gpio_free(stmpe->pdata->irq_gpio);
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index f8d5b4d5843f..15dac790365f 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -62,6 +62,7 @@ struct stmpe_client_info;
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
+ * @irq_domain: IRQ domain
  * @client: client - i2c or spi
  * @ci: client specific information
  * @partnum: part number
@@ -79,6 +80,7 @@ struct stmpe {
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
+	struct irq_domain *domain;
 	void *client;
 	struct stmpe_client_info *ci;
 	enum stmpe_partnum partnum;
-- 
cgit v1.2.3-55-g7522


From 90a38d999739f35f4fc925c875e6ee518546b66c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Mon, 15 Oct 2012 22:44:45 +0200
Subject: mfd: Remove Unicode Byte Order Marks from da9055

Older gcc (< 4.4) doesn't like files starting with Unicode BOMs:

include/linux/mfd/da9055/core.h:1: error: stray ‘\357’ in program
include/linux/mfd/da9055/core.h:1: error: stray ‘\273’ in program
include/linux/mfd/da9055/core.h:1: error: stray ‘\277’ in program
include/linux/mfd/da9055/pdata.h:1: error: stray ‘\357’ in program
include/linux/mfd/da9055/pdata.h:1: error: stray ‘\273’ in program
include/linux/mfd/da9055/pdata.h:1: error: stray ‘\277’ in program
include/linux/mfd/da9055/reg.h:1: error: stray ‘\357’ in program
include/linux/mfd/da9055/reg.h:1: error: stray ‘\273’ in program
include/linux/mfd/da9055/reg.h:1: error: stray ‘\277’ in program

Remove the BOMs, the rest of the files is plain ASCII anyway.

Output of "file" before:

include/linux/mfd/da9055/core.h:  UTF-8 Unicode (with BOM) C program text
include/linux/mfd/da9055/pdata.h: UTF-8 Unicode (with BOM) C program text
include/linux/mfd/da9055/reg.h:   UTF-8 Unicode (with BOM) C program text

Output of "file" after:

include/linux/mfd/da9055/core.h:  ASCII C program text
include/linux/mfd/da9055/pdata.h: ASCII C program text
include/linux/mfd/da9055/reg.h:   ASCII C program text

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/da9055/core.h  | 2 +-
 include/linux/mfd/da9055/pdata.h | 2 +-
 include/linux/mfd/da9055/reg.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9055/core.h b/include/linux/mfd/da9055/core.h
index c96ad682c59e..956afa445998 100644
--- a/include/linux/mfd/da9055/core.h
+++ b/include/linux/mfd/da9055/core.h
@@ -1,4 +1,4 @@
-﻿/*
+/*
  * da9055 declarations for DA9055 PMICs.
  *
  * Copyright(c) 2012 Dialog Semiconductor Ltd.
diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h
index 147293b4471d..b9b204edb4e9 100644
--- a/include/linux/mfd/da9055/pdata.h
+++ b/include/linux/mfd/da9055/pdata.h
@@ -1,4 +1,4 @@
-﻿/* Copyright (C) 2012 Dialog Semiconductor Ltd.
+/* Copyright (C) 2012 Dialog Semiconductor Ltd.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/include/linux/mfd/da9055/reg.h b/include/linux/mfd/da9055/reg.h
index df237ee54803..2b592e072dbf 100644
--- a/include/linux/mfd/da9055/reg.h
+++ b/include/linux/mfd/da9055/reg.h
@@ -1,4 +1,4 @@
-﻿/*
+/*
  * DA9055 declarations for DA9055 PMICs.
  *
  * Copyright(c) 2012 Dialog Semiconductor Ltd.
-- 
cgit v1.2.3-55-g7522


From 89d16665d388837b30972081d97b814be26d68a2 Mon Sep 17 00:00:00 2001
From: Matt Fleming
Date: Fri, 9 Nov 2012 21:02:56 +0000
Subject: efivarfs: Use query_variable_info() to limit kmalloc()

We don't want someone who can write EFI variables to be able to
allocate arbitrarily large amounts of memory, so cap it to something
sensible like the amount of free space for EFI variables.

Acked-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efivars.c | 43 ++++++++++++++++++++++++++++++++++---------
 include/linux/efi.h        |  1 +
 2 files changed, 35 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 9ac934018bba..d6b8d2f628fa 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -694,28 +694,51 @@ static ssize_t efivarfs_file_write(struct file *file,
 	struct inode *inode = file->f_mapping->host;
 	unsigned long datasize = count - sizeof(attributes);
 	unsigned long newdatasize;
+	u64 storage_size, remaining_size, max_size;
 	ssize_t bytes = 0;
 
 	if (count < sizeof(attributes))
 		return -EINVAL;
 
-	data = kmalloc(datasize, GFP_KERNEL);
+	if (copy_from_user(&attributes, userbuf, sizeof(attributes)))
+		return -EFAULT;
 
-	if (!data)
-		return -ENOMEM;
+	if (attributes & ~(EFI_VARIABLE_MASK))
+		return -EINVAL;
 
 	efivars = var->efivars;
 
-	if (copy_from_user(&attributes, userbuf, sizeof(attributes))) {
-		bytes = -EFAULT;
-		goto out;
+	/*
+	 * Ensure that the user can't allocate arbitrarily large
+	 * amounts of memory. Pick a default size of 64K if
+	 * QueryVariableInfo() isn't supported by the firmware.
+	 */
+	spin_lock(&efivars->lock);
+
+	if (!efivars->ops->query_variable_info)
+		status = EFI_UNSUPPORTED;
+	else {
+		const struct efivar_operations *fops = efivars->ops;
+		status = fops->query_variable_info(attributes, &storage_size,
+						   &remaining_size, &max_size);
 	}
 
-	if (attributes & ~(EFI_VARIABLE_MASK)) {
-		bytes = -EINVAL;
-		goto out;
+	spin_unlock(&efivars->lock);
+
+	if (status != EFI_SUCCESS) {
+		if (status != EFI_UNSUPPORTED)
+			return efi_status_to_err(status);
+
+		remaining_size = 65536;
 	}
 
+	if (datasize > remaining_size)
+		return -ENOSPC;
+
+	data = kmalloc(datasize, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
 	if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
 		bytes = -EFAULT;
 		goto out;
@@ -1709,6 +1732,8 @@ efivars_init(void)
 	ops.get_variable = efi.get_variable;
 	ops.set_variable = efi.set_variable;
 	ops.get_next_variable = efi.get_next_variable;
+	ops.query_variable_info = efi.query_variable_info;
+
 	error = register_efivars(&__efivars, &ops, efi_kobj);
 	if (error)
 		goto err_put;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5e2308d9c6be..f80079cd84f4 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -646,6 +646,7 @@ struct efivar_operations {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_query_variable_info_t *query_variable_info;
 };
 
 struct efivars {
-- 
cgit v1.2.3-55-g7522


From 9f488ba8633fe77910ea0ba80ec762d9c34af1b6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 13 Nov 2012 10:46:33 -0800
Subject: IIO: fix build error in lp8788-charger.c

Turns out that consumer.h needs to include types.h on some platforms to
build properly (like powerpc).

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/iio/consumer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 126c0a9375f2..16c35ac045bd 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -9,6 +9,8 @@
  */
 #ifndef _IIO_INKERN_CONSUMER_H_
 #define _IIO_INKERN_CONSUMER_H_
+
+#include <linux/types.h>
 #include <linux/iio/types.h>
 
 struct iio_dev;
-- 
cgit v1.2.3-55-g7522


From ab7edb149c7548541ee588b8372c2041b6f1cbc8 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Thu, 11 Oct 2012 13:55:32 +0200
Subject: mfd: twl6040: Convert to use regmap_irq

With regmap_irq it is possible to remove the twl6040-irq.c file and
simplify the code.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |   4 +-
 drivers/mfd/Makefile        |   2 +-
 drivers/mfd/twl6040-core.c  |  55 ++++++++----
 drivers/mfd/twl6040-irq.c   | 205 --------------------------------------------
 include/linux/mfd/twl6040.h |  10 +--
 5 files changed, 48 insertions(+), 228 deletions(-)
 delete mode 100644 drivers/mfd/twl6040-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 3f2187ae8c5d..34242cada125 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -321,10 +321,10 @@ config MFD_TWL4030_AUDIO
 
 config TWL6040_CORE
 	bool "Support for TWL6040 audio codec"
-	depends on I2C=y && GENERIC_HARDIRQS
+	depends on I2C=y
 	select MFD_CORE
 	select REGMAP_I2C
-	select IRQ_DOMAIN
+	select REGMAP_IRQ
 	default n
 	help
 	  Say yes here if you want support for Texas Instruments TWL6040 audio
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index a4093a44304a..05bebf66ccd2 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -67,7 +67,7 @@ obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
 obj-$(CONFIG_TWL4030_MADC)      += twl4030-madc.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
 obj-$(CONFIG_MFD_TWL4030_AUDIO)	+= twl4030-audio.o
-obj-$(CONFIG_TWL6040_CORE)	+= twl6040-core.o twl6040-irq.o
+obj-$(CONFIG_TWL6040_CORE)	+= twl6040-core.o
 
 obj-$(CONFIG_MFD_MC13XXX)	+= mc13xxx-core.o
 obj-$(CONFIG_MFD_MC13XXX_SPI)	+= mc13xxx-spi.o
diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index 5817bc6d09dc..e5f7b795afff 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -499,6 +499,25 @@ static struct regmap_config twl6040_regmap_config = {
 	.readable_reg = twl6040_readable_reg,
 };
 
+static const struct regmap_irq twl6040_irqs[] = {
+	{ .reg_offset = 0, .mask = TWL6040_THINT, },
+	{ .reg_offset = 0, .mask = TWL6040_PLUGINT | TWL6040_UNPLUGINT, },
+	{ .reg_offset = 0, .mask = TWL6040_HOOKINT, },
+	{ .reg_offset = 0, .mask = TWL6040_HFINT, },
+	{ .reg_offset = 0, .mask = TWL6040_VIBINT, },
+	{ .reg_offset = 0, .mask = TWL6040_READYINT, },
+};
+
+static struct regmap_irq_chip twl6040_irq_chip = {
+	.name = "twl6040",
+	.irqs = twl6040_irqs,
+	.num_irqs = ARRAY_SIZE(twl6040_irqs),
+
+	.num_regs = 1,
+	.status_base = TWL6040_REG_INTID,
+	.mask_base = TWL6040_REG_INTMR,
+};
+
 static int __devinit twl6040_probe(struct i2c_client *client,
 				     const struct i2c_device_id *id)
 {
@@ -574,21 +593,27 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 			goto gpio_err;
 	}
 
-	/* codec interrupt */
-	ret = twl6040_irq_init(twl6040);
-	if (ret)
+	ret = regmap_add_irq_chip(twl6040->regmap, twl6040->irq,
+			IRQF_ONESHOT, 0, &twl6040_irq_chip,
+			&twl6040->irq_data);
+	if (ret < 0)
 		goto irq_init_err;
 
-	ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY,
-				   NULL, twl6040_readyint_handler, IRQF_ONESHOT,
+	twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data,
+					       TWL6040_IRQ_READY);
+	twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data,
+					       TWL6040_IRQ_TH);
+
+	ret = request_threaded_irq(twl6040->irq_ready, NULL,
+				   twl6040_readyint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_ready", twl6040);
 	if (ret) {
 		dev_err(twl6040->dev, "READY IRQ request failed: %d\n", ret);
 		goto readyirq_err;
 	}
 
-	ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_TH,
-				   NULL, twl6040_thint_handler, IRQF_ONESHOT,
+	ret = request_threaded_irq(twl6040->irq_th, NULL,
+				   twl6040_thint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_th", twl6040);
 	if (ret) {
 		dev_err(twl6040->dev, "Thermal IRQ request failed: %d\n", ret);
@@ -604,7 +629,7 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	 * The ASoC codec can work without pdata, pass the platform_data only if
 	 * it has been provided.
 	 */
-	irq = twl6040->irq_base + TWL6040_IRQ_PLUG;
+	irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_PLUG);
 	cell = &twl6040->cells[children];
 	cell->name = "twl6040-codec";
 	twl6040_codec_rsrc[0].start = irq;
@@ -618,7 +643,7 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	children++;
 
 	if (twl6040_has_vibra(pdata, node)) {
-		irq = twl6040->irq_base + TWL6040_IRQ_VIB;
+		irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_VIB);
 
 		cell = &twl6040->cells[children];
 		cell->name = "twl6040-vibra";
@@ -657,11 +682,11 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	return 0;
 
 mfd_err:
-	free_irq(twl6040->irq_base + TWL6040_IRQ_TH, twl6040);
+	free_irq(twl6040->irq_th, twl6040);
 thirq_err:
-	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
+	free_irq(twl6040->irq_ready, twl6040);
 readyirq_err:
-	twl6040_irq_exit(twl6040);
+	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 irq_init_err:
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
@@ -685,9 +710,9 @@ static int __devexit twl6040_remove(struct i2c_client *client)
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
 
-	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
-	free_irq(twl6040->irq_base + TWL6040_IRQ_TH, twl6040);
-	twl6040_irq_exit(twl6040);
+	free_irq(twl6040->irq_ready, twl6040);
+	free_irq(twl6040->irq_th, twl6040);
+	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 
 	mfd_remove_devices(&client->dev);
 	i2c_set_clientdata(client, NULL);
diff --git a/drivers/mfd/twl6040-irq.c b/drivers/mfd/twl6040-irq.c
deleted file mode 100644
index 4b42543da228..000000000000
--- a/drivers/mfd/twl6040-irq.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Interrupt controller support for TWL6040
- *
- * Author:     Misael Lopez Cruz <misael.lopez@ti.com>
- *
- * Copyright:   (C) 2011 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/irqdomain.h>
-#include <linux/interrupt.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/twl6040.h>
-
-struct twl6040_irq_data {
-	int mask;
-	int status;
-};
-
-static struct twl6040_irq_data twl6040_irqs[] = {
-	{
-		.mask = TWL6040_THMSK,
-		.status = TWL6040_THINT,
-	},
-	{
-		.mask = TWL6040_PLUGMSK,
-		.status = TWL6040_PLUGINT | TWL6040_UNPLUGINT,
-	},
-	{
-		.mask = TWL6040_HOOKMSK,
-		.status = TWL6040_HOOKINT,
-	},
-	{
-		.mask = TWL6040_HFMSK,
-		.status = TWL6040_HFINT,
-	},
-	{
-		.mask = TWL6040_VIBMSK,
-		.status = TWL6040_VIBINT,
-	},
-	{
-		.mask = TWL6040_READYMSK,
-		.status = TWL6040_READYINT,
-	},
-};
-
-static inline
-struct twl6040_irq_data *irq_to_twl6040_irq(struct twl6040 *twl6040,
-					    int irq)
-{
-	return &twl6040_irqs[irq - twl6040->irq_base];
-}
-
-static void twl6040_irq_lock(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&twl6040->irq_mutex);
-}
-
-static void twl6040_irq_sync_unlock(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-
-	/* write back to hardware any change in irq mask */
-	if (twl6040->irq_masks_cur != twl6040->irq_masks_cache) {
-		twl6040->irq_masks_cache = twl6040->irq_masks_cur;
-		twl6040_reg_write(twl6040, TWL6040_REG_INTMR,
-				  twl6040->irq_masks_cur);
-	}
-
-	mutex_unlock(&twl6040->irq_mutex);
-}
-
-static void twl6040_irq_enable(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-	struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040,
-							       data->irq);
-
-	twl6040->irq_masks_cur &= ~irq_data->mask;
-}
-
-static void twl6040_irq_disable(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-	struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040,
-							       data->irq);
-
-	twl6040->irq_masks_cur |= irq_data->mask;
-}
-
-static struct irq_chip twl6040_irq_chip = {
-	.name			= "twl6040",
-	.irq_bus_lock		= twl6040_irq_lock,
-	.irq_bus_sync_unlock	= twl6040_irq_sync_unlock,
-	.irq_enable		= twl6040_irq_enable,
-	.irq_disable		= twl6040_irq_disable,
-};
-
-static irqreturn_t twl6040_irq_thread(int irq, void *data)
-{
-	struct twl6040 *twl6040 = data;
-	u8 intid;
-	int i;
-
-	intid = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
-
-	/* apply masking and report (backwards to handle READYINT first) */
-	for (i = ARRAY_SIZE(twl6040_irqs) - 1; i >= 0; i--) {
-		if (twl6040->irq_masks_cur & twl6040_irqs[i].mask)
-			intid &= ~twl6040_irqs[i].status;
-		if (intid & twl6040_irqs[i].status)
-			handle_nested_irq(twl6040->irq_base + i);
-	}
-
-	/* ack unmasked irqs */
-	twl6040_reg_write(twl6040, TWL6040_REG_INTID, intid);
-
-	return IRQ_HANDLED;
-}
-
-int twl6040_irq_init(struct twl6040 *twl6040)
-{
-	struct device_node *node = twl6040->dev->of_node;
-	int i, nr_irqs, irq_base, ret;
-	u8 val;
-
-	mutex_init(&twl6040->irq_mutex);
-
-	/* mask the individual interrupt sources */
-	twl6040->irq_masks_cur = TWL6040_ALLINT_MSK;
-	twl6040->irq_masks_cache = TWL6040_ALLINT_MSK;
-	twl6040_reg_write(twl6040, TWL6040_REG_INTMR, TWL6040_ALLINT_MSK);
-
-	nr_irqs = ARRAY_SIZE(twl6040_irqs);
-
-	irq_base = irq_alloc_descs(-1, 0, nr_irqs, 0);
-	if (IS_ERR_VALUE(irq_base)) {
-		dev_err(twl6040->dev, "Fail to allocate IRQ descs\n");
-		return irq_base;
-	}
-	twl6040->irq_base = irq_base;
-
-	irq_domain_add_legacy(node, ARRAY_SIZE(twl6040_irqs), irq_base, 0,
-			      &irq_domain_simple_ops, NULL);
-
-	/* Register them with genirq */
-	for (i = irq_base; i < irq_base + nr_irqs; i++) {
-		irq_set_chip_data(i, twl6040);
-		irq_set_chip_and_handler(i, &twl6040_irq_chip,
-					 handle_level_irq);
-		irq_set_nested_thread(i, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(i, IRQF_VALID);
-#else
-		irq_set_noprobe(i);
-#endif
-	}
-
-	ret = request_threaded_irq(twl6040->irq, NULL, twl6040_irq_thread,
-				   IRQF_ONESHOT, "twl6040", twl6040);
-	if (ret) {
-		dev_err(twl6040->dev, "failed to request IRQ %d: %d\n",
-			twl6040->irq, ret);
-		return ret;
-	}
-
-	/* reset interrupts */
-	val = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
-
-	/* interrupts cleared on write */
-	twl6040_clear_bits(twl6040, TWL6040_REG_ACCCTL, TWL6040_INTCLRMODE);
-
-	return 0;
-}
-EXPORT_SYMBOL(twl6040_irq_init);
-
-void twl6040_irq_exit(struct twl6040 *twl6040)
-{
-	free_irq(twl6040->irq, twl6040);
-}
-EXPORT_SYMBOL(twl6040_irq_exit);
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index a8eff4ad9be5..94ac944d12f0 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -207,10 +207,12 @@ struct twl6040_platform_data {
 };
 
 struct regmap;
+struct regmap_irq_chips_data;
 
 struct twl6040 {
 	struct device *dev;
 	struct regmap *regmap;
+	struct regmap_irq_chip_data *irq_data;
 	struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */
 	struct mutex mutex;
 	struct mutex irq_mutex;
@@ -228,9 +230,8 @@ struct twl6040 {
 	unsigned int mclk;
 
 	unsigned int irq;
-	unsigned int irq_base;
-	u8 irq_masks_cur;
-	u8 irq_masks_cache;
+	unsigned int irq_ready;
+	unsigned int irq_th;
 };
 
 int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg);
@@ -245,8 +246,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id,
 		    unsigned int freq_in, unsigned int freq_out);
 int twl6040_get_pll(struct twl6040 *twl6040);
 unsigned int twl6040_get_sysclk(struct twl6040 *twl6040);
-int twl6040_irq_init(struct twl6040 *twl6040);
-void twl6040_irq_exit(struct twl6040 *twl6040);
+
 /* Get the combined status of the vibra control register */
 int twl6040_get_vibralr_status(struct twl6040 *twl6040);
 
-- 
cgit v1.2.3-55-g7522


From 605511a848ae3ac4b2ce272ae6cbf8930b29ebb3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 13 Nov 2012 19:18:05 +0530
Subject: mfd: Convert tps6586x to irq_domain

Allocate the irq base if it base is not porvided i.e.
in case of device tree invocation of this driver.
Convert the tps6586x driver to irq domain, using a
legacy IRQ mapping if an irq_base is specified in
platform data or dynamically allocated and otherwise
using a linear mapping.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps6586x.c       | 91 ++++++++++++++++++++++++++++++++------------
 include/linux/mfd/tps6586x.h |  1 +
 2 files changed, 67 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 467464368773..2cdf1e6c00c8 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -17,12 +17,14 @@
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
@@ -116,6 +118,7 @@ struct tps6586x {
 	int			irq_base;
 	u32			irq_en;
 	u8			mask_reg[5];
+	struct irq_domain	*irq_domain;
 };
 
 static inline struct tps6586x *dev_to_tps6586x(struct device *dev)
@@ -184,6 +187,14 @@ int tps6586x_update(struct device *dev, int reg, uint8_t val, uint8_t mask)
 }
 EXPORT_SYMBOL_GPL(tps6586x_update);
 
+int tps6586x_irq_get_virq(struct device *dev, int irq)
+{
+	struct tps6586x *tps6586x = dev_to_tps6586x(dev);
+
+	return irq_create_mapping(tps6586x->irq_domain, irq);
+}
+EXPORT_SYMBOL_GPL(tps6586x_irq_get_virq);
+
 static int __remove_subdev(struct device *dev, void *unused)
 {
 	platform_device_unregister(to_platform_device(dev));
@@ -205,7 +216,7 @@ static void tps6586x_irq_lock(struct irq_data *data)
 static void tps6586x_irq_enable(struct irq_data *irq_data)
 {
 	struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->irq - tps6586x->irq_base;
+	unsigned int __irq = irq_data->hwirq;
 	const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq];
 
 	tps6586x->mask_reg[data->mask_reg] &= ~data->mask_mask;
@@ -216,7 +227,7 @@ static void tps6586x_irq_disable(struct irq_data *irq_data)
 {
 	struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data);
 
-	unsigned int __irq = irq_data->irq - tps6586x->irq_base;
+	unsigned int __irq = irq_data->hwirq;
 	const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq];
 
 	tps6586x->mask_reg[data->mask_reg] |= data->mask_mask;
@@ -239,6 +250,39 @@ static void tps6586x_irq_sync_unlock(struct irq_data *data)
 	mutex_unlock(&tps6586x->irq_lock);
 }
 
+static struct irq_chip tps6586x_irq_chip = {
+	.name = "tps6586x",
+	.irq_bus_lock = tps6586x_irq_lock,
+	.irq_bus_sync_unlock = tps6586x_irq_sync_unlock,
+	.irq_disable = tps6586x_irq_disable,
+	.irq_enable = tps6586x_irq_enable,
+};
+
+static int tps6586x_irq_map(struct irq_domain *h, unsigned int virq,
+				irq_hw_number_t hw)
+{
+	struct tps6586x *tps6586x = h->host_data;
+
+	irq_set_chip_data(virq, tps6586x);
+	irq_set_chip_and_handler(virq, &tps6586x_irq_chip, handle_simple_irq);
+	irq_set_nested_thread(virq, 1);
+
+	/* ARM needs us to explicitly flag the IRQ as valid
+	 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+
+	return 0;
+}
+
+static struct irq_domain_ops tps6586x_domain_ops = {
+	.map    = tps6586x_irq_map,
+	.xlate  = irq_domain_xlate_twocell,
+};
+
 static irqreturn_t tps6586x_irq(int irq, void *data)
 {
 	struct tps6586x *tps6586x = data;
@@ -259,7 +303,8 @@ static irqreturn_t tps6586x_irq(int irq, void *data)
 		int i = __ffs(acks);
 
 		if (tps6586x->irq_en & (1 << i))
-			handle_nested_irq(tps6586x->irq_base + i);
+			handle_nested_irq(
+				irq_find_mapping(tps6586x->irq_domain, i));
 
 		acks &= ~(1 << i);
 	}
@@ -272,11 +317,8 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
 {
 	int i, ret;
 	u8 tmp[4];
-
-	if (!irq_base) {
-		dev_warn(tps6586x->dev, "No interrupt support on IRQ base\n");
-		return -EINVAL;
-	}
+	int new_irq_base;
+	int irq_num = ARRAY_SIZE(tps6586x_irqs);
 
 	mutex_init(&tps6586x->irq_lock);
 	for (i = 0; i < 5; i++) {
@@ -286,25 +328,24 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
 
 	tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, sizeof(tmp), tmp);
 
-	tps6586x->irq_base = irq_base;
-
-	tps6586x->irq_chip.name = "tps6586x";
-	tps6586x->irq_chip.irq_enable = tps6586x_irq_enable;
-	tps6586x->irq_chip.irq_disable = tps6586x_irq_disable;
-	tps6586x->irq_chip.irq_bus_lock = tps6586x_irq_lock;
-	tps6586x->irq_chip.irq_bus_sync_unlock = tps6586x_irq_sync_unlock;
-
-	for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) {
-		int __irq = i + tps6586x->irq_base;
-		irq_set_chip_data(__irq, tps6586x);
-		irq_set_chip_and_handler(__irq, &tps6586x->irq_chip,
-					 handle_simple_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#endif
+	if  (irq_base > 0) {
+		new_irq_base = irq_alloc_descs(irq_base, 0, irq_num, -1);
+		if (new_irq_base < 0) {
+			dev_err(tps6586x->dev,
+				"Failed to alloc IRQs: %d\n", new_irq_base);
+			return new_irq_base;
+		}
+	} else {
+		new_irq_base = 0;
 	}
 
+	tps6586x->irq_domain = irq_domain_add_simple(tps6586x->dev->of_node,
+				irq_num, new_irq_base, &tps6586x_domain_ops,
+				tps6586x);
+	if (!tps6586x->irq_domain) {
+		dev_err(tps6586x->dev, "Failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
 	ret = request_threaded_irq(irq, NULL, tps6586x_irq, IRQF_ONESHOT,
 				   "tps6586x", tps6586x);
 
diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h
index 2dd123194958..ebd8b08a386b 100644
--- a/include/linux/mfd/tps6586x.h
+++ b/include/linux/mfd/tps6586x.h
@@ -93,5 +93,6 @@ extern int tps6586x_set_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int tps6586x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int tps6586x_update(struct device *dev, int reg, uint8_t val,
 			   uint8_t mask);
+extern int tps6586x_irq_get_virq(struct device *dev, int irq);
 
 #endif /*__LINUX_MFD_TPS6586X_H */
-- 
cgit v1.2.3-55-g7522


From d6ad418763888f617ac5b4849823e4cd670df1dd Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Tue, 28 Feb 2012 16:50:11 -0800
Subject: time: Kill xtime_lock, replacing it with jiffies_lock

Now that timekeeping is protected by its own locks, rename
the xtime_lock to jifffies_lock to better describe what it
protects.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/clocksource/i8253.c |  2 +-
 include/linux/jiffies.h     |  3 ++-
 kernel/time/jiffies.c       |  6 ++++--
 kernel/time/tick-common.c   |  8 ++++----
 kernel/time/tick-internal.h |  1 -
 kernel/time/tick-sched.c    | 22 +++++++++++-----------
 kernel/time/timekeeping.c   | 14 +++-----------
 7 files changed, 25 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index e7cab2da910f..14ee3efcc404 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -35,7 +35,7 @@ static cycle_t i8253_read(struct clocksource *cs)
 
 	raw_spin_lock_irqsave(&i8253_lock, flags);
 	/*
-	 * Although our caller may have the read side of xtime_lock,
+	 * Although our caller may have the read side of jiffies_lock,
 	 * this is now a seqlock, and we are cheating in this routine
 	 * by having side effects on state that we cannot undo if
 	 * there is a collision on the seqlock and our caller has to
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 6b87413da9d6..82ed068b1ebe 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -70,11 +70,12 @@ extern int register_refined_jiffies(long clock_tick_rate);
 
 /*
  * The 64-bit value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
+ * without sampling the sequence number in jiffies_lock.
  * get_jiffies_64() will do this for you as appropriate.
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
+extern seqlock_t jiffies_lock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 25f5b2699d37..7a925ba456fb 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -67,6 +67,8 @@ static struct clocksource clocksource_jiffies = {
 	.shift		= JIFFIES_SHIFT,
 };
 
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
+
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
@@ -74,9 +76,9 @@ u64 get_jiffies_64(void)
 	u64 ret;
 
 	do {
-		seq = read_seqbegin(&xtime_lock);
+		seq = read_seqbegin(&jiffies_lock);
 		ret = jiffies_64;
-	} while (read_seqretry(&xtime_lock, seq));
+	} while (read_seqretry(&jiffies_lock, seq));
 	return ret;
 }
 EXPORT_SYMBOL(get_jiffies_64);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index da6c9ecad4e4..b1600a6973f4 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,13 +63,13 @@ int tick_is_oneshot_available(void)
 static void tick_periodic(int cpu)
 {
 	if (tick_do_timer_cpu == cpu) {
-		write_seqlock(&xtime_lock);
+		write_seqlock(&jiffies_lock);
 
 		/* Keep track of the next tick event */
 		tick_next_period = ktime_add(tick_next_period, tick_period);
 
 		do_timer(1);
-		write_sequnlock(&xtime_lock);
+		write_sequnlock(&jiffies_lock);
 	}
 
 	update_process_times(user_mode(get_irq_regs()));
@@ -130,9 +130,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 		ktime_t next;
 
 		do {
-			seq = read_seqbegin(&xtime_lock);
+			seq = read_seqbegin(&jiffies_lock);
 			next = tick_next_period;
-		} while (read_seqretry(&xtime_lock, seq));
+		} while (read_seqretry(&jiffies_lock, seq));
 
 		clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 4e265b901fed..cf3e59ed6dc0 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -141,4 +141,3 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
 #endif
 
 extern void do_timer(unsigned long ticks);
-extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a40260885265..a678046c3e5e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -31,7 +31,7 @@
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
- * The time, when the last jiffy update happened. Protected by xtime_lock.
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
  */
 static ktime_t last_jiffies_update;
 
@@ -49,14 +49,14 @@ static void tick_do_update_jiffies64(ktime_t now)
 	ktime_t delta;
 
 	/*
-	 * Do a quick check without holding xtime_lock:
+	 * Do a quick check without holding jiffies_lock:
 	 */
 	delta = ktime_sub(now, last_jiffies_update);
 	if (delta.tv64 < tick_period.tv64)
 		return;
 
-	/* Reevalute with xtime_lock held */
-	write_seqlock(&xtime_lock);
+	/* Reevalute with jiffies_lock held */
+	write_seqlock(&jiffies_lock);
 
 	delta = ktime_sub(now, last_jiffies_update);
 	if (delta.tv64 >= tick_period.tv64) {
@@ -79,7 +79,7 @@ static void tick_do_update_jiffies64(ktime_t now)
 		/* Keep the tick_next_period variable up to date */
 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
 	}
-	write_sequnlock(&xtime_lock);
+	write_sequnlock(&jiffies_lock);
 }
 
 /*
@@ -89,12 +89,12 @@ static ktime_t tick_init_jiffy_update(void)
 {
 	ktime_t period;
 
-	write_seqlock(&xtime_lock);
+	write_seqlock(&jiffies_lock);
 	/* Did we start the jiffies update yet ? */
 	if (last_jiffies_update.tv64 == 0)
 		last_jiffies_update = tick_next_period;
 	period = last_jiffies_update;
-	write_sequnlock(&xtime_lock);
+	write_sequnlock(&jiffies_lock);
 	return period;
 }
 
@@ -282,11 +282,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
-		seq = read_seqbegin(&xtime_lock);
+		seq = read_seqbegin(&jiffies_lock);
 		last_update = last_jiffies_update;
 		last_jiffies = jiffies;
 		time_delta = timekeeping_max_deferment();
-	} while (read_seqretry(&xtime_lock, seq));
+	} while (read_seqretry(&jiffies_lock, seq));
 
 	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
 	    arch_needs_cpu(cpu)) {
@@ -658,7 +658,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	 * concurrency: This happens only when the cpu in charge went
 	 * into a long sleep. If two cpus happen to assign themself to
 	 * this duty, then the jiffies update is still serialized by
-	 * xtime_lock.
+	 * jiffies_lock.
 	 */
 	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
 		tick_do_timer_cpu = cpu;
@@ -810,7 +810,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 	 * concurrency: This happens only when the cpu in charge went
 	 * into a long sleep. If two cpus happen to assign themself to
 	 * this duty, then the jiffies update is still serialized by
-	 * xtime_lock.
+	 * jiffies_lock.
 	 */
 	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
 		tick_do_timer_cpu = cpu;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e424970bb562..4c7de02eacdc 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -25,12 +25,6 @@
 
 static struct timekeeper timekeeper;
 
-/*
- * This read-write spinlock protects us from races in SMP while
- * playing with xtime.
- */
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
-
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
@@ -1299,9 +1293,7 @@ struct timespec get_monotonic_coarse(void)
 }
 
 /*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
+ * Must hold jiffies_lock
  */
 void do_timer(unsigned long ticks)
 {
@@ -1389,7 +1381,7 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
  */
 void xtime_update(unsigned long ticks)
 {
-	write_seqlock(&xtime_lock);
+	write_seqlock(&jiffies_lock);
 	do_timer(ticks);
-	write_sequnlock(&xtime_lock);
+	write_sequnlock(&jiffies_lock);
 }
-- 
cgit v1.2.3-55-g7522


From 5cb04436eef62aa8f5c482f8ec8deba391dea465 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa
Date: Tue, 6 Nov 2012 16:46:20 +0000
Subject: ipv6: add knob to send unsolicited ND on link-layer address change

This patch introduces a new knob ndisc_notify. If enabled, the kernel
will transmit an unsolicited neighbour advertisement on link-layer address
change to update the neighbour tables of the corresponding hosts more quickly.

This is the equivalent to arp_notify in ipv4 world.

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      | 1 +
 include/uapi/linux/ipv6.h | 1 +
 net/ipv6/addrconf.c       | 8 ++++++++
 net/ipv6/ndisc.c          | 7 +++++++
 4 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index bcba48a97868..5e11905a4f01 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -47,6 +47,7 @@ struct ipv6_devconf {
 	__s32		disable_ipv6;
 	__s32		accept_dad;
 	__s32		force_tllao;
+	__s32           ndisc_notify;
 	void		*sysctl;
 };
 
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index a6d7d1c536c3..5a2991cf0251 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -157,6 +157,7 @@ enum {
 	DEVCONF_DISABLE_IPV6,
 	DEVCONF_ACCEPT_DAD,
 	DEVCONF_FORCE_TLLAO,
+	DEVCONF_NDISC_NOTIFY,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fab23db8ee73..cb803b7bb0d8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4037,6 +4037,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
 	array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
 	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
+	array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -4704,6 +4705,13 @@ static struct addrconf_sysctl_table
 			.mode           = 0644,
 			.proc_handler   = proc_dointvec
 		},
+		{
+			.procname       = "ndisc_notify",
+			.data           = &ipv6_devconf.ndisc_notify,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec
+		},
 		{
 			/* sentinel */
 		}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 6ba4b54a550a..f41853bca428 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1572,11 +1572,18 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
+	struct inet6_dev *idev;
 
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&nd_tbl, dev);
 		fib6_run_gc(~0UL, net);
+		idev = in6_dev_get(dev);
+		if (!idev)
+			break;
+		if (idev->cnf.ndisc_notify)
+			ndisc_send_unsol_na(dev);
+		in6_dev_put(idev);
 		break;
 	case NETDEV_DOWN:
 		neigh_ifdown(&nd_tbl, dev);
-- 
cgit v1.2.3-55-g7522


From 8cbd9cc6254065c97c4bac42daa55ba1abe73a8e Mon Sep 17 00:00:00 2001
From: David Sharp
Date: Tue, 13 Nov 2012 12:18:21 -0800
Subject: tracing,x86: Add a TSC trace_clock

In order to promote interoperability between userspace tracers and ftrace,
add a trace_clock that reports raw TSC values which will then be recorded
in the ring buffer. Userspace tracers that also record TSCs are then on
exactly the same time base as the kernel and events can be unambiguously
interlaced.

Tested: Enabled a tracepoint and the "tsc" trace_clock and saw very large
timestamp values.

v2:
Move arch-specific bits out of generic code.
v3:
Rename "x86-tsc", cleanups
v7:
Generic arch bits in Kbuild.

Google-Bug-Id: 6980623
Link: http://lkml.kernel.org/r/1352837903-32191-1-git-send-email-dhsharp@google.com

Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/alpha/include/asm/Kbuild      |  1 +
 arch/arm/include/asm/Kbuild        |  1 +
 arch/arm64/include/asm/Kbuild      |  1 +
 arch/avr32/include/asm/Kbuild      |  1 +
 arch/blackfin/include/asm/Kbuild   |  1 +
 arch/c6x/include/asm/Kbuild        |  1 +
 arch/cris/include/asm/Kbuild       |  1 +
 arch/frv/include/asm/Kbuild        |  1 +
 arch/h8300/include/asm/Kbuild      |  1 +
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/Kbuild       |  1 +
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mips/include/asm/Kbuild       |  1 +
 arch/mn10300/include/asm/Kbuild    |  1 +
 arch/openrisc/include/asm/Kbuild   |  1 +
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  1 +
 arch/s390/include/asm/Kbuild       |  1 +
 arch/score/include/asm/Kbuild      |  1 +
 arch/sh/include/asm/Kbuild         |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/tile/include/asm/Kbuild       |  1 +
 arch/um/include/asm/Kbuild         |  1 +
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/trace_clock.h | 20 ++++++++++++++++++++
 arch/x86/kernel/Makefile           |  1 +
 arch/x86/kernel/trace_clock.c      | 21 +++++++++++++++++++++
 arch/xtensa/include/asm/Kbuild     |  1 +
 include/asm-generic/trace_clock.h  | 16 ++++++++++++++++
 include/linux/trace_clock.h        |  2 ++
 kernel/trace/trace.c               |  1 +
 33 files changed, 88 insertions(+)
 create mode 100644 arch/x86/include/asm/trace_clock.h
 create mode 100644 arch/x86/kernel/trace_clock.c
 create mode 100644 include/asm-generic/trace_clock.h

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 64ffc9e9e548..dcfabb9f05a0 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -11,3 +11,4 @@ header-y += reg.h
 header-y += regdef.h
 header-y += sysinfo.h
 generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f70ae175a3d6..514e398f1a07 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -31,5 +31,6 @@ generic-y += sockios.h
 generic-y += termbits.h
 generic-y += termios.h
 generic-y += timex.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += unaligned.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index a581a2205938..6e9ca462127f 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -43,6 +43,7 @@ generic-y += swab.h
 generic-y += termbits.h
 generic-y += termios.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 4807ded352c5..4dd4f78d3dcc 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -1,3 +1,4 @@
 
 generic-y	+= clkdev.h
 generic-y	+= exec.h
+generic-y	+= trace_clock.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 5a0625aad6a0..27d70759474c 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -38,6 +38,7 @@ generic-y += statfs.h
 generic-y += termbits.h
 generic-y += termios.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 112a496d8355..eae7b5963e86 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -49,6 +49,7 @@ generic-y += termbits.h
 generic-y += termios.h
 generic-y += tlbflush.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 6d43a951b5ec..15a122c3767c 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -11,3 +11,4 @@ header-y += sync_serial.h
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += module.h
+generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 4a159da23633..c5d767028306 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -1,3 +1,4 @@
 
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 50bbf387b2f8..4bc8ae73e08a 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += module.h
+generic-y += trace_clock.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 3bfa9b30f448..bdb54ceb53bc 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -48,6 +48,7 @@ generic-y += stat.h
 generic-y += termbits.h
 generic-y += termios.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index dd02f09b6eda..05b03ecd7933 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += kvm_para.h
+generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 50bbf387b2f8..4bc8ae73e08a 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += module.h
+generic-y += trace_clock.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 88fa3ac86fae..7f1949c0e089 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -24,6 +24,7 @@ generic-y += sections.h
 generic-y += siginfo.h
 generic-y += statfs.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 8653072d7e9f..2957fcc71764 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
 header-y  += elf.h
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 533053d12ced..9b54b7a403d4 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1 +1,2 @@
 # MIPS headers
+generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 4a159da23633..c5d767028306 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1,3 +1,4 @@
 
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 78de6805268d..8971026e1c63 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -60,6 +60,7 @@ generic-y += swab.h
 generic-y += termbits.h
 generic-y += termios.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index bac8debecffb..ff4c9faed546 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
 	  segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \
 	  div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
 	  poll.h xor.h clkdev.h exec.h
+generic-y += trace_clock.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a4fe15e33c6f..2d62b484b3fc 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@
 
 generic-y += clkdev.h
 generic-y += rwsem.h
+generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 0633dc6d254d..f313f9cbcf44 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,3 +1,4 @@
 
 
 generic-y += clkdev.h
+generic-y += trace_clock.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index ec697aeefd05..16e41fe1a419 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
 header-y +=
 
 generic-y += clkdev.h
+generic-y += trace_clock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 29f83beeef7a..280bea9e5e2b 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -31,5 +31,6 @@ generic-y += socket.h
 generic-y += statfs.h
 generic-y += termbits.h
 generic-y += termios.h
+generic-y += trace_clock.h
 generic-y += ucontext.h
 generic-y += xor.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 645a58da0e86..e26d430ce2fd 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -8,4 +8,5 @@ generic-y += local64.h
 generic-y += irq_regs.h
 generic-y += local.h
 generic-y += module.h
+generic-y += trace_clock.h
 generic-y += word-at-a-time.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 6948015e08a2..b17b9b8e53cd 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -34,5 +34,6 @@ generic-y += sockios.h
 generic-y += statfs.h
 generic-y += termbits.h
 generic-y += termios.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += xor.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 0f6e7b328265..b30f34a79882 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -2,3 +2,4 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
 generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
 generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
 generic-y += switch_to.h clkdev.h
+generic-y += trace_clock.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index c910c9857e11..7be503e45695 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -54,6 +54,7 @@ generic-y += syscalls.h
 generic-y += termbits.h
 generic-y += termios.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
new file mode 100644
index 000000000000..5c1652728b6d
--- /dev/null
+++ b/arch/x86/include/asm/trace_clock.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_TRACE_CLOCK_H
+#define _ASM_X86_TRACE_CLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_X86_TSC
+
+extern u64 notrace trace_clock_x86_tsc(void);
+
+# define ARCH_TRACE_CLOCKS \
+	{ trace_clock_x86_tsc,	"x86-tsc" },
+
+#else /* !CONFIG_X86_TSC */
+
+#define ARCH_TRACE_CLOCKS
+
+#endif
+
+#endif  /* _ASM_X86_TRACE_CLOCK_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9fd5eed3f8f5..34e923a53762 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
+obj-$(CONFIG_X86_TSC)		+= trace_clock.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
new file mode 100644
index 000000000000..25b993729f9b
--- /dev/null
+++ b/arch/x86/kernel/trace_clock.c
@@ -0,0 +1,21 @@
+/*
+ * X86 trace clocks
+ */
+#include <asm/trace_clock.h>
+#include <asm/barrier.h>
+#include <asm/msr.h>
+
+/*
+ * trace_clock_x86_tsc(): A clock that is just the cycle counter.
+ *
+ * Unlike the other clocks, this is not in nanoseconds.
+ */
+u64 notrace trace_clock_x86_tsc(void)
+{
+	u64 ret;
+
+	rdtsc_barrier();
+	rdtscll(ret);
+
+	return ret;
+}
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 6d1302789995..095f0a2244f7 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -25,4 +25,5 @@ generic-y += siginfo.h
 generic-y += statfs.h
 generic-y += termios.h
 generic-y += topology.h
+generic-y += trace_clock.h
 generic-y += xor.h
diff --git a/include/asm-generic/trace_clock.h b/include/asm-generic/trace_clock.h
new file mode 100644
index 000000000000..6726f1bafb5e
--- /dev/null
+++ b/include/asm-generic/trace_clock.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_GENERIC_TRACE_CLOCK_H
+#define _ASM_GENERIC_TRACE_CLOCK_H
+/*
+ * Arch-specific trace clocks.
+ */
+
+/*
+ * Additional trace clocks added to the trace_clocks
+ * array in kernel/trace/trace.c
+ * None if the architecture has not defined it.
+ */
+#ifndef ARCH_TRACE_CLOCKS
+# define ARCH_TRACE_CLOCKS
+#endif
+
+#endif  /* _ASM_GENERIC_TRACE_CLOCK_H */
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
index 4eb490237d4c..d563f37e1a1d 100644
--- a/include/linux/trace_clock.h
+++ b/include/linux/trace_clock.h
@@ -12,6 +12,8 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 
+#include <asm/trace_clock.h>
+
 extern u64 notrace trace_clock_local(void);
 extern u64 notrace trace_clock(void);
 extern u64 notrace trace_clock_global(void);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1434b5ce4d1..0d20620c0d27 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -488,6 +488,7 @@ static struct {
 	{ trace_clock_local,	"local" },
 	{ trace_clock_global,	"global" },
 	{ trace_clock_counter,	"counter" },
+	ARCH_TRACE_CLOCKS
 };
 
 int trace_clock_id;
-- 
cgit v1.2.3-55-g7522


From 8be0709f10e3dd5d7d07933ad61a9f18c4b93ca5 Mon Sep 17 00:00:00 2001
From: David Sharp
Date: Tue, 13 Nov 2012 12:18:22 -0800
Subject: tracing: Format non-nanosec times from tsc clock without a decimal
 point.

With the addition of the "tsc" clock, formatting timestamps to look like
fractional seconds is misleading. Mark clocks as either in nanoseconds or
not, and format non-nanosecond timestamps as decimal integers.

Tested:
$ cd /sys/kernel/debug/tracing/
$ cat trace_clock
[local] global tsc
$ echo sched_switch > set_event
$ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on
$ cat trace
          <idle>-0     [000]  6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
           sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
$ echo 1 > options/latency-format
$ cat trace
  <idle>-0       0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
   sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
$ echo tsc > trace_clock
$ cat trace
$ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on
$ echo 0 > options/latency-format
$ cat trace
          <idle>-0     [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
           sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
echo 1 > options/latency-format
$ cat trace
  <idle>-0       0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
   sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...

v2:
Move arch-specific bits out of generic code.
v4:
Fix x86_32 build due to 64-bit division.

Google-Bug-Id: 6980623
Link: http://lkml.kernel.org/r/1352837903-32191-2-git-send-email-dhsharp@google.com

Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/trace_clock.h |  2 +-
 include/linux/ftrace_event.h       |  6 +++
 kernel/trace/trace.c               | 15 ++++++--
 kernel/trace/trace.h               |  4 --
 kernel/trace/trace_output.c        | 78 ++++++++++++++++++++++++++------------
 5 files changed, 72 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
index 5c1652728b6d..beab86cc282d 100644
--- a/arch/x86/include/asm/trace_clock.h
+++ b/arch/x86/include/asm/trace_clock.h
@@ -9,7 +9,7 @@
 extern u64 notrace trace_clock_x86_tsc(void);
 
 # define ARCH_TRACE_CLOCKS \
-	{ trace_clock_x86_tsc,	"x86-tsc" },
+	{ trace_clock_x86_tsc,	"x86-tsc",	.in_ns = 0 },
 
 #else /* !CONFIG_X86_TSC */
 
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b80c8ddfbbdc..a3d489531d83 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -86,6 +86,12 @@ struct trace_iterator {
 	cpumask_var_t		started;
 };
 
+enum trace_iter_flags {
+	TRACE_FILE_LAT_FMT	= 1,
+	TRACE_FILE_ANNOTATE	= 2,
+	TRACE_FILE_TIME_IN_NS	= 4,
+};
+
 
 struct trace_event;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0d20620c0d27..d943e69569cd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -484,10 +484,11 @@ static const char *trace_options[] = {
 static struct {
 	u64 (*func)(void);
 	const char *name;
+	int in_ns;		/* is this clock in nanoseconds? */
 } trace_clocks[] = {
-	{ trace_clock_local,	"local" },
-	{ trace_clock_global,	"global" },
-	{ trace_clock_counter,	"counter" },
+	{ trace_clock_local,	"local",	1 },
+	{ trace_clock_global,	"global",	1 },
+	{ trace_clock_counter,	"counter",	0 },
 	ARCH_TRACE_CLOCKS
 };
 
@@ -2478,6 +2479,10 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (ring_buffer_overruns(iter->tr->buffer))
 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
+	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
+	if (trace_clocks[trace_clock_id].in_ns)
+		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
 	/* stop the trace while dumping */
 	tracing_stop();
 
@@ -3339,6 +3344,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	if (trace_flags & TRACE_ITER_LATENCY_FMT)
 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
 
+	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
+	if (trace_clocks[trace_clock_id].in_ns)
+		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 55010ed175f0..c75d7988902c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -406,10 +406,6 @@ void tracing_stop_sched_switch_record(void);
 void tracing_start_sched_switch_record(void);
 int register_tracer(struct tracer *type);
 int is_tracing_stopped(void);
-enum trace_file_type {
-	TRACE_FILE_LAT_FMT	= 1,
-	TRACE_FILE_ANNOTATE	= 2,
-};
 
 extern cpumask_var_t __read_mostly tracing_buffer_mask;
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 123b189c732c..194d79602dc7 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -610,24 +610,54 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 	return trace_print_lat_fmt(s, entry);
 }
 
-static unsigned long preempt_mark_thresh = 100;
+static unsigned long preempt_mark_thresh_us = 100;
 
 static int
-lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
-		    unsigned long rel_usecs)
+lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
 {
-	return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
-				rel_usecs > preempt_mark_thresh ? '!' :
-				  rel_usecs > 1 ? '+' : ' ');
+	unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
+	unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
+	unsigned long long abs_ts = iter->ts - iter->tr->time_start;
+	unsigned long long rel_ts = next_ts - iter->ts;
+	struct trace_seq *s = &iter->seq;
+
+	if (in_ns) {
+		abs_ts = ns2usecs(abs_ts);
+		rel_ts = ns2usecs(rel_ts);
+	}
+
+	if (verbose && in_ns) {
+		unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC);
+		unsigned long abs_msec = (unsigned long)abs_ts;
+		unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC);
+		unsigned long rel_msec = (unsigned long)rel_ts;
+
+		return trace_seq_printf(
+				s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
+				ns2usecs(iter->ts),
+				abs_msec, abs_usec,
+				rel_msec, rel_usec);
+	} else if (verbose && !in_ns) {
+		return trace_seq_printf(
+				s, "[%016llx] %lld (+%lld): ",
+				iter->ts, abs_ts, rel_ts);
+	} else if (!verbose && in_ns) {
+		return trace_seq_printf(
+				s, " %4lldus%c: ",
+				abs_ts,
+				rel_ts > preempt_mark_thresh_us ? '!' :
+				  rel_ts > 1 ? '+' : ' ');
+	} else { /* !verbose && !in_ns */
+		return trace_seq_printf(s, " %4lld: ", abs_ts);
+	}
 }
 
 int trace_print_context(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry = iter->ent;
-	unsigned long long t = ns2usecs(iter->ts);
-	unsigned long usec_rem = do_div(t, USEC_PER_SEC);
-	unsigned long secs = (unsigned long)t;
+	unsigned long long t;
+	unsigned long secs, usec_rem;
 	char comm[TASK_COMM_LEN];
 	int ret;
 
@@ -644,8 +674,13 @@ int trace_print_context(struct trace_iterator *iter)
 			return 0;
 	}
 
-	return trace_seq_printf(s, " %5lu.%06lu: ",
-				secs, usec_rem);
+	if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
+		t = ns2usecs(iter->ts);
+		usec_rem = do_div(t, USEC_PER_SEC);
+		secs = (unsigned long)t;
+		return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem);
+	} else
+		return trace_seq_printf(s, " %12llu: ", iter->ts);
 }
 
 int trace_print_lat_context(struct trace_iterator *iter)
@@ -659,36 +694,29 @@ int trace_print_lat_context(struct trace_iterator *iter)
 			   *next_entry = trace_find_next_entry(iter, NULL,
 							       &next_ts);
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
-	unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
-	unsigned long rel_usecs;
 
 	/* Restore the original ent_size */
 	iter->ent_size = ent_size;
 
 	if (!next_entry)
 		next_ts = iter->ts;
-	rel_usecs = ns2usecs(next_ts - iter->ts);
 
 	if (verbose) {
 		char comm[TASK_COMM_LEN];
 
 		trace_find_cmdline(entry->pid, comm);
 
-		ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
-				       " %ld.%03ldms (+%ld.%03ldms): ", comm,
-				       entry->pid, iter->cpu, entry->flags,
-				       entry->preempt_count, iter->idx,
-				       ns2usecs(iter->ts),
-				       abs_usecs / USEC_PER_MSEC,
-				       abs_usecs % USEC_PER_MSEC,
-				       rel_usecs / USEC_PER_MSEC,
-				       rel_usecs % USEC_PER_MSEC);
+		ret = trace_seq_printf(
+				s, "%16s %5d %3d %d %08x %08lx ",
+				comm, entry->pid, iter->cpu, entry->flags,
+				entry->preempt_count, iter->idx);
 	} else {
 		ret = lat_print_generic(s, entry, iter->cpu);
-		if (ret)
-			ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
 	}
 
+	if (ret)
+		ret = lat_print_timestamp(iter, next_ts);
+
 	return ret;
 }
 
-- 
cgit v1.2.3-55-g7522


From bb08f76d8419a163b7a4212a0e4ea6bd25acacd1 Mon Sep 17 00:00:00 2001
From: Paul E. McKenney
Date: Sat, 20 Oct 2012 12:33:37 -0700
Subject: rcu: Remove list_for_each_continue_rcu()

The list_for_each_continue_rcu() macro is no longer used, so this commit
removes it.  The list_for_each_entry_continue_rcu() macro should be
used instead.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/checklist.txt | 17 ++++++++---------
 Documentation/RCU/whatisRCU.txt |  4 +---
 include/linux/rculist.h         | 17 -----------------
 3 files changed, 9 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index cdb20d41a44a..31ef8fe07f82 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -271,15 +271,14 @@ over a rather long period of time, but improvements are always welcome!
 	The same cautions apply to call_rcu_bh() and call_rcu_sched().
 
 9.	All RCU list-traversal primitives, which include
-	rcu_dereference(), list_for_each_entry_rcu(),
-	list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
-	must be either within an RCU read-side critical section or
-	must be protected by appropriate update-side locks.  RCU
-	read-side critical sections are delimited by rcu_read_lock()
-	and rcu_read_unlock(), or by similar primitives such as
-	rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case
-	the matching rcu_dereference() primitive must be used in order
-	to keep lockdep happy, in this case, rcu_dereference_bh().
+	rcu_dereference(), list_for_each_entry_rcu(), and
+	list_for_each_safe_rcu(), must be either within an RCU read-side
+	critical section or must be protected by appropriate update-side
+	locks.	RCU read-side critical sections are delimited by
+	rcu_read_lock() and rcu_read_unlock(), or by similar primitives
+	such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
+	case the matching rcu_dereference() primitive must be used in
+	order to keep lockdep happy, in this case, rcu_dereference_bh().
 
 	The reason that it is permissible to use RCU list-traversal
 	primitives when the update-side lock is held is that doing so
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index bf0f6de2aa00..9d30de00d730 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -789,9 +789,7 @@ RCU list traversal:
 	list_for_each_entry_rcu
 	hlist_for_each_entry_rcu
 	hlist_nulls_for_each_entry_rcu
-
-	list_for_each_continue_rcu	(to be deprecated in favor of new
-					 list_for_each_entry_continue_rcu)
+	list_for_each_entry_continue_rcu
 
 RCU pointer/list update:
 
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e0f0fab20415..c92dd28eaa6c 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -286,23 +286,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
 		&pos->member != (head); \
 		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
-
-/**
- * list_for_each_continue_rcu
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * Iterate over an rcu-protected list, continuing after current point.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_continue_rcu(pos, head) \
-	for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
-		(pos) != (head); \
-		(pos) = rcu_dereference_raw(list_next_rcu(pos)))
-
 /**
  * list_for_each_entry_continue_rcu - continue iteration over list of given type
  * @pos:	the type * to use as a loop cursor.
-- 
cgit v1.2.3-55-g7522


From f0a0e6f282c72247e7c8ec17c68d528c1bb4d49e Mon Sep 17 00:00:00 2001
From: Paul E. McKenney
Date: Tue, 23 Oct 2012 13:47:01 -0700
Subject: rcu: Clarify memory-ordering properties of grace-period primitives

This commit explicitly states the memory-ordering properties of the
RCU grace-period primitives.  Although these properties were in some
sense implied by the fundmental property of RCU ("a grace period must
wait for all pre-existing RCU read-side critical sections to complete"),
stating it explicitly will be a great labor-saving device.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/rcupdate.h | 25 +++++++++++++++++++++++++
 kernel/rcutree.c         | 29 +++++++++++++++++++++++++----
 kernel/rcutree_plugin.h  |  8 ++++++++
 3 files changed, 58 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7c968e4f929e..6256759fb81e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -90,6 +90,25 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
  * that started after call_rcu() was invoked.  RCU read-side critical
  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
  * and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
  */
 extern void call_rcu(struct rcu_head *head,
 			      void (*func)(struct rcu_head *head));
@@ -118,6 +137,9 @@ extern void call_rcu(struct rcu_head *head,
  *  OR
  *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
  *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 extern void call_rcu_bh(struct rcu_head *head,
 			void (*func)(struct rcu_head *head));
@@ -137,6 +159,9 @@ extern void call_rcu_bh(struct rcu_head *head,
  *  OR
  *  anything that disables preemption.
  *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 extern void call_rcu_sched(struct rcu_head *head,
 			   void (*func)(struct rcu_head *rcu));
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e4c2192b47c8..15a2beec320f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2228,10 +2228,28 @@ static inline int rcu_blocking_is_gp(void)
  * rcu_read_lock_sched().
  *
  * This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns.  However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
+ * non-threaded hardware-interrupt handlers, in progress on entry will
+ * have completed before this primitive returns.  However, this does not
+ * guarantee that softirq handlers will have completed, since in some
+ * kernels, these handlers can run in process context, and can block.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_sched() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_sched().  In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_sched() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_sched() and before the beginning of
+ * that RCU read-side critical section.  Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_sched(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
  *
  * This primitive provides the guarantees made by the (now removed)
  * synchronize_kernel() API.  In contrast, synchronize_rcu() only
@@ -2259,6 +2277,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
  * read-side critical sections have completed.  RCU read-side critical
  * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
  * and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
  */
 void synchronize_rcu_bh(void)
 {
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f92115488187..57e0ef8ed721 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -670,6 +670,9 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu);
  * concurrently with new RCU read-side critical sections that began while
  * synchronize_rcu() was waiting.  RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
  */
 void synchronize_rcu(void)
 {
@@ -875,6 +878,11 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 /**
  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete.  For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
  */
 void rcu_barrier(void)
 {
-- 
cgit v1.2.3-55-g7522


From 2cd451792db174382aaca96c75bc3bf47a6065fe Mon Sep 17 00:00:00 2001
From: Daniel Mack
Date: Wed, 14 Nov 2012 11:14:26 +0800
Subject: spi/omap: fix D0/D1 direction confusion

0384e90b8 ("spi/mcspi: allow configuration of pin directions") did what
it claimed to do the wrong way around. D0/D1 is configured as output by
*clearing* the bits in the conf registers, hence also breaking the
former default behaviour.

Fix this before that change is merged to mainline.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 Documentation/devicetree/bindings/spi/omap-spi.txt | 6 +++---
 drivers/spi/spi-omap2-mcspi.c                      | 6 +++---
 include/linux/platform_data/spi-omap2-mcspi.h      | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
index 2ef0a6b85653..938809c6829b 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.txt
+++ b/Documentation/devicetree/bindings/spi/omap-spi.txt
@@ -6,9 +6,9 @@ Required properties:
   - "ti,omap4-spi" for OMAP4+.
 - ti,spi-num-cs : Number of chipselect supported  by the instance.
 - ti,hwmods: Name of the hwmod associated to the McSPI
-- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as
-			  output. The default is D0 as output and
-			  D1 as input.
+- ti,pindir-d0-out-d1-in: Select the D0 pin as output and D1 as
+			  input. The default is D0 as input and
+			  D1 as output.
 
 Example:
 
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 51046332677c..89f73c425c3d 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -766,7 +766,7 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	/* standard 4-wire master mode:  SCK, MOSI/out, MISO/in, nCS
 	 * REVISIT: this controller could support SPI_3WIRE mode.
 	 */
-	if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) {
+	if (mcspi->pin_dir == MCSPI_PINDIR_D0_IN_D1_OUT) {
 		l &= ~OMAP2_MCSPI_CHCONF_IS;
 		l &= ~OMAP2_MCSPI_CHCONF_DPE1;
 		l |= OMAP2_MCSPI_CHCONF_DPE0;
@@ -1188,8 +1188,8 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev)
 		of_property_read_u32(node, "ti,spi-num-cs", &num_cs);
 		master->num_chipselect = num_cs;
 		master->bus_num = bus_num++;
-		if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL))
-			mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT;
+		if (of_get_property(node, "ti,pindir-d0-out-d1-in", NULL))
+			mcspi->pin_dir = MCSPI_PINDIR_D0_OUT_D1_IN;
 	} else {
 		pdata = pdev->dev.platform_data;
 		master->num_chipselect = pdata->num_cs;
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index ce70f7b5a8e1..a65572d53211 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -7,8 +7,8 @@
 
 #define OMAP4_MCSPI_REG_OFFSET 0x100
 
-#define MCSPI_PINDIR_D0_OUT_D1_IN	0
-#define MCSPI_PINDIR_D0_IN_D1_OUT	1
+#define MCSPI_PINDIR_D0_IN_D1_OUT	0
+#define MCSPI_PINDIR_D0_OUT_D1_IN	1
 
 struct omap2_mcspi_platform_config {
 	unsigned short	num_cs;
-- 
cgit v1.2.3-55-g7522


From fe1e43f7190da98b396265639845a6cc0d748aad Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Wed, 14 Nov 2012 16:47:10 +0900
Subject: regulator: core: Add regulator_is_supported_voltage_tol()

If consumers wish to set voltages based on a tolerance it stands to reason
that they will also want to query for support in the same manner.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 include/linux/regulator/consumer.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index c43cd3556b1f..49415a23b2d8 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -367,4 +367,12 @@ static inline int regulator_set_voltage_tol(struct regulator *regulator,
 				     new_uV - tol_uV, new_uV + tol_uV);
 }
 
+static inline int regulator_is_supported_voltage_tol(struct regulator *regulator,
+						     int target_uV, int tol_uV)
+{
+	return regulator_is_supported_voltage(regulator,
+					      target_uV - tol_uV,
+					      target_uV + tol_uV);
+}
+
 #endif
-- 
cgit v1.2.3-55-g7522


From 49839dc93970789cea46f5171cd7f6ec11af64c7 Mon Sep 17 00:00:00 2001
From: Paul Walmsley
Date: Tue, 6 Nov 2012 16:31:32 +0000
Subject: Revert "ARM: OMAP: convert I2C driver to PM QoS for MPU latency
 constraints"

This reverts commit 3db11feffc1ad2ab9dea27789e6b5b3032827adc
(ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints).
This commit causes I2C timeouts to appear on several OMAP3430/3530-based
boards:

  http://marc.info/?l=linux-arm-kernel&m=135071372426971&w=2
  http://marc.info/?l=linux-arm-kernel&m=135067558415214&w=2
  http://marc.info/?l=linux-arm-kernel&m=135216013608196&w=2

and appears to have been sent for merging before one of its prerequisites
was merged:

  http://marc.info/?l=linux-arm-kernel&m=135219411617621&w=2

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Acked-by: Jean Pihet <j-pihet@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/plat-omap/i2c.c      | 21 +++++++++++++++++++++
 drivers/i2c/busses/i2c-omap.c | 32 ++++++++++++++------------------
 include/linux/i2c-omap.h      |  1 +
 3 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index a5683a84c6ee..6013831a043e 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -26,12 +26,14 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <linux/i2c-omap.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 
 #include <mach/irqs.h>
 #include <plat/i2c.h>
+#include <plat/omap-pm.h>
 #include <plat/omap_device.h>
 
 #define OMAP_I2C_SIZE		0x3f
@@ -127,6 +129,16 @@ static inline int omap1_i2c_add_bus(int bus_id)
 
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
+/*
+ * XXX This function is a temporary compatibility wrapper - only
+ * needed until the I2C driver can be converted to call
+ * omap_pm_set_max_dev_wakeup_lat() and handle a return code.
+ */
+static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t)
+{
+	omap_pm_set_max_mpu_wakeup_lat(dev, t);
+}
+
 static inline int omap2_i2c_add_bus(int bus_id)
 {
 	int l;
@@ -158,6 +170,15 @@ static inline int omap2_i2c_add_bus(int bus_id)
 	dev_attr = (struct omap_i2c_dev_attr *)oh->dev_attr;
 	pdata->flags = dev_attr->flags;
 
+	/*
+	 * When waiting for completion of a i2c transfer, we need to
+	 * set a wake up latency constraint for the MPU. This is to
+	 * ensure quick enough wakeup from idle, when transfer
+	 * completes.
+	 * Only omap3 has support for constraints
+	 */
+	if (cpu_is_omap34xx())
+		pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat;
 	pdev = omap_device_build(name, bus_id, oh, pdata,
 			sizeof(struct omap_i2c_bus_platform_data),
 			NULL, 0, 0);
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index db31eaed6ea5..0b0254312d21 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -43,7 +43,6 @@
 #include <linux/slab.h>
 #include <linux/i2c-omap.h>
 #include <linux/pm_runtime.h>
-#include <linux/pm_qos.h>
 
 /* I2C controller revisions */
 #define OMAP_I2C_OMAP1_REV_2		0x20
@@ -187,8 +186,9 @@ struct omap_i2c_dev {
 	int			reg_shift;      /* bit shift for I2C register addresses */
 	struct completion	cmd_complete;
 	struct resource		*ioarea;
-	u32			latency;	/* maximum MPU wkup latency */
-	struct pm_qos_request	pm_qos_request;
+	u32			latency;	/* maximum mpu wkup latency */
+	void			(*set_mpu_wkup_lat)(struct device *dev,
+						    long latency);
 	u32			speed;		/* Speed of bus in kHz */
 	u32			dtrev;		/* extra revision from DT */
 	u32			flags;
@@ -494,7 +494,9 @@ static void omap_i2c_resize_fifo(struct omap_i2c_dev *dev, u8 size, bool is_rx)
 		dev->b_hw = 1; /* Enable hardware fixes */
 
 	/* calculate wakeup latency constraint for MPU */
-	dev->latency = (1000000 * dev->threshold) / (1000 * dev->speed / 8);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->latency = (1000000 * dev->threshold) /
+			(1000 * dev->speed / 8);
 }
 
 /*
@@ -629,16 +631,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	if (r < 0)
 		goto out;
 
-	/*
-	 * When waiting for completion of a i2c transfer, we need to
-	 * set a wake up latency constraint for the MPU. This is to
-	 * ensure quick enough wakeup from idle, when transfer
-	 * completes.
-	 */
-	if (dev->latency)
-		pm_qos_add_request(&dev->pm_qos_request,
-				   PM_QOS_CPU_DMA_LATENCY,
-				   dev->latency);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, dev->latency);
 
 	for (i = 0; i < num; i++) {
 		r = omap_i2c_xfer_msg(adap, &msgs[i], (i == (num - 1)));
@@ -646,8 +640,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 			break;
 	}
 
-	if (dev->latency)
-		pm_qos_remove_request(&dev->pm_qos_request);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, -1);
 
 	if (r == 0)
 		r = num;
@@ -1104,6 +1098,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	} else if (pdata != NULL) {
 		dev->speed = pdata->clkrate;
 		dev->flags = pdata->flags;
+		dev->set_mpu_wkup_lat = pdata->set_mpu_wkup_lat;
 		dev->dtrev = pdata->rev;
 	}
 
@@ -1159,8 +1154,9 @@ omap_i2c_probe(struct platform_device *pdev)
 			dev->b_hw = 1; /* Enable hardware fixes */
 
 		/* calculate wakeup latency constraint for MPU */
-		dev->latency = (1000000 * dev->fifo_size) /
-			       (1000 * dev->speed / 8);
+		if (dev->set_mpu_wkup_lat != NULL)
+			dev->latency = (1000000 * dev->fifo_size) /
+				       (1000 * dev->speed / 8);
 	}
 
 	/* reset ASAP, clearing any IRQs */
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index df804ba73e0b..92a0dc75bc74 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -34,6 +34,7 @@ struct omap_i2c_bus_platform_data {
 	u32		clkrate;
 	u32		rev;
 	u32		flags;
+	void		(*set_mpu_wkup_lat)(struct device *dev, long set);
 };
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 45aa2c27ada4829bc91bd80455bc67059ac06bc5 Mon Sep 17 00:00:00 2001
From: Josh Cartwright
Date: Tue, 13 Nov 2012 17:26:48 -0600
Subject: clk: Add support for fundamental zynq clks

Provide simplified models for the necessary clocks on the zynq-7000
platform.  Currently, the PLLs, the CPU clock network, and the basic
peripheral clock networks (for SDIO, SMC, SPI, QSPI, UART) are modelled.

OF bindings are also provided and documented.

Signed-off-by: Josh Cartwright <josh.cartwright@ni.com>
Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
Acked-by: Michal Simek <michal.simek@xilinx.com>
---
 .../devicetree/bindings/clock/zynq-7000.txt        |  55 +++
 drivers/clk/clk-zynq.c                             | 383 +++++++++++++++++++++
 include/linux/clk/zynq.h                           |  24 ++
 3 files changed, 462 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/zynq-7000.txt
 create mode 100644 drivers/clk/clk-zynq.c
 create mode 100644 include/linux/clk/zynq.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/zynq-7000.txt b/Documentation/devicetree/bindings/clock/zynq-7000.txt
new file mode 100644
index 000000000000..23ae1db1bc13
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/zynq-7000.txt
@@ -0,0 +1,55 @@
+Device Tree Clock bindings for the Zynq 7000 EPP
+
+The Zynq EPP has several different clk providers, each with there own bindings.
+The purpose of this document is to document their usage.
+
+See clock_bindings.txt for more information on the generic clock bindings.
+See Chapter 25 of Zynq TRM for more information about Zynq clocks.
+
+== PLLs ==
+
+Used to describe the ARM_PLL, DDR_PLL, and IO_PLL.
+
+Required properties:
+- #clock-cells : shall be 0 (only one clock is output from this node)
+- compatible : "xlnx,zynq-pll"
+- reg : pair of u32 values, which are the address offsets within the SLCR
+        of the relevant PLL_CTRL register and PLL_CFG register respectively
+- clocks : phandle for parent clock.  should be the phandle for ps_clk
+
+Optional properties:
+- clock-output-names : name of the output clock
+
+Example:
+	armpll: armpll {
+		#clock-cells = <0>;
+		compatible = "xlnx,zynq-pll";
+		clocks = <&ps_clk>;
+		reg = <0x100 0x110>;
+		clock-output-names = "armpll";
+	};
+
+== Peripheral clocks ==
+
+Describes clock node for the SDIO, SMC, SPI, QSPI, and UART clocks.
+
+Required properties:
+- #clock-cells : shall be 1
+- compatible : "xlnx,zynq-periph-clock"
+- reg : a single u32 value, describing the offset within the SLCR where
+        the CLK_CTRL register is found for this peripheral
+- clocks : phandle for parent clocks.  should hold phandles for
+           the IO_PLL, ARM_PLL, and DDR_PLL in order
+- clock-output-names : names of the output clock(s).  For peripherals that have
+                       two output clocks (for example, the UART), two clocks
+                       should be listed.
+
+Example:
+	uart_clk: uart_clk {
+		#clock-cells = <1>;
+		compatible = "xlnx,zynq-periph-clock";
+		clocks = <&iopll &armpll &ddrpll>;
+		reg = <0x154>;
+		clock-output-names = "uart0_ref_clk",
+				     "uart1_ref_clk";
+	};
diff --git a/drivers/clk/clk-zynq.c b/drivers/clk/clk-zynq.c
new file mode 100644
index 000000000000..37a30514fd66
--- /dev/null
+++ b/drivers/clk/clk-zynq.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2012 National Instruments
+ *
+ * Josh Cartwright <josh.cartwright@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/clk-provider.h>
+
+static void __iomem *slcr_base;
+
+struct zynq_pll_clk {
+	struct clk_hw	hw;
+	void __iomem	*pll_ctrl;
+	void __iomem	*pll_cfg;
+};
+
+#define to_zynq_pll_clk(hw)	container_of(hw, struct zynq_pll_clk, hw)
+
+#define CTRL_PLL_FDIV(x)	((x) >> 12)
+
+static unsigned long zynq_pll_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct zynq_pll_clk *pll = to_zynq_pll_clk(hw);
+	return parent_rate * CTRL_PLL_FDIV(ioread32(pll->pll_ctrl));
+}
+
+static const struct clk_ops zynq_pll_clk_ops = {
+	.recalc_rate	= zynq_pll_recalc_rate,
+};
+
+static void __init zynq_pll_clk_setup(struct device_node *np)
+{
+	struct clk_init_data init;
+	struct zynq_pll_clk *pll;
+	const char *parent_name;
+	struct clk *clk;
+	u32 regs[2];
+	int ret;
+
+	ret = of_property_read_u32_array(np, "reg", regs, ARRAY_SIZE(regs));
+	if (WARN_ON(ret))
+		return;
+
+	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+	if (WARN_ON(!pll))
+		return;
+
+	pll->pll_ctrl = slcr_base + regs[0];
+	pll->pll_cfg  = slcr_base + regs[1];
+
+	of_property_read_string(np, "clock-output-names", &init.name);
+
+	init.ops = &zynq_pll_clk_ops;
+	parent_name = of_clk_get_parent_name(np, 0);
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	pll->hw.init = &init;
+
+	clk = clk_register(NULL, &pll->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return;
+
+	ret = of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	if (WARN_ON(ret))
+		return;
+}
+
+struct zynq_periph_clk {
+	struct clk_hw		hw;
+	struct clk_onecell_data	onecell_data;
+	struct clk		*gates[2];
+	void __iomem		*clk_ctrl;
+	spinlock_t		clkact_lock;
+};
+
+#define to_zynq_periph_clk(hw)	container_of(hw, struct zynq_periph_clk, hw)
+
+static const u8 periph_clk_parent_map[] = {
+	0, 0, 1, 2
+};
+#define PERIPH_CLK_CTRL_SRC(x)	(periph_clk_parent_map[((x) & 0x30) >> 4])
+#define PERIPH_CLK_CTRL_DIV(x)	(((x) & 0x3F00) >> 8)
+
+static unsigned long zynq_periph_recalc_rate(struct clk_hw *hw,
+					     unsigned long parent_rate)
+{
+	struct zynq_periph_clk *periph = to_zynq_periph_clk(hw);
+	return parent_rate / PERIPH_CLK_CTRL_DIV(ioread32(periph->clk_ctrl));
+}
+
+static u8 zynq_periph_get_parent(struct clk_hw *hw)
+{
+	struct zynq_periph_clk *periph = to_zynq_periph_clk(hw);
+	return PERIPH_CLK_CTRL_SRC(ioread32(periph->clk_ctrl));
+}
+
+static const struct clk_ops zynq_periph_clk_ops = {
+	.recalc_rate	= zynq_periph_recalc_rate,
+	.get_parent	= zynq_periph_get_parent,
+};
+
+static void __init zynq_periph_clk_setup(struct device_node *np)
+{
+	struct zynq_periph_clk *periph;
+	const char *parent_names[3];
+	struct clk_init_data init;
+	int clk_num = 0, err;
+	const char *name;
+	struct clk *clk;
+	u32 reg;
+	int i;
+
+	err = of_property_read_u32(np, "reg", &reg);
+	if (WARN_ON(err))
+		return;
+
+	periph = kzalloc(sizeof(*periph), GFP_KERNEL);
+	if (WARN_ON(!periph))
+		return;
+
+	periph->clk_ctrl = slcr_base + reg;
+	spin_lock_init(&periph->clkact_lock);
+
+	init.name = np->name;
+	init.ops = &zynq_periph_clk_ops;
+	for (i = 0; i < ARRAY_SIZE(parent_names); i++)
+		parent_names[i] = of_clk_get_parent_name(np, i);
+	init.parent_names = parent_names;
+	init.num_parents = ARRAY_SIZE(parent_names);
+
+	periph->hw.init = &init;
+
+	clk = clk_register(NULL, &periph->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return;
+
+	err = of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	if (WARN_ON(err))
+		return;
+
+	err = of_property_read_string_index(np, "clock-output-names", 0,
+					    &name);
+	if (WARN_ON(err))
+		return;
+
+	periph->gates[0] = clk_register_gate(NULL, name, np->name, 0,
+					     periph->clk_ctrl, 0, 0,
+					     &periph->clkact_lock);
+	if (WARN_ON(IS_ERR(periph->gates[0])))
+		return;
+	clk_num++;
+
+	/* some periph clks have 2 downstream gates */
+	err = of_property_read_string_index(np, "clock-output-names", 1,
+					    &name);
+	if (err != -ENODATA) {
+		periph->gates[1] = clk_register_gate(NULL, name, np->name, 0,
+						     periph->clk_ctrl, 1, 0,
+						     &periph->clkact_lock);
+		if (WARN_ON(IS_ERR(periph->gates[1])))
+			return;
+		clk_num++;
+	}
+
+	periph->onecell_data.clks = periph->gates;
+	periph->onecell_data.clk_num = clk_num;
+
+	err = of_clk_add_provider(np, of_clk_src_onecell_get,
+				  &periph->onecell_data);
+	if (WARN_ON(err))
+		return;
+}
+
+/* CPU Clock domain is modelled as a mux with 4 children subclks, whose
+ * derivative rates depend on CLK_621_TRUE
+ */
+
+struct zynq_cpu_clk {
+	struct clk_hw		hw;
+	struct clk_onecell_data	onecell_data;
+	struct clk		*subclks[4];
+	void __iomem		*clk_ctrl;
+	spinlock_t		clkact_lock;
+};
+
+#define to_zynq_cpu_clk(hw)	container_of(hw, struct zynq_cpu_clk, hw)
+
+static const u8 zynq_cpu_clk_parent_map[] = {
+	1, 1, 2, 0
+};
+#define CPU_CLK_SRCSEL(x)	(zynq_cpu_clk_parent_map[(((x) & 0x30) >> 4)])
+#define CPU_CLK_CTRL_DIV(x)	(((x) & 0x3F00) >> 8)
+
+static u8 zynq_cpu_clk_get_parent(struct clk_hw *hw)
+{
+	struct zynq_cpu_clk *cpuclk = to_zynq_cpu_clk(hw);
+	return CPU_CLK_SRCSEL(ioread32(cpuclk->clk_ctrl));
+}
+
+static unsigned long zynq_cpu_clk_recalc_rate(struct clk_hw *hw,
+					      unsigned long parent_rate)
+{
+	struct zynq_cpu_clk *cpuclk = to_zynq_cpu_clk(hw);
+	return parent_rate / CPU_CLK_CTRL_DIV(ioread32(cpuclk->clk_ctrl));
+}
+
+static const struct clk_ops zynq_cpu_clk_ops = {
+	.get_parent	= zynq_cpu_clk_get_parent,
+	.recalc_rate	= zynq_cpu_clk_recalc_rate,
+};
+
+struct zynq_cpu_subclk {
+	struct clk_hw	hw;
+	void __iomem	*clk_621;
+	enum {
+		CPU_SUBCLK_6X4X,
+		CPU_SUBCLK_3X2X,
+		CPU_SUBCLK_2X,
+		CPU_SUBCLK_1X,
+	} which;
+};
+
+#define CLK_621_TRUE(x)	((x) & 1)
+
+#define to_zynq_cpu_subclk(hw)	container_of(hw, struct zynq_cpu_subclk, hw);
+
+static unsigned long zynq_cpu_subclk_recalc_rate(struct clk_hw *hw,
+						 unsigned long parent_rate)
+{
+	unsigned long uninitialized_var(rate);
+	struct zynq_cpu_subclk *subclk;
+	bool is_621;
+
+	subclk = to_zynq_cpu_subclk(hw)
+	is_621 = CLK_621_TRUE(ioread32(subclk->clk_621));
+
+	switch (subclk->which) {
+	case CPU_SUBCLK_6X4X:
+		rate = parent_rate;
+		break;
+	case CPU_SUBCLK_3X2X:
+		rate = parent_rate / 2;
+		break;
+	case CPU_SUBCLK_2X:
+		rate = parent_rate / (is_621 ? 3 : 2);
+		break;
+	case CPU_SUBCLK_1X:
+		rate = parent_rate / (is_621 ? 6 : 4);
+		break;
+	};
+
+	return rate;
+}
+
+static const struct clk_ops zynq_cpu_subclk_ops = {
+	.recalc_rate	= zynq_cpu_subclk_recalc_rate,
+};
+
+static struct clk *zynq_cpu_subclk_setup(struct device_node *np, u8 which,
+					 void __iomem *clk_621)
+{
+	struct zynq_cpu_subclk *subclk;
+	struct clk_init_data init;
+	struct clk *clk;
+	int err;
+
+	err = of_property_read_string_index(np, "clock-output-names",
+					    which, &init.name);
+	if (WARN_ON(err))
+		goto err_read_output_name;
+
+	subclk = kzalloc(sizeof(*subclk), GFP_KERNEL);
+	if (!subclk)
+		goto err_subclk_alloc;
+
+	subclk->clk_621 = clk_621;
+	subclk->which = which;
+
+	init.ops = &zynq_cpu_subclk_ops;
+	init.parent_names = &np->name;
+	init.num_parents = 1;
+
+	subclk->hw.init = &init;
+
+	clk = clk_register(NULL, &subclk->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		goto err_clk_register;
+
+	return clk;
+
+err_clk_register:
+	kfree(subclk);
+err_subclk_alloc:
+err_read_output_name:
+	return ERR_PTR(-EINVAL);
+}
+
+static void __init zynq_cpu_clk_setup(struct device_node *np)
+{
+	struct zynq_cpu_clk *cpuclk;
+	const char *parent_names[3];
+	struct clk_init_data init;
+	void __iomem *clk_621;
+	struct clk *clk;
+	u32 reg[2];
+	int err;
+	int i;
+
+	err = of_property_read_u32_array(np, "reg", reg, ARRAY_SIZE(reg));
+	if (WARN_ON(err))
+		return;
+
+	cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL);
+	if (WARN_ON(!cpuclk))
+		return;
+
+	cpuclk->clk_ctrl = slcr_base + reg[0];
+	clk_621 = slcr_base + reg[1];
+	spin_lock_init(&cpuclk->clkact_lock);
+
+	init.name = np->name;
+	init.ops = &zynq_cpu_clk_ops;
+	for (i = 0; i < ARRAY_SIZE(parent_names); i++)
+		parent_names[i] = of_clk_get_parent_name(np, i);
+	init.parent_names = parent_names;
+	init.num_parents = ARRAY_SIZE(parent_names);
+
+	cpuclk->hw.init = &init;
+
+	clk = clk_register(NULL, &cpuclk->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return;
+
+	err = of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	if (WARN_ON(err))
+		return;
+
+	for (i = 0; i < 4; i++) {
+		cpuclk->subclks[i] = zynq_cpu_subclk_setup(np, i, clk_621);
+		if (WARN_ON(IS_ERR(cpuclk->subclks[i])))
+			return;
+	}
+
+	cpuclk->onecell_data.clks = cpuclk->subclks;
+	cpuclk->onecell_data.clk_num = i;
+
+	err = of_clk_add_provider(np, of_clk_src_onecell_get,
+				  &cpuclk->onecell_data);
+	if (WARN_ON(err))
+		return;
+}
+
+static const __initconst struct of_device_id zynq_clk_match[] = {
+	{ .compatible = "fixed-clock", .data = of_fixed_clk_setup, },
+	{ .compatible = "xlnx,zynq-pll", .data = zynq_pll_clk_setup, },
+	{ .compatible = "xlnx,zynq-periph-clock",
+		.data = zynq_periph_clk_setup, },
+	{ .compatible = "xlnx,zynq-cpu-clock", .data = zynq_cpu_clk_setup, },
+	{}
+};
+
+void __init xilinx_zynq_clocks_init(void __iomem *slcr)
+{
+	slcr_base = slcr;
+	of_clk_init(zynq_clk_match);
+}
diff --git a/include/linux/clk/zynq.h b/include/linux/clk/zynq.h
new file mode 100644
index 000000000000..56be7cd9aa8b
--- /dev/null
+++ b/include/linux/clk/zynq.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2012 National Instruments
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __LINUX_CLK_ZYNQ_H_
+#define __LINUX_CLK_ZYNQ_H_
+
+void __init xilinx_zynq_clocks_init(void __iomem *slcr);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 2c88ab8c5af7d637d2a9d14b607fa6100fa64236 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti D
Date: Mon, 5 Nov 2012 17:53:39 +0530
Subject: ARM: i2c: omap: Remove the i207 errata flag

The commit [i2c: omap: use revision check for OMAP_I2C_FLAG_APPLY_ERRATA_I207]
uses the revision id instead of the flag. So the flag can be safely removed.

Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Shubhrajyoti D <shubhrajyoti@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/mach-omap2/omap_hwmod_2430_data.c | 3 +--
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++------
 drivers/i2c/busses/i2c-omap.c              | 3 +--
 include/linux/i2c-omap.h                   | 1 -
 4 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index c455e41b0237..b79ccf6efbe8 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -76,8 +76,7 @@ static struct omap_hwmod_class i2c_class = {
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-			  OMAP_I2C_FLAG_BUS_SHIFT_2 |
+	.flags		= OMAP_I2C_FLAG_BUS_SHIFT_2 |
 			  OMAP_I2C_FLAG_FORCE_19200_INT_CLK,
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index f67b7ee07dd4..943222c40489 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -791,8 +791,7 @@ static struct omap_hwmod omap3xxx_dss_venc_hwmod = {
 /* I2C1 */
 static struct omap_i2c_dev_attr i2c1_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-			  OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags		= OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 			  OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
@@ -818,8 +817,7 @@ static struct omap_hwmod omap3xxx_i2c1_hwmod = {
 /* I2C2 */
 static struct omap_i2c_dev_attr i2c2_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 		 OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
@@ -845,8 +843,7 @@ static struct omap_hwmod omap3xxx_i2c2_hwmod = {
 /* I2C3 */
 static struct omap_i2c_dev_attr i2c3_dev_attr = {
 	.fifo_depth	= 64, /* bytes */
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 		 OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 0ca50e71731b..11e645ab1e79 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1028,8 +1028,7 @@ static const struct i2c_algorithm omap_i2c_algo = {
 #ifdef CONFIG_OF
 static struct omap_i2c_bus_platform_data omap3_pdata = {
 	.rev = OMAP_I2C_IP_VERSION_1,
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 		 OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 92a0dc75bc74..1b25c04f82d9 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -21,7 +21,6 @@
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
 #define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
-#define OMAP_I2C_FLAG_APPLY_ERRATA_I207	BIT(4)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
-- 
cgit v1.2.3-55-g7522


From e5cc8ef31267317f3e177415c84e3f3602e5bfc9 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Fri, 2 Nov 2012 01:41:01 +0100
Subject: ACPI / PM: Provide ACPI PM callback routines for subsystems

Some bus types don't support power management natively, but generally
there may be device nodes in ACPI tables corresponding to the devices
whose bus types they are (under ACPI 5 those bus types may be SPI,
I2C and platform).  If that is the case, standard ACPI power
management may be applied to those devices, although currently the
kernel has no means for that.

For this reason, provide a set of routines that may be used as power
management callbacks for such devices.  This may be done in three
different ways.

 (1) Device drivers handling the devices in question may run
     acpi_dev_pm_attach() in their .probe() routines, which (on
     success) will cause the devices to be added to the general ACPI
     PM domain and ACPI power management will be used for them going
     forward.  Then, acpi_dev_pm_detach() may be used to remove the
     devices from the general ACPI PM domain if ACPI power management
     is not necessary for them any more.

 (2) The devices' subsystems may use acpi_subsys_runtime_suspend(),
     acpi_subsys_runtime_resume(), acpi_subsys_prepare(),
     acpi_subsys_suspend_late(), acpi_subsys_resume_early() as their
     power management callbacks in the same way as the general ACPI
     PM domain does that.

 (3) The devices' drivers may execute acpi_dev_suspend_late(),
     acpi_dev_resume_early(), acpi_dev_runtime_suspend(),
     acpi_dev_runtime_resume() from their power management callbacks
     as appropriate, if that's absolutely necessary, but it is not
     recommended to do that, because such drivers may not work
     without ACPI support as a result.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 317 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h     |  34 +++++
 2 files changed, 351 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 7ddd93463a2e..a8e059f69d50 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -224,6 +224,22 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
 EXPORT_SYMBOL(acpi_pm_device_sleep_state);
 
 #ifdef CONFIG_PM_RUNTIME
+/**
+ * acpi_wakeup_device - Wakeup notification handler for ACPI devices.
+ * @handle: ACPI handle of the device the notification is for.
+ * @event: Type of the signaled event.
+ * @context: Device corresponding to @handle.
+ */
+static void acpi_wakeup_device(acpi_handle handle, u32 event, void *context)
+{
+	struct device *dev = context;
+
+	if (event == ACPI_NOTIFY_DEVICE_WAKE && dev) {
+		pm_wakeup_event(dev, 0);
+		pm_runtime_resume(dev);
+	}
+}
+
 /**
  * __acpi_device_run_wake - Enable/disable runtime remote wakeup for device.
  * @adev: ACPI device to enable/disable the remote wakeup for.
@@ -283,6 +299,9 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
 	return __acpi_device_run_wake(adev, enable);
 }
 EXPORT_SYMBOL(acpi_pm_device_run_wake);
+#else
+static inline void acpi_wakeup_device(acpi_handle handle, u32 event,
+				      void *context) {}
 #endif /* CONFIG_PM_RUNTIME */
 
  #ifdef CONFIG_PM_SLEEP
@@ -329,3 +348,301 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
 	return error;
 }
 #endif /* CONFIG_PM_SLEEP */
+
+/**
+ * acpi_dev_pm_get_node - Get ACPI device node for the given physical device.
+ * @dev: Device to get the ACPI node for.
+ */
+static struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
+{
+	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+	struct acpi_device *adev;
+
+	return handle && ACPI_SUCCESS(acpi_bus_get_device(handle, &adev)) ?
+		adev : NULL;
+}
+
+/**
+ * acpi_dev_pm_low_power - Put ACPI device into a low-power state.
+ * @dev: Device to put into a low-power state.
+ * @adev: ACPI device node corresponding to @dev.
+ * @system_state: System state to choose the device state for.
+ */
+static int acpi_dev_pm_low_power(struct device *dev, struct acpi_device *adev,
+				 u32 system_state)
+{
+	int power_state;
+
+	if (!acpi_device_power_manageable(adev))
+		return 0;
+
+	power_state = acpi_device_power_state(dev, adev, system_state,
+					      ACPI_STATE_D3, NULL);
+	if (power_state < ACPI_STATE_D0 || power_state > ACPI_STATE_D3)
+		return -EIO;
+
+	return acpi_device_set_power(adev, power_state);
+}
+
+/**
+ * acpi_dev_pm_full_power - Put ACPI device into the full-power state.
+ * @adev: ACPI device node to put into the full-power state.
+ */
+static int acpi_dev_pm_full_power(struct acpi_device *adev)
+{
+	return acpi_device_power_manageable(adev) ?
+		acpi_device_set_power(adev, ACPI_STATE_D0) : 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+/**
+ * acpi_dev_runtime_suspend - Put device into a low-power state using ACPI.
+ * @dev: Device to put into a low-power state.
+ *
+ * Put the given device into a runtime low-power state using the standard ACPI
+ * mechanism.  Set up remote wakeup if desired, choose the state to put the
+ * device into (this checks if remote wakeup is expected to work too), and set
+ * the power state of the device.
+ */
+int acpi_dev_runtime_suspend(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+	bool remote_wakeup;
+	int error;
+
+	if (!adev)
+		return 0;
+
+	remote_wakeup = dev_pm_qos_flags(dev, PM_QOS_FLAG_REMOTE_WAKEUP) >
+				PM_QOS_FLAGS_NONE;
+	error = __acpi_device_run_wake(adev, remote_wakeup);
+	if (remote_wakeup && error)
+		return -EAGAIN;
+
+	error = acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0);
+	if (error)
+		__acpi_device_run_wake(adev, false);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_runtime_suspend);
+
+/**
+ * acpi_dev_runtime_resume - Put device into the full-power state using ACPI.
+ * @dev: Device to put into the full-power state.
+ *
+ * Put the given device into the full-power state using the standard ACPI
+ * mechanism at run time.  Set the power state of the device to ACPI D0 and
+ * disable remote wakeup.
+ */
+int acpi_dev_runtime_resume(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+	int error;
+
+	if (!adev)
+		return 0;
+
+	error = acpi_dev_pm_full_power(adev);
+	__acpi_device_run_wake(adev, false);
+	return error;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_runtime_resume);
+
+/**
+ * acpi_subsys_runtime_suspend - Suspend device using ACPI.
+ * @dev: Device to suspend.
+ *
+ * Carry out the generic runtime suspend procedure for @dev and use ACPI to put
+ * it into a runtime low-power state.
+ */
+int acpi_subsys_runtime_suspend(struct device *dev)
+{
+	int ret = pm_generic_runtime_suspend(dev);
+	return ret ? ret : acpi_dev_runtime_suspend(dev);
+}
+EXPORT_SYMBOL_GPL(acpi_subsys_runtime_suspend);
+
+/**
+ * acpi_subsys_runtime_resume - Resume device using ACPI.
+ * @dev: Device to Resume.
+ *
+ * Use ACPI to put the given device into the full-power state and carry out the
+ * generic runtime resume procedure for it.
+ */
+int acpi_subsys_runtime_resume(struct device *dev)
+{
+	int ret = acpi_dev_runtime_resume(dev);
+	return ret ? ret : pm_generic_runtime_resume(dev);
+}
+EXPORT_SYMBOL_GPL(acpi_subsys_runtime_resume);
+#endif /* CONFIG_PM_RUNTIME */
+
+#ifdef CONFIG_PM_SLEEP
+/**
+ * acpi_dev_suspend_late - Put device into a low-power state using ACPI.
+ * @dev: Device to put into a low-power state.
+ *
+ * Put the given device into a low-power state during system transition to a
+ * sleep state using the standard ACPI mechanism.  Set up system wakeup if
+ * desired, choose the state to put the device into (this checks if system
+ * wakeup is expected to work too), and set the power state of the device.
+ */
+int acpi_dev_suspend_late(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+	u32 target_state;
+	bool wakeup;
+	int error;
+
+	if (!adev)
+		return 0;
+
+	target_state = acpi_target_system_state();
+	wakeup = device_may_wakeup(dev);
+	error = __acpi_device_sleep_wake(adev, target_state, wakeup);
+	if (wakeup && error)
+		return error;
+
+	error = acpi_dev_pm_low_power(dev, adev, target_state);
+	if (error)
+		__acpi_device_sleep_wake(adev, ACPI_STATE_UNKNOWN, false);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_suspend_late);
+
+/**
+ * acpi_dev_resume_early - Put device into the full-power state using ACPI.
+ * @dev: Device to put into the full-power state.
+ *
+ * Put the given device into the full-power state using the standard ACPI
+ * mechanism during system transition to the working state.  Set the power
+ * state of the device to ACPI D0 and disable remote wakeup.
+ */
+int acpi_dev_resume_early(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+	int error;
+
+	if (!adev)
+		return 0;
+
+	error = acpi_dev_pm_full_power(adev);
+	__acpi_device_sleep_wake(adev, ACPI_STATE_UNKNOWN, false);
+	return error;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_resume_early);
+
+/**
+ * acpi_subsys_prepare - Prepare device for system transition to a sleep state.
+ * @dev: Device to prepare.
+ */
+int acpi_subsys_prepare(struct device *dev)
+{
+	/*
+	 * Follow PCI and resume devices suspended at run time before running
+	 * their system suspend callbacks.
+	 */
+	pm_runtime_resume(dev);
+	return pm_generic_prepare(dev);
+}
+EXPORT_SYMBOL_GPL(acpi_subsys_prepare);
+
+/**
+ * acpi_subsys_suspend_late - Suspend device using ACPI.
+ * @dev: Device to suspend.
+ *
+ * Carry out the generic late suspend procedure for @dev and use ACPI to put
+ * it into a low-power state during system transition into a sleep state.
+ */
+int acpi_subsys_suspend_late(struct device *dev)
+{
+	int ret = pm_generic_suspend_late(dev);
+	return ret ? ret : acpi_dev_suspend_late(dev);
+}
+EXPORT_SYMBOL_GPL(acpi_subsys_suspend_late);
+
+/**
+ * acpi_subsys_resume_early - Resume device using ACPI.
+ * @dev: Device to Resume.
+ *
+ * Use ACPI to put the given device into the full-power state and carry out the
+ * generic early resume procedure for it during system transition into the
+ * working state.
+ */
+int acpi_subsys_resume_early(struct device *dev)
+{
+	int ret = acpi_dev_resume_early(dev);
+	return ret ? ret : pm_generic_resume_early(dev);
+}
+EXPORT_SYMBOL_GPL(acpi_subsys_resume_early);
+#endif /* CONFIG_PM_SLEEP */
+
+static struct dev_pm_domain acpi_general_pm_domain = {
+	.ops = {
+#ifdef CONFIG_PM_RUNTIME
+		.runtime_suspend = acpi_subsys_runtime_suspend,
+		.runtime_resume = acpi_subsys_runtime_resume,
+		.runtime_idle = pm_generic_runtime_idle,
+#endif
+#ifdef CONFIG_PM_SLEEP
+		.prepare = acpi_subsys_prepare,
+		.suspend_late = acpi_subsys_suspend_late,
+		.resume_early = acpi_subsys_resume_early,
+		.poweroff_late = acpi_subsys_suspend_late,
+		.restore_early = acpi_subsys_resume_early,
+#endif
+	},
+};
+
+/**
+ * acpi_dev_pm_attach - Prepare device for ACPI power management.
+ * @dev: Device to prepare.
+ *
+ * If @dev has a valid ACPI handle that has a valid struct acpi_device object
+ * attached to it, install a wakeup notification handler for the device and
+ * add it to the general ACPI PM domain.
+ *
+ * This assumes that the @dev's bus type uses generic power management callbacks
+ * (or doesn't use any power management callbacks at all).
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ */
+int acpi_dev_pm_attach(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+
+	if (!adev)
+		return -ENODEV;
+
+	if (dev->pm_domain)
+		return -EEXIST;
+
+	acpi_add_pm_notifier(adev, acpi_wakeup_device, dev);
+	dev->pm_domain = &acpi_general_pm_domain;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
+
+/**
+ * acpi_dev_pm_detach - Remove ACPI power management from the device.
+ * @dev: Device to take care of.
+ *
+ * Remove the device from the general ACPI PM domain and remove its wakeup
+ * notifier.
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ */
+void acpi_dev_pm_detach(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+
+	if (adev && dev->pm_domain == &acpi_general_pm_domain) {
+		dev->pm_domain = NULL;
+		acpi_remove_pm_notifier(adev, acpi_wakeup_device);
+	}
+}
+EXPORT_SYMBOL_GPL(acpi_dev_pm_detach);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 90be98981102..0676b6ac57fa 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -430,4 +430,38 @@ acpi_status acpi_os_prepare_sleep(u8 sleep_state,
 #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
 #endif
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_PM_RUNTIME)
+int acpi_dev_runtime_suspend(struct device *dev);
+int acpi_dev_runtime_resume(struct device *dev);
+int acpi_subsys_runtime_suspend(struct device *dev);
+int acpi_subsys_runtime_resume(struct device *dev);
+#else
+static inline int acpi_dev_runtime_suspend(struct device *dev) { return 0; }
+static inline int acpi_dev_runtime_resume(struct device *dev) { return 0; }
+static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; }
+static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
+#endif
+
+#ifdef CONFIG_ACPI_SLEEP
+int acpi_dev_suspend_late(struct device *dev);
+int acpi_dev_resume_early(struct device *dev);
+int acpi_subsys_prepare(struct device *dev);
+int acpi_subsys_suspend_late(struct device *dev);
+int acpi_subsys_resume_early(struct device *dev);
+#else
+static inline int acpi_dev_suspend_late(struct device *dev) { return 0; }
+static inline int acpi_dev_resume_early(struct device *dev) { return 0; }
+static inline int acpi_subsys_prepare(struct device *dev) { return 0; }
+static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; }
+static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
+#endif
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PM)
+int acpi_dev_pm_attach(struct device *dev);
+int acpi_dev_pm_detach(struct device *dev);
+#else
+static inline int acpi_dev_pm_attach(struct device *dev) { return -ENODEV; }
+static inline void acpi_dev_pm_detach(struct device *dev) {}
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3-55-g7522


From 1bad2f19f7f79d1ec9e6c48168fd7ce8dc1c305f Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi
Date: Fri, 26 Oct 2012 13:39:15 +0200
Subject: ACPI / Sleep: add acpi_sleep=nonvs_s3 parameter

The ACPI specificiation would like us to save NVS at hibernation time,
but makes no mention of saving NVS over S3.  Not all versions of
Windows do this either, and it is clear that not all machines need NVS
saved/restored over S3.  Allow the user to improve their suspend/resume
time by disabling the NVS save/restore at S3 time, but continue to do
the NVS save/restore for S4 as specified.

Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/sleep.c |  2 ++
 drivers/acpi/sleep.c         | 17 ++++++++++++++++-
 include/linux/acpi.h         |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 11676cf65aee..d5e0d717005a 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
+		if (strncmp(str, "nonvs_s3", 8) == 0)
+			acpi_nvs_nosave_s3();
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index fdcdbb652915..8640782944cc 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -97,6 +97,21 @@ void __init acpi_nvs_nosave(void)
 	nvs_nosave = true;
 }
 
+/*
+ * The ACPI specification wants us to save NVS memory regions during hibernation
+ * but says nothing about saving NVS during S3.  Not all versions of Windows
+ * save NVS on S3 suspend either, and it is clear that not all systems need
+ * NVS to be saved at S3 time.  To improve suspend/resume time, allow the
+ * user to disable saving NVS on S3 if their system does not require it, but
+ * continue to save/restore NVS for S4 as specified.
+ */
+static bool nvs_nosave_s3;
+
+void __init acpi_nvs_nosave_s3(void)
+{
+	nvs_nosave_s3 = true;
+}
+
 /*
  * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
  * user to request that behavior by using the 'acpi_old_suspend_ordering'
@@ -243,7 +258,7 @@ static int acpi_suspend_begin(suspend_state_t pm_state)
 	u32 acpi_state = acpi_suspend_states[pm_state];
 	int error = 0;
 
-	error = nvs_nosave ? 0 : suspend_nvs_alloc();
+	error = (nvs_nosave || nvs_nosave_s3) ? 0 : suspend_nvs_alloc();
 	if (error)
 		return error;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 90be98981102..3cf93491125c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -261,6 +261,7 @@ int acpi_resources_are_enforced(void);
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 void __init acpi_nvs_nosave(void);
+void __init acpi_nvs_nosave_s3(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-- 
cgit v1.2.3-55-g7522


From 9743fdea9f9473cd2440741342a5ed8e19eb51bd Mon Sep 17 00:00:00 2001
From: Yuanhan Liu
Date: Fri, 26 Oct 2012 13:39:24 +0200
Subject: ACPI: move acpi_no_s4_hw_signature() declaration into #ifdef
 CONFIG_HIBERNATION

acpi_no_s4_hw_signature is defined in #ifdef CONFIG_HIBERNATION block,
but the current code put the declaration in #ifdef CONFIG_PM_SLEEP block.

I happened to meet this issue when I turned off PM_SLEEP config manually:
arch/x86/kernel/acpi/sleep.c:100:4: error: implicit declaration of function ‘acpi_no_s4_hw_signature’ [-Werror=implicit-function-declaration]

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3cf93491125c..87edfcc0ab62 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -257,8 +257,11 @@ int acpi_check_region(resource_size_t start, resource_size_t n,
 
 int acpi_resources_are_enforced(void);
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_HIBERNATION
 void __init acpi_no_s4_hw_signature(void);
+#endif
+
+#ifdef CONFIG_PM_SLEEP
 void __init acpi_old_suspend_ordering(void);
 void __init acpi_nvs_nosave(void);
 void __init acpi_nvs_nosave_s3(void);
-- 
cgit v1.2.3-55-g7522


From 06f64c8f239a47b359c60301914c783b56b32c13 Mon Sep 17 00:00:00 2001
From: Mika Westerberg
Date: Wed, 31 Oct 2012 22:44:33 +0100
Subject: driver core / ACPI: Move ACPI support to core device and driver types

With ACPI 5 we are starting to see devices that don't natively support
discovery but can be enumerated with the help of the ACPI namespace.
Typically, these devices can be represented in the Linux device driver
model as platform devices or some serial bus devices, like SPI or I2C
devices.

Since we want to re-use existing drivers for those devices, we need a
way for drivers to specify the ACPI IDs of supported devices, so that
they can be matched against device nodes in the ACPI namespace.  To
this end, it is sufficient to add a pointer to an array of supported
ACPI device IDs, that can be provided by the driver, to struct device.

Moreover, things like ACPI power management need to have access to
the ACPI handle of each supported device, because that handle is used
to invoke AML methods associated with the corresponding ACPI device
node.  The ACPI handles of devices are now stored in the archdata
member structure of struct device whose definition depends on the
architecture and includes the ACPI handle only on x86 and ia64. Since
the pointer to an array of supported ACPI IDs is added to struct
device_driver in an architecture-independent way, it is logical to
move the ACPI handle from archdata to struct device itself at the same
time.  This also makes code more straightforward in some places and
follows the example of Device Trees that have a poiter to struct
device_node in there too.

This changeset is based on Mika Westerberg's work.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/ia64/include/asm/device.h |  3 ---
 arch/x86/include/asm/device.h  |  3 ---
 drivers/acpi/glue.c            | 14 ++++++--------
 include/acpi/acpi_bus.h        |  2 +-
 include/linux/device.h         |  4 ++++
 5 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h
index d05e78f6db94..f69c32ffbe6a 100644
--- a/arch/ia64/include/asm/device.h
+++ b/arch/ia64/include/asm/device.h
@@ -7,9 +7,6 @@
 #define _ASM_IA64_DEVICE_H
 
 struct dev_archdata {
-#ifdef CONFIG_ACPI
-	void	*acpi_handle;
-#endif
 #ifdef CONFIG_INTEL_IOMMU
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 93e1c55f14ab..03dd72957d2f 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -2,9 +2,6 @@
 #define _ASM_X86_DEVICE_H
 
 struct dev_archdata {
-#ifdef CONFIG_ACPI
-	void	*acpi_handle;
-#endif
 #ifdef CONFIG_X86_DEV_DMA_OPS
 	struct dma_map_ops *dma_ops;
 #endif
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 08373086cd7e..2f3849aedc97 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -134,7 +134,7 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle)
 	char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2];
 	int retval = -EINVAL;
 
-	if (dev->archdata.acpi_handle) {
+	if (dev->acpi_handle) {
 		dev_warn(dev, "Drivers changed 'acpi_handle'\n");
 		return -EINVAL;
 	}
@@ -169,7 +169,7 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle)
 	acpi_dev->physical_node_count++;
 	mutex_unlock(&acpi_dev->physical_node_lock);
 
-	dev->archdata.acpi_handle = handle;
+	dev->acpi_handle = handle;
 
 	if (!physical_node->node_id)
 		strcpy(physical_node_name, PHYSICAL_NODE_STRING);
@@ -198,11 +198,10 @@ static int acpi_unbind_one(struct device *dev)
 	acpi_status status;
 	struct list_head *node, *next;
 
-	if (!dev->archdata.acpi_handle)
+	if (!dev->acpi_handle)
 		return 0;
 
-	status = acpi_bus_get_device(dev->archdata.acpi_handle,
-		&acpi_dev);
+	status = acpi_bus_get_device(dev->acpi_handle, &acpi_dev);
 	if (ACPI_FAILURE(status))
 		goto err;
 
@@ -228,7 +227,7 @@ static int acpi_unbind_one(struct device *dev)
 
 		sysfs_remove_link(&acpi_dev->dev.kobj, physical_node_name);
 		sysfs_remove_link(&dev->kobj, "firmware_node");
-		dev->archdata.acpi_handle = NULL;
+		dev->acpi_handle = NULL;
 		/* acpi_bind_one increase refcnt by one */
 		put_device(dev);
 		kfree(entry);
@@ -269,8 +268,7 @@ static int acpi_platform_notify(struct device *dev)
 	if (!ret) {
 		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 
-		acpi_get_name(dev->archdata.acpi_handle,
-			      ACPI_FULL_PATHNAME, &buffer);
+		acpi_get_name(dev->acpi_handle, ACPI_FULL_PATHNAME, &buffer);
 		DBG("Device %s -> %s\n", dev_name(dev), (char *)buffer.pointer);
 		kfree(buffer.pointer);
 	} else
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 0daa0fbd8654..bb1537c5e672 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -410,7 +410,7 @@ acpi_handle acpi_get_child(acpi_handle, u64);
 int acpi_is_root_bridge(acpi_handle);
 acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle))
+#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->acpi_handle))
 
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index 86ef6ab553b1..cc3aee57a46e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -190,6 +190,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus);
  * @mod_name:	Used for built-in modules.
  * @suppress_bind_attrs: Disables bind/unbind via sysfs.
  * @of_match_table: The open firmware table.
+ * @acpi_match_table: The ACPI match table.
  * @probe:	Called to query the existence of a specific device,
  *		whether this driver can work with it, and bind the driver
  *		to a specific device.
@@ -223,6 +224,7 @@ struct device_driver {
 	bool suppress_bind_attrs;	/* disables bind/unbind via sysfs */
 
 	const struct of_device_id	*of_match_table;
+	const struct acpi_device_id	*acpi_match_table;
 
 	int (*probe) (struct device *dev);
 	int (*remove) (struct device *dev);
@@ -616,6 +618,7 @@ struct device_dma_parameters {
  * @dma_mem:	Internal for coherent mem override.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
+ * @acpi_handle: Associated ACPI device node's namespace handle.
  * @devt:	For creating the sysfs "dev".
  * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
@@ -680,6 +683,7 @@ struct device {
 	struct dev_archdata	archdata;
 
 	struct device_node	*of_node; /* associated device tree node */
+	void			*acpi_handle; /* associated ACPI device node */
 
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 	u32			id;	/* device instance */
-- 
cgit v1.2.3-55-g7522


From cf761af9ee0f2c172710ad2b7ca029016b5d4c45 Mon Sep 17 00:00:00 2001
From: Mika Westerberg
Date: Wed, 31 Oct 2012 22:44:41 +0100
Subject: ACPI: Provide generic functions for matching ACPI device nodes

Introduce function acpi_match_device() allowing callers to match
struct device objects with populated acpi_handle fields against
arrays of ACPI device IDs.  Also introduce function
acpi_driver_match_device() using acpi_match_device() internally and
allowing callers to match a struct device object against an array of
ACPI device IDs provided by a device driver.

Additionally, introduce macro ACPI_PTR() that may be used by device
drivers to escape pointers to data structures whose definitions
depend on CONFIG_ACPI.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c  | 40 +++++++++++++++++++++++++++++++++++-----
 include/linux/acpi.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1fcb8678665c..a0dfdffe54d4 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -340,8 +340,8 @@ static void acpi_device_remove_files(struct acpi_device *dev)
 			ACPI Bus operations
    -------------------------------------------------------------------------- */
 
-int acpi_match_device_ids(struct acpi_device *device,
-			  const struct acpi_device_id *ids)
+static const struct acpi_device_id *__acpi_match_device(
+	struct acpi_device *device, const struct acpi_device_id *ids)
 {
 	const struct acpi_device_id *id;
 	struct acpi_hardware_id *hwid;
@@ -351,14 +351,44 @@ int acpi_match_device_ids(struct acpi_device *device,
 	 * driver for it.
 	 */
 	if (!device->status.present)
-		return -ENODEV;
+		return NULL;
 
 	for (id = ids; id->id[0]; id++)
 		list_for_each_entry(hwid, &device->pnp.ids, list)
 			if (!strcmp((char *) id->id, hwid->id))
-				return 0;
+				return id;
+
+	return NULL;
+}
 
-	return -ENOENT;
+/**
+ * acpi_match_device - Match a struct device against a given list of ACPI IDs
+ * @ids: Array of struct acpi_device_id object to match against.
+ * @dev: The device structure to match.
+ *
+ * Check if @dev has a valid ACPI handle and if there is a struct acpi_device
+ * object for that handle and use that object to match against a given list of
+ * device IDs.
+ *
+ * Return a pointer to the first matching ID on success or %NULL on failure.
+ */
+const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
+					       const struct device *dev)
+{
+	struct acpi_device *adev;
+
+	if (!ids || !dev->acpi_handle
+	    || ACPI_FAILURE(acpi_bus_get_device(dev->acpi_handle, &adev)))
+		return NULL;
+
+	return __acpi_match_device(adev, ids);
+}
+EXPORT_SYMBOL_GPL(acpi_match_device);
+
+int acpi_match_device_ids(struct acpi_device *device,
+			  const struct acpi_device_id *ids)
+{
+	return __acpi_match_device(device, ids) ? 0 : -ENOENT;
 }
 EXPORT_SYMBOL(acpi_match_device_ids);
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 90be98981102..48761cb481db 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -26,6 +26,7 @@
 #define _LINUX_ACPI_H
 
 #include <linux/ioport.h>	/* for struct resource */
+#include <linux/device.h>
 
 #ifdef	CONFIG_ACPI
 
@@ -364,6 +365,17 @@ extern int acpi_nvs_register(__u64 start, __u64 size);
 extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
 				    void *data);
 
+const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
+					       const struct device *dev);
+
+static inline bool acpi_driver_match_device(struct device *dev,
+					    const struct device_driver *drv)
+{
+	return !!acpi_match_device(drv->acpi_match_table, dev);
+}
+
+#define ACPI_PTR(_ptr)	(_ptr)
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -418,6 +430,22 @@ static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
 	return 0;
 }
 
+struct acpi_device_id;
+
+static inline const struct acpi_device_id *acpi_match_device(
+	const struct acpi_device_id *ids, const struct device *dev)
+{
+	return NULL;
+}
+
+static inline bool acpi_driver_match_device(struct device *dev,
+					    const struct device_driver *drv)
+{
+	return false;
+}
+
+#define ACPI_PTR(_ptr)	(NULL)
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI
-- 
cgit v1.2.3-55-g7522


From 046d9ce6820e99087e81511284045eada94950e8 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Thu, 15 Nov 2012 00:30:01 +0100
Subject: ACPI: Move device resources interpretation code from PNP to ACPI core

Move some code used for parsing ACPI device resources from the PNP
subsystem to the ACPI core, so that other bus types (platform, SPI,
I2C) can use the same routines for parsing resources in a consistent
way, without duplicating code.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/acpi/Makefile          |   1 +
 drivers/acpi/resource.c        | 393 +++++++++++++++++++++++++++++++++++++++++
 drivers/pnp/base.h             |   2 +
 drivers/pnp/pnpacpi/rsparser.c | 296 ++++---------------------------
 drivers/pnp/resource.c         |  16 ++
 include/linux/acpi.h           |  10 ++
 6 files changed, 454 insertions(+), 264 deletions(-)
 create mode 100644 drivers/acpi/resource.c

(limited to 'include/linux')

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 227c82bbe9a1..3223edfb23b6 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -32,6 +32,7 @@ acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o
 #
 acpi-y				+= bus.o glue.o
 acpi-y				+= scan.o
+acpi-y				+= resource.o
 acpi-y				+= processor_core.o
 acpi-y				+= ec.o
 acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
new file mode 100644
index 000000000000..3e7fd349e29d
--- /dev/null
+++ b/drivers/acpi/resource.c
@@ -0,0 +1,393 @@
+/*
+ * drivers/acpi/resource.c - ACPI device resources interpretation.
+ *
+ * Copyright (C) 2012, Intel Corp.
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+
+#ifdef CONFIG_X86
+#define valid_IRQ(i) (((i) != 0) && ((i) != 2))
+#else
+#define valid_IRQ(i) (true)
+#endif
+
+static unsigned long acpi_dev_memresource_flags(u64 len, u8 write_protect,
+						bool window)
+{
+	unsigned long flags = IORESOURCE_MEM;
+
+	if (len == 0)
+		flags |= IORESOURCE_DISABLED;
+
+	if (write_protect == ACPI_READ_WRITE_MEMORY)
+		flags |= IORESOURCE_MEM_WRITEABLE;
+
+	if (window)
+		flags |= IORESOURCE_WINDOW;
+
+	return flags;
+}
+
+static void acpi_dev_get_memresource(struct resource *res, u64 start, u64 len,
+				     u8 write_protect)
+{
+	res->start = start;
+	res->end = start + len - 1;
+	res->flags = acpi_dev_memresource_flags(len, write_protect, false);
+}
+
+/**
+ * acpi_dev_resource_memory - Extract ACPI memory resource information.
+ * @ares: Input ACPI resource object.
+ * @res: Output generic resource object.
+ *
+ * Check if the given ACPI resource object represents a memory resource and
+ * if that's the case, use the information in it to populate the generic
+ * resource object pointed to by @res.
+ */
+bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res)
+{
+	struct acpi_resource_memory24 *memory24;
+	struct acpi_resource_memory32 *memory32;
+	struct acpi_resource_fixed_memory32 *fixed_memory32;
+
+	switch (ares->type) {
+	case ACPI_RESOURCE_TYPE_MEMORY24:
+		memory24 = &ares->data.memory24;
+		acpi_dev_get_memresource(res, memory24->minimum,
+					 memory24->address_length,
+					 memory24->write_protect);
+		break;
+	case ACPI_RESOURCE_TYPE_MEMORY32:
+		memory32 = &ares->data.memory32;
+		acpi_dev_get_memresource(res, memory32->minimum,
+					 memory32->address_length,
+					 memory32->write_protect);
+		break;
+	case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
+		fixed_memory32 = &ares->data.fixed_memory32;
+		acpi_dev_get_memresource(res, fixed_memory32->address,
+					 fixed_memory32->address_length,
+					 fixed_memory32->write_protect);
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_resource_memory);
+
+static unsigned int acpi_dev_ioresource_flags(u64 start, u64 end, u8 io_decode,
+					      bool window)
+{
+	int flags = IORESOURCE_IO;
+
+	if (io_decode == ACPI_DECODE_16)
+		flags |= IORESOURCE_IO_16BIT_ADDR;
+
+	if (start > end || end >= 0x10003)
+		flags |= IORESOURCE_DISABLED;
+
+	if (window)
+		flags |= IORESOURCE_WINDOW;
+
+	return flags;
+}
+
+static void acpi_dev_get_ioresource(struct resource *res, u64 start, u64 len,
+				    u8 io_decode)
+{
+	u64 end = start + len - 1;
+
+	res->start = start;
+	res->end = end;
+	res->flags = acpi_dev_ioresource_flags(start, end, io_decode, false);
+}
+
+/**
+ * acpi_dev_resource_io - Extract ACPI I/O resource information.
+ * @ares: Input ACPI resource object.
+ * @res: Output generic resource object.
+ *
+ * Check if the given ACPI resource object represents an I/O resource and
+ * if that's the case, use the information in it to populate the generic
+ * resource object pointed to by @res.
+ */
+bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res)
+{
+	struct acpi_resource_io *io;
+	struct acpi_resource_fixed_io *fixed_io;
+
+	switch (ares->type) {
+	case ACPI_RESOURCE_TYPE_IO:
+		io = &ares->data.io;
+		acpi_dev_get_ioresource(res, io->minimum,
+					io->address_length,
+					io->io_decode);
+		break;
+	case ACPI_RESOURCE_TYPE_FIXED_IO:
+		fixed_io = &ares->data.fixed_io;
+		acpi_dev_get_ioresource(res, fixed_io->address,
+					fixed_io->address_length,
+					ACPI_DECODE_10);
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_resource_io);
+
+/**
+ * acpi_dev_resource_address_space - Extract ACPI address space information.
+ * @ares: Input ACPI resource object.
+ * @res: Output generic resource object.
+ *
+ * Check if the given ACPI resource object represents an address space resource
+ * and if that's the case, use the information in it to populate the generic
+ * resource object pointed to by @res.
+ */
+bool acpi_dev_resource_address_space(struct acpi_resource *ares,
+				     struct resource *res)
+{
+	acpi_status status;
+	struct acpi_resource_address64 addr;
+	bool window;
+	u64 len;
+	u8 io_decode;
+
+	switch (ares->type) {
+	case ACPI_RESOURCE_TYPE_ADDRESS16:
+	case ACPI_RESOURCE_TYPE_ADDRESS32:
+	case ACPI_RESOURCE_TYPE_ADDRESS64:
+		break;
+	default:
+		return false;
+	}
+
+	status = acpi_resource_to_address64(ares, &addr);
+	if (ACPI_FAILURE(status))
+		return true;
+
+	res->start = addr.minimum;
+	res->end = addr.maximum;
+	window = addr.producer_consumer == ACPI_PRODUCER;
+
+	switch(addr.resource_type) {
+	case ACPI_MEMORY_RANGE:
+		len = addr.maximum - addr.minimum + 1;
+		res->flags = acpi_dev_memresource_flags(len,
+						addr.info.mem.write_protect,
+						window);
+		break;
+	case ACPI_IO_RANGE:
+		io_decode = addr.granularity == 0xfff ?
+				ACPI_DECODE_10 : ACPI_DECODE_16;
+		res->flags = acpi_dev_ioresource_flags(addr.minimum,
+						       addr.maximum,
+						       io_decode, window);
+		break;
+	case ACPI_BUS_NUMBER_RANGE:
+		res->flags = IORESOURCE_BUS;
+		break;
+	default:
+		res->flags = 0;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_resource_address_space);
+
+/**
+ * acpi_dev_resource_ext_address_space - Extract ACPI address space information.
+ * @ares: Input ACPI resource object.
+ * @res: Output generic resource object.
+ *
+ * Check if the given ACPI resource object represents an extended address space
+ * resource and if that's the case, use the information in it to populate the
+ * generic resource object pointed to by @res.
+ */
+bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares,
+					 struct resource *res)
+{
+	struct acpi_resource_extended_address64 *ext_addr;
+	bool window;
+	u64 len;
+	u8 io_decode;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64)
+		return false;
+
+	ext_addr = &ares->data.ext_address64;
+
+	res->start = ext_addr->minimum;
+	res->end = ext_addr->maximum;
+	window = ext_addr->producer_consumer == ACPI_PRODUCER;
+
+	switch(ext_addr->resource_type) {
+	case ACPI_MEMORY_RANGE:
+		len = ext_addr->maximum - ext_addr->minimum + 1;
+		res->flags = acpi_dev_memresource_flags(len,
+					ext_addr->info.mem.write_protect,
+					window);
+		break;
+	case ACPI_IO_RANGE:
+		io_decode = ext_addr->granularity == 0xfff ?
+				ACPI_DECODE_10 : ACPI_DECODE_16;
+		res->flags = acpi_dev_ioresource_flags(ext_addr->minimum,
+						       ext_addr->maximum,
+						       io_decode, window);
+		break;
+	case ACPI_BUS_NUMBER_RANGE:
+		res->flags = IORESOURCE_BUS;
+		break;
+	default:
+		res->flags = 0;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_resource_ext_address_space);
+
+/**
+ * acpi_dev_irq_flags - Determine IRQ resource flags.
+ * @triggering: Triggering type as provided by ACPI.
+ * @polarity: Interrupt polarity as provided by ACPI.
+ * @shareable: Whether or not the interrupt is shareable.
+ */
+unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable)
+{
+	unsigned long flags;
+
+	if (triggering == ACPI_LEVEL_SENSITIVE)
+		flags = polarity == ACPI_ACTIVE_LOW ?
+			IORESOURCE_IRQ_LOWLEVEL : IORESOURCE_IRQ_HIGHLEVEL;
+	else
+		flags = polarity == ACPI_ACTIVE_LOW ?
+			IORESOURCE_IRQ_LOWEDGE : IORESOURCE_IRQ_HIGHEDGE;
+
+	if (shareable == ACPI_SHARED)
+		flags |= IORESOURCE_IRQ_SHAREABLE;
+
+	return flags | IORESOURCE_IRQ;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_irq_flags);
+
+static void acpi_dev_irqresource_disabled(struct resource *res, u32 gsi)
+{
+	res->start = gsi;
+	res->end = gsi;
+	res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED;
+}
+
+static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
+				     u8 triggering, u8 polarity, u8 shareable)
+{
+	int irq, p, t;
+
+	if (!valid_IRQ(gsi)) {
+		acpi_dev_irqresource_disabled(res, gsi);
+		return;
+	}
+
+	/*
+	 * In IO-APIC mode, use overrided attribute. Two reasons:
+	 * 1. BIOS bug in DSDT
+	 * 2. BIOS uses IO-APIC mode Interrupt Source Override
+	 */
+	if (!acpi_get_override_irq(gsi, &t, &p)) {
+		u8 trig = t ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+		u8 pol = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+
+		if (triggering != trig || polarity != pol) {
+			pr_warning("ACPI: IRQ %d override to %s, %s\n", gsi,
+				   t ? "edge" : "level", p ? "low" : "high");
+			triggering = trig;
+			polarity = pol;
+		}
+	}
+
+	res->flags = acpi_dev_irq_flags(triggering, polarity, shareable);
+	irq = acpi_register_gsi(NULL, gsi, triggering, polarity);
+	if (irq >= 0) {
+		res->start = irq;
+		res->end = irq;
+	} else {
+		acpi_dev_irqresource_disabled(res, gsi);
+	}
+}
+
+/**
+ * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
+ * @ares: Input ACPI resource object.
+ * @index: Index into the array of GSIs represented by the resource.
+ * @res: Output generic resource object.
+ *
+ * Check if the given ACPI resource object represents an interrupt resource
+ * and @index does not exceed the resource's interrupt count (true is returned
+ * in that case regardless of the results of the other checks)).  If that's the
+ * case, register the GSI corresponding to @index from the array of interrupts
+ * represented by the resource and populate the generic resource object pointed
+ * to by @res accordingly.  If the registration of the GSI is not successful,
+ * IORESOURCE_DISABLED will be set it that object's flags.
+ */
+bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
+				 struct resource *res)
+{
+	struct acpi_resource_irq *irq;
+	struct acpi_resource_extended_irq *ext_irq;
+
+	switch (ares->type) {
+	case ACPI_RESOURCE_TYPE_IRQ:
+		/*
+		 * Per spec, only one interrupt per descriptor is allowed in
+		 * _CRS, but some firmware violates this, so parse them all.
+		 */
+		irq = &ares->data.irq;
+		if (index >= irq->interrupt_count) {
+			acpi_dev_irqresource_disabled(res, 0);
+			return false;
+		}
+		acpi_dev_get_irqresource(res, irq->interrupts[index],
+					 irq->triggering, irq->polarity,
+					 irq->sharable);
+		break;
+	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
+		ext_irq = &ares->data.extended_irq;
+		if (index >= ext_irq->interrupt_count) {
+			acpi_dev_irqresource_disabled(res, 0);
+			return false;
+		}
+		acpi_dev_get_irqresource(res, ext_irq->interrupts[index],
+					 ext_irq->triggering, ext_irq->polarity,
+					 ext_irq->sharable);
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_resource_interrupt);
diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h
index fa4e0a5db3f8..ffd53e3eb92f 100644
--- a/drivers/pnp/base.h
+++ b/drivers/pnp/base.h
@@ -159,6 +159,8 @@ struct pnp_resource {
 
 void pnp_free_resource(struct pnp_resource *pnp_res);
 
+struct pnp_resource *pnp_add_resource(struct pnp_dev *dev,
+				      struct resource *res);
 struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq,
 					  int flags);
 struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma,
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 5be4a392a3ae..b8f4ea7b27fc 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -28,37 +28,6 @@
 #include "../base.h"
 #include "pnpacpi.h"
 
-#ifdef CONFIG_IA64
-#define valid_IRQ(i) (1)
-#else
-#define valid_IRQ(i) (((i) != 0) && ((i) != 2))
-#endif
-
-/*
- * Allocated Resources
- */
-static int irq_flags(int triggering, int polarity, int shareable)
-{
-	int flags;
-
-	if (triggering == ACPI_LEVEL_SENSITIVE) {
-		if (polarity == ACPI_ACTIVE_LOW)
-			flags = IORESOURCE_IRQ_LOWLEVEL;
-		else
-			flags = IORESOURCE_IRQ_HIGHLEVEL;
-	} else {
-		if (polarity == ACPI_ACTIVE_LOW)
-			flags = IORESOURCE_IRQ_LOWEDGE;
-		else
-			flags = IORESOURCE_IRQ_HIGHEDGE;
-	}
-
-	if (shareable == ACPI_SHARED)
-		flags |= IORESOURCE_IRQ_SHAREABLE;
-
-	return flags;
-}
-
 static void decode_irq_flags(struct pnp_dev *dev, int flags, int *triggering,
 			     int *polarity, int *shareable)
 {
@@ -94,45 +63,6 @@ static void decode_irq_flags(struct pnp_dev *dev, int flags, int *triggering,
 		*shareable = ACPI_EXCLUSIVE;
 }
 
-static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev,
-						u32 gsi, int triggering,
-						int polarity, int shareable)
-{
-	int irq, flags;
-	int p, t;
-
-	if (!valid_IRQ(gsi)) {
-		pnp_add_irq_resource(dev, gsi, IORESOURCE_DISABLED);
-		return;
-	}
-
-	/*
-	 * in IO-APIC mode, use overrided attribute. Two reasons:
-	 * 1. BIOS bug in DSDT
-	 * 2. BIOS uses IO-APIC mode Interrupt Source Override
-	 */
-	if (!acpi_get_override_irq(gsi, &t, &p)) {
-		t = t ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
-		p = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-
-		if (triggering != t || polarity != p) {
-			dev_warn(&dev->dev, "IRQ %d override to %s, %s\n",
-				gsi, t ? "edge":"level", p ? "low":"high");
-			triggering = t;
-			polarity = p;
-		}
-	}
-
-	flags = irq_flags(triggering, polarity, shareable);
-	irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
-	if (irq >= 0)
-		pcibios_penalize_isa_irq(irq, 1);
-	else
-		flags |= IORESOURCE_DISABLED;
-
-	pnp_add_irq_resource(dev, irq, flags);
-}
-
 static int dma_flags(struct pnp_dev *dev, int type, int bus_master,
 		     int transfer)
 {
@@ -177,21 +107,16 @@ static int dma_flags(struct pnp_dev *dev, int type, int bus_master,
 	return flags;
 }
 
-static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 start,
-					       u64 len, int io_decode,
-					       int window)
-{
-	int flags = 0;
-	u64 end = start + len - 1;
+/*
+ * Allocated Resources
+ */
 
-	if (io_decode == ACPI_DECODE_16)
-		flags |= IORESOURCE_IO_16BIT_ADDR;
-	if (len == 0 || end >= 0x10003)
-		flags |= IORESOURCE_DISABLED;
-	if (window)
-		flags |= IORESOURCE_WINDOW;
+static void pnpacpi_add_irqresource(struct pnp_dev *dev, struct resource *r)
+{
+	if (!(r->flags & IORESOURCE_DISABLED))
+		pcibios_penalize_isa_irq(r->start, 1);
 
-	pnp_add_io_resource(dev, start, end, flags);
+	pnp_add_resource(dev, r);
 }
 
 /*
@@ -249,130 +174,49 @@ static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev,
 	}
 }
 
-static void pnpacpi_parse_allocated_memresource(struct pnp_dev *dev,
-						u64 start, u64 len,
-						int write_protect, int window)
-{
-	int flags = 0;
-	u64 end = start + len - 1;
-
-	if (len == 0)
-		flags |= IORESOURCE_DISABLED;
-	if (write_protect == ACPI_READ_WRITE_MEMORY)
-		flags |= IORESOURCE_MEM_WRITEABLE;
-	if (window)
-		flags |= IORESOURCE_WINDOW;
-
-	pnp_add_mem_resource(dev, start, end, flags);
-}
-
-static void pnpacpi_parse_allocated_busresource(struct pnp_dev *dev,
-						u64 start, u64 len)
-{
-	u64 end = start + len - 1;
-
-	pnp_add_bus_resource(dev, start, end);
-}
-
-static void pnpacpi_parse_allocated_address_space(struct pnp_dev *dev,
-						  struct acpi_resource *res)
-{
-	struct acpi_resource_address64 addr, *p = &addr;
-	acpi_status status;
-	int window;
-	u64 len;
-
-	status = acpi_resource_to_address64(res, p);
-	if (!ACPI_SUCCESS(status)) {
-		dev_warn(&dev->dev, "failed to convert resource type %d\n",
-			 res->type);
-		return;
-	}
-
-	/* Windows apparently computes length rather than using _LEN */
-	len = p->maximum - p->minimum + 1;
-	window = (p->producer_consumer == ACPI_PRODUCER) ? 1 : 0;
-
-	if (p->resource_type == ACPI_MEMORY_RANGE)
-		pnpacpi_parse_allocated_memresource(dev, p->minimum, len,
-			p->info.mem.write_protect, window);
-	else if (p->resource_type == ACPI_IO_RANGE)
-		pnpacpi_parse_allocated_ioresource(dev, p->minimum, len,
-			p->granularity == 0xfff ? ACPI_DECODE_10 :
-				ACPI_DECODE_16, window);
-	else if (p->resource_type == ACPI_BUS_NUMBER_RANGE)
-		pnpacpi_parse_allocated_busresource(dev, p->minimum, len);
-}
-
-static void pnpacpi_parse_allocated_ext_address_space(struct pnp_dev *dev,
-						      struct acpi_resource *res)
-{
-	struct acpi_resource_extended_address64 *p = &res->data.ext_address64;
-	int window;
-	u64 len;
-
-	/* Windows apparently computes length rather than using _LEN */
-	len = p->maximum - p->minimum + 1;
-	window = (p->producer_consumer == ACPI_PRODUCER) ? 1 : 0;
-
-	if (p->resource_type == ACPI_MEMORY_RANGE)
-		pnpacpi_parse_allocated_memresource(dev, p->minimum, len,
-			p->info.mem.write_protect, window);
-	else if (p->resource_type == ACPI_IO_RANGE)
-		pnpacpi_parse_allocated_ioresource(dev, p->minimum, len,
-			p->granularity == 0xfff ? ACPI_DECODE_10 :
-				ACPI_DECODE_16, window);
-	else if (p->resource_type == ACPI_BUS_NUMBER_RANGE)
-		pnpacpi_parse_allocated_busresource(dev, p->minimum, len);
-}
-
 static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 					      void *data)
 {
 	struct pnp_dev *dev = data;
-	struct acpi_resource_irq *irq;
 	struct acpi_resource_dma *dma;
-	struct acpi_resource_io *io;
-	struct acpi_resource_fixed_io *fixed_io;
 	struct acpi_resource_vendor_typed *vendor_typed;
-	struct acpi_resource_memory24 *memory24;
-	struct acpi_resource_memory32 *memory32;
-	struct acpi_resource_fixed_memory32 *fixed_memory32;
-	struct acpi_resource_extended_irq *extended_irq;
+	struct resource r;
 	int i, flags;
 
-	switch (res->type) {
-	case ACPI_RESOURCE_TYPE_IRQ:
-		/*
-		 * Per spec, only one interrupt per descriptor is allowed in
-		 * _CRS, but some firmware violates this, so parse them all.
-		 */
-		irq = &res->data.irq;
-		if (irq->interrupt_count == 0)
-			pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED);
-		else {
-			for (i = 0; i < irq->interrupt_count; i++) {
-				pnpacpi_parse_allocated_irqresource(dev,
-					irq->interrupts[i],
-					irq->triggering,
-					irq->polarity,
-				    irq->sharable);
-			}
+	if (acpi_dev_resource_memory(res, &r)
+	    || acpi_dev_resource_io(res, &r)
+	    || acpi_dev_resource_address_space(res, &r)
+	    || acpi_dev_resource_ext_address_space(res, &r)) {
+		pnp_add_resource(dev, &r);
+		return AE_OK;
+	}
 
+	r.flags = 0;
+	if (acpi_dev_resource_interrupt(res, 0, &r)) {
+		pnpacpi_add_irqresource(dev, &r);
+		for (i = 1; acpi_dev_resource_interrupt(res, i, &r); i++)
+			pnpacpi_add_irqresource(dev, &r);
+
+		if (i > 1) {
 			/*
 			 * The IRQ encoder puts a single interrupt in each
 			 * descriptor, so if a _CRS descriptor has more than
 			 * one interrupt, we won't be able to re-encode it.
 			 */
-			if (pnp_can_write(dev) && irq->interrupt_count > 1) {
+			if (pnp_can_write(dev)) {
 				dev_warn(&dev->dev, "multiple interrupts in "
 					 "_CRS descriptor; configuration can't "
 					 "be changed\n");
 				dev->capabilities &= ~PNP_WRITE;
 			}
 		}
-		break;
+		return AE_OK;
+	} else if (r.flags & IORESOURCE_DISABLED) {
+		pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED);
+		return AE_OK;
+	}
 
+	switch (res->type) {
 	case ACPI_RESOURCE_TYPE_DMA:
 		dma = &res->data.dma;
 		if (dma->channel_count > 0 && dma->channels[0] != (u8) -1)
@@ -383,26 +227,10 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 		pnp_add_dma_resource(dev, dma->channels[0], flags);
 		break;
 
-	case ACPI_RESOURCE_TYPE_IO:
-		io = &res->data.io;
-		pnpacpi_parse_allocated_ioresource(dev,
-			io->minimum,
-			io->address_length,
-			io->io_decode, 0);
-		break;
-
 	case ACPI_RESOURCE_TYPE_START_DEPENDENT:
 	case ACPI_RESOURCE_TYPE_END_DEPENDENT:
 		break;
 
-	case ACPI_RESOURCE_TYPE_FIXED_IO:
-		fixed_io = &res->data.fixed_io;
-		pnpacpi_parse_allocated_ioresource(dev,
-			fixed_io->address,
-			fixed_io->address_length,
-			ACPI_DECODE_10, 0);
-		break;
-
 	case ACPI_RESOURCE_TYPE_VENDOR:
 		vendor_typed = &res->data.vendor_typed;
 		pnpacpi_parse_allocated_vendor(dev, vendor_typed);
@@ -411,66 +239,6 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 	case ACPI_RESOURCE_TYPE_END_TAG:
 		break;
 
-	case ACPI_RESOURCE_TYPE_MEMORY24:
-		memory24 = &res->data.memory24;
-		pnpacpi_parse_allocated_memresource(dev,
-			memory24->minimum,
-			memory24->address_length,
-			memory24->write_protect, 0);
-		break;
-	case ACPI_RESOURCE_TYPE_MEMORY32:
-		memory32 = &res->data.memory32;
-		pnpacpi_parse_allocated_memresource(dev,
-			memory32->minimum,
-			memory32->address_length,
-			memory32->write_protect, 0);
-		break;
-	case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
-		fixed_memory32 = &res->data.fixed_memory32;
-		pnpacpi_parse_allocated_memresource(dev,
-			fixed_memory32->address,
-			fixed_memory32->address_length,
-			fixed_memory32->write_protect, 0);
-		break;
-	case ACPI_RESOURCE_TYPE_ADDRESS16:
-	case ACPI_RESOURCE_TYPE_ADDRESS32:
-	case ACPI_RESOURCE_TYPE_ADDRESS64:
-		pnpacpi_parse_allocated_address_space(dev, res);
-		break;
-
-	case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
-		pnpacpi_parse_allocated_ext_address_space(dev, res);
-		break;
-
-	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
-		extended_irq = &res->data.extended_irq;
-
-		if (extended_irq->interrupt_count == 0)
-			pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED);
-		else {
-			for (i = 0; i < extended_irq->interrupt_count; i++) {
-				pnpacpi_parse_allocated_irqresource(dev,
-					extended_irq->interrupts[i],
-					extended_irq->triggering,
-					extended_irq->polarity,
-					extended_irq->sharable);
-			}
-
-			/*
-			 * The IRQ encoder puts a single interrupt in each
-			 * descriptor, so if a _CRS descriptor has more than
-			 * one interrupt, we won't be able to re-encode it.
-			 */
-			if (pnp_can_write(dev) &&
-			    extended_irq->interrupt_count > 1) {
-				dev_warn(&dev->dev, "multiple interrupts in "
-					 "_CRS descriptor; configuration can't "
-					 "be changed\n");
-				dev->capabilities &= ~PNP_WRITE;
-			}
-		}
-		break;
-
 	case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
 		break;
 
@@ -531,7 +299,7 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev,
 		if (p->interrupts[i])
 			__set_bit(p->interrupts[i], map.bits);
 
-	flags = irq_flags(p->triggering, p->polarity, p->sharable);
+	flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->sharable);
 	pnp_register_irq_resource(dev, option_flags, &map, flags);
 }
 
@@ -555,7 +323,7 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev,
 		}
 	}
 
-	flags = irq_flags(p->triggering, p->polarity, p->sharable);
+	flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->sharable);
 	pnp_register_irq_resource(dev, option_flags, &map, flags);
 }
 
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index b0ecacbe53b1..3e6db1c1dc29 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -503,6 +503,22 @@ static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev)
 	return pnp_res;
 }
 
+struct pnp_resource *pnp_add_resource(struct pnp_dev *dev,
+				      struct resource *res)
+{
+	struct pnp_resource *pnp_res;
+
+	pnp_res = pnp_new_resource(dev);
+	if (!pnp_res) {
+		dev_err(&dev->dev, "can't add resource %pR\n", res);
+		return NULL;
+	}
+
+	pnp_res->res = *res;
+	dev_dbg(&dev->dev, "%pR\n", res);
+	return pnp_res;
+}
+
 struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq,
 					  int flags)
 {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 48761cb481db..16fcaf8dad3d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -251,6 +251,16 @@ extern int pnpacpi_disabled;
 
 #define PXM_INVAL	(-1)
 
+bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res);
+bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res);
+bool acpi_dev_resource_address_space(struct acpi_resource *ares,
+				     struct resource *res);
+bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares,
+					 struct resource *res);
+unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable);
+bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
+				 struct resource *res);
+
 int acpi_check_resource_conflict(const struct resource *res);
 
 int acpi_check_region(resource_size_t start, resource_size_t n,
-- 
cgit v1.2.3-55-g7522


From 8e345c991c8c7a3c081199ef77deada79e37618a Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Thu, 15 Nov 2012 00:30:21 +0100
Subject: ACPI: Centralized processing of ACPI device resources

Currently, whoever wants to use ACPI device resources has to call
acpi_walk_resources() to browse the buffer returned by the _CRS
method for the given device and create filters passed to that
routine to apply to the individual resource items.  This generally
is cumbersome, time-consuming and inefficient.  Moreover, it may
be problematic if resource conflicts need to be resolved, because
the different users of _CRS will need to do that in a consistent
way.  However, if there are resource conflicts, the ACPI core
should be able to resolve them centrally instead of relying on
various users of acpi_walk_resources() to handle them correctly
together.

For this reason, introduce a new function, acpi_dev_get_resources(),
that can be used by subsystems to obtain a list of struct resource
objects corresponding to the ACPI device resources returned by
_CRS and, if necessary, to apply additional preprocessing routine
to the ACPI resources before converting them to the struct resource
format.

Make the ACPI code that creates platform device objects use
acpi_dev_get_resources() for resource processing instead of executing
acpi_walk_resources() twice by itself, which causes it to be much
more straightforward and easier to follow.

In the future, acpi_dev_get_resources() can be extended to meet
the needs of the ACPI PNP subsystem and other users of _CRS in
the kernel.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/acpi/acpi_platform.c |  94 ++++++------------------------
 drivers/acpi/resource.c      | 134 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h         |  10 ++++
 3 files changed, 161 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index bcbb00ce6d99..7ac20d8b8f07 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -19,61 +19,6 @@
 
 ACPI_MODULE_NAME("platform");
 
-struct resource_info {
-	struct device *dev;
-	struct resource *res;
-	size_t n, cur;
-};
-
-static acpi_status acpi_platform_count_resources(struct acpi_resource *res,
-						 void *data)
-{
-	struct acpi_resource_extended_irq *acpi_xirq;
-	struct acpi_resource_irq *acpi_irq;
-	struct resource_info *ri = data;
-
-	switch (res->type) {
-	case ACPI_RESOURCE_TYPE_IRQ:
-		acpi_irq = &res->data.irq;
-		ri->n += acpi_irq->interrupt_count;
-		break;
-	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
-		acpi_xirq = &res->data.extended_irq;
-		ri->n += acpi_xirq->interrupt_count;
-		break;
-	default:
-		ri->n++;
-	}
-
-	return AE_OK;
-}
-
-static acpi_status acpi_platform_add_resources(struct acpi_resource *res,
-					       void *data)
-{
-	struct resource_info *ri = data;
-	struct resource *r;
-
-	r = ri->res + ri->cur;
-	if (acpi_dev_resource_memory(res, r)
-	    || acpi_dev_resource_io(res, r)
-	    || acpi_dev_resource_address_space(res, r)
-	    || acpi_dev_resource_ext_address_space(res, r)) {
-		ri->cur++;
-		return AE_OK;
-	}
-	if (acpi_dev_resource_interrupt(res, 0, r)) {
-		int i;
-
-		r++;
-		for (i = 1; acpi_dev_resource_interrupt(res, i, r); i++)
-			r++;
-
-		ri->cur += i;
-	}
-	return AE_OK;
-}
-
 /**
  * acpi_create_platform_device - Create platform device for ACPI device node
  * @adev: ACPI device node to create a platform device for.
@@ -89,35 +34,31 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 	struct platform_device *pdev = NULL;
 	struct acpi_device *acpi_parent;
 	struct device *parent = NULL;
-	struct resource_info ri;
-	acpi_status status;
+	struct resource_list_entry *rentry;
+	struct list_head resource_list;
+	struct resource *resources;
+	int count;
 
 	/* If the ACPI node already has a physical device attached, skip it. */
 	if (adev->physical_node_count)
 		return NULL;
 
-	memset(&ri, 0, sizeof(ri));
-	/* First, count the resources. */
-	status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
-				     acpi_platform_count_resources, &ri);
-	if (ACPI_FAILURE(status) || !ri.n)
+	INIT_LIST_HEAD(&resource_list);
+	count = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (count <= 0)
 		return NULL;
 
-	/* Next, allocate memory for all the resources and populate it. */
-	ri.dev = &adev->dev;
-	ri.res = kzalloc(ri.n * sizeof(struct resource), GFP_KERNEL);
-	if (!ri.res) {
-		dev_err(&adev->dev,
-			"failed to allocate memory for resources\n");
+	resources = kmalloc(count * sizeof(struct resource), GFP_KERNEL);
+	if (!resources) {
+		dev_err(&adev->dev, "No memory for resources\n");
+		acpi_dev_free_resource_list(&resource_list);
 		return NULL;
 	}
+	count = 0;
+	list_for_each_entry(rentry, &resource_list, node)
+		resources[count++] = rentry->res;
 
-	status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
-				     acpi_platform_add_resources, &ri);
-	if (ACPI_FAILURE(status)) {
-		dev_err(&adev->dev, "failed to walk resources\n");
-		goto out;
-	}
+	acpi_dev_free_resource_list(&resource_list);
 
 	/*
 	 * If the ACPI node has a parent and that parent has a physical device
@@ -140,7 +81,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 		mutex_unlock(&acpi_parent->physical_node_lock);
 	}
 	pdev = platform_device_register_resndata(parent, dev_name(&adev->dev),
-						 -1, ri.res, ri.cur, NULL, 0);
+						 -1, resources, count, NULL, 0);
 	if (IS_ERR(pdev)) {
 		dev_err(&adev->dev, "platform device creation failed: %ld\n",
 			PTR_ERR(pdev));
@@ -150,8 +91,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 			dev_name(&pdev->dev));
 	}
 
- out:
-	kfree(ri.res);
+	kfree(resources);
 	return pdev;
 }
 
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 3e7fd349e29d..2bafc25482b3 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -26,6 +26,7 @@
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_X86
 #define valid_IRQ(i) (((i) != 0) && ((i) != 2))
@@ -391,3 +392,136 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
 	return true;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_resource_interrupt);
+
+/**
+ * acpi_dev_free_resource_list - Free resource from %acpi_dev_get_resources().
+ * @list: The head of the resource list to free.
+ */
+void acpi_dev_free_resource_list(struct list_head *list)
+{
+	struct resource_list_entry *rentry, *re;
+
+	list_for_each_entry_safe(rentry, re, list, node) {
+		list_del(&rentry->node);
+		kfree(rentry);
+	}
+}
+EXPORT_SYMBOL_GPL(acpi_dev_free_resource_list);
+
+struct res_proc_context {
+	struct list_head *list;
+	int (*preproc)(struct acpi_resource *, void *);
+	void *preproc_data;
+	int count;
+	int error;
+};
+
+static acpi_status acpi_dev_new_resource_entry(struct resource *r,
+					       struct res_proc_context *c)
+{
+	struct resource_list_entry *rentry;
+
+	rentry = kmalloc(sizeof(*rentry), GFP_KERNEL);
+	if (!rentry) {
+		c->error = -ENOMEM;
+		return AE_NO_MEMORY;
+	}
+	INIT_LIST_HEAD(&rentry->node);
+	rentry->res = *r;
+	list_add_tail(&rentry->node, c->list);
+	c->count++;
+	return AE_OK;
+}
+
+static acpi_status acpi_dev_process_resource(struct acpi_resource *ares,
+					     void *context)
+{
+	struct res_proc_context *c = context;
+	struct resource r;
+	int i;
+
+	if (c->preproc) {
+		int ret;
+
+		ret = c->preproc(ares, c->preproc_data);
+		if (ret < 0) {
+			c->error = ret;
+			return AE_ABORT_METHOD;
+		} else if (ret > 0) {
+			return AE_OK;
+		}
+	}
+
+	memset(&r, 0, sizeof(r));
+
+	if (acpi_dev_resource_memory(ares, &r)
+	    || acpi_dev_resource_io(ares, &r)
+	    || acpi_dev_resource_address_space(ares, &r)
+	    || acpi_dev_resource_ext_address_space(ares, &r))
+		return acpi_dev_new_resource_entry(&r, c);
+
+	for (i = 0; acpi_dev_resource_interrupt(ares, i, &r); i++) {
+		acpi_status status;
+
+		status = acpi_dev_new_resource_entry(&r, c);
+		if (ACPI_FAILURE(status))
+			return status;
+	}
+
+	return AE_OK;
+}
+
+/**
+ * acpi_dev_get_resources - Get current resources of a device.
+ * @adev: ACPI device node to get the resources for.
+ * @list: Head of the resultant list of resources (must be empty).
+ * @preproc: The caller's preprocessing routine.
+ * @preproc_data: Pointer passed to the caller's preprocessing routine.
+ *
+ * Evaluate the _CRS method for the given device node and process its output by
+ * (1) executing the @preproc() rountine provided by the caller, passing the
+ * resource pointer and @preproc_data to it as arguments, for each ACPI resource
+ * returned and (2) converting all of the returned ACPI resources into struct
+ * resource objects if possible.  If the return value of @preproc() in step (1)
+ * is different from 0, step (2) is not applied to the given ACPI resource and
+ * if that value is negative, the whole processing is aborted and that value is
+ * returned as the final error code.
+ *
+ * The resultant struct resource objects are put on the list pointed to by
+ * @list, that must be empty initially, as members of struct resource_list_entry
+ * objects.  Callers of this routine should use %acpi_dev_free_resource_list() to
+ * free that list.
+ *
+ * The number of resources in the output list is returned on success, an error
+ * code reflecting the error condition is returned otherwise.
+ */
+int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list,
+			   int (*preproc)(struct acpi_resource *, void *),
+			   void *preproc_data)
+{
+	struct res_proc_context c;
+	acpi_handle not_used;
+	acpi_status status;
+
+	if (!adev || !adev->handle || !list_empty(list))
+		return -EINVAL;
+
+	status = acpi_get_handle(adev->handle, METHOD_NAME__CRS, &not_used);
+	if (ACPI_FAILURE(status))
+		return 0;
+
+	c.list = list;
+	c.preproc = preproc;
+	c.preproc_data = preproc_data;
+	c.count = 0;
+	c.error = 0;
+	status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
+				     acpi_dev_process_resource, &c);
+	if (ACPI_FAILURE(status)) {
+		acpi_dev_free_resource_list(list);
+		return c.error ? c.error : -EIO;
+	}
+
+	return c.count;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_get_resources);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 16fcaf8dad3d..32fbc4e73a56 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -261,6 +261,16 @@ unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable);
 bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
 				 struct resource *res);
 
+struct resource_list_entry {
+	struct list_head node;
+	struct resource res;
+};
+
+void acpi_dev_free_resource_list(struct list_head *list);
+int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list,
+			   int (*preproc)(struct acpi_resource *, void *),
+			   void *preproc_data);
+
 int acpi_check_resource_conflict(const struct resource *res);
 
 int acpi_check_region(resource_size_t start, resource_size_t n,
-- 
cgit v1.2.3-55-g7522


From 4b972f0b04eaae645b22d99479b9aea43c3d64e7 Mon Sep 17 00:00:00 2001
From: viresh kumar
Date: Tue, 23 Oct 2012 01:23:43 +0200
Subject: cpufreq / core: Fix printing of governor and driver name

Arrays for governer and driver name are of size CPUFREQ_NAME_LEN or 16.
i.e. 15 bytes for name and 1 for trailing '\0'.

When cpufreq driver print these names (for sysfs), it includes '\n' or ' ' in
the fmt string and still passes length as CPUFREQ_NAME_LEN. If the driver or
governor names are using all 15 fields allocated to them, then the trailing '\n'
or ' ' will never be printed. And so commands like:

root@linaro-developer# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver

will print something like:

cpufreq_foodrvroot@linaro-developer#

Fix this by increasing print length by one character.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 6 +++---
 include/linux/cpufreq.h   | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 021973b7dc56..db6e337ad337 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -445,7 +445,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
 		return sprintf(buf, "performance\n");
 	else if (policy->governor)
-		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
+		return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
 				policy->governor->name);
 	return -EINVAL;
 }
@@ -491,7 +491,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
  */
 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
 {
-	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
+	return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
 }
 
 /**
@@ -512,7 +512,7 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
 		    - (CPUFREQ_NAME_LEN + 2)))
 			goto out;
-		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
+		i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
 	}
 out:
 	i += sprintf(&buf[i], "\n");
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index b60f6ba01d0c..fc4b78510151 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -22,6 +22,8 @@
 #include <asm/div64.h>
 
 #define CPUFREQ_NAME_LEN 16
+/* Print length for names. Extra 1 space for accomodating '\n' in prints */
+#define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1)
 
 
 /*********************************************************************
-- 
cgit v1.2.3-55-g7522


From 2aacdfff9c6958723aa5076003247933cefc32ea Mon Sep 17 00:00:00 2001
From: viresh kumar
Date: Tue, 23 Oct 2012 01:28:05 +0200
Subject: cpufreq: Move common part from governors to separate file, v2

Multiple cpufreq governers have defined similar get_cpu_idle_time_***()
routines. These routines must be moved to some common place, so that all
governors can use them.

So moving them to cpufreq_governor.c, which seems to be a better place for
keeping these routines.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Makefile               |  4 +--
 drivers/cpufreq/cpufreq_conservative.c | 34 ----------------------
 drivers/cpufreq/cpufreq_governor.c     | 52 ++++++++++++++++++++++++++++++++++
 drivers/cpufreq/cpufreq_ondemand.c     | 34 ----------------------
 include/linux/cpufreq.h                |  5 ++++
 5 files changed, 59 insertions(+), 70 deletions(-)
 create mode 100644 drivers/cpufreq/cpufreq_governor.c

(limited to 'include/linux')

diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 1bc90e1306d8..5b1413e47216 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -7,8 +7,8 @@ obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
 obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE)	+= cpufreq_performance.o
 obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE)	+= cpufreq_powersave.o
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
-obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
-obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o cpufreq_governor.o
+obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o cpufreq_governor.o
 
 # CPUfreq cross-arch helpers
 obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index a152af7e1991..181abad07266 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -95,40 +95,6 @@ static struct dbs_tuners {
 	.freq_step = 5,
 };
 
-static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
-{
-	u64 idle_time;
-	u64 cur_wall_time;
-	u64 busy_time;
-
-	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-
-	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
-
-	idle_time = cur_wall_time - busy_time;
-	if (wall)
-		*wall = jiffies_to_usecs(cur_wall_time);
-
-	return jiffies_to_usecs(idle_time);
-}
-
-static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
-{
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
-
-	if (idle_time == -1ULL)
-		return get_cpu_idle_time_jiffy(cpu, wall);
-	else
-		idle_time += get_cpu_iowait_time_us(cpu, wall);
-
-	return idle_time;
-}
-
 /* keep track of frequency transitions */
 static int
 dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
new file mode 100644
index 000000000000..0001071cdcdb
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -0,0 +1,52 @@
+/*
+ * drivers/cpufreq/cpufreq_governor.c
+ *
+ * CPUFREQ governors common code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/cputime.h>
+#include <linux/export.h>
+#include <linux/kernel_stat.h>
+#include <linux/tick.h>
+#include <linux/types.h>
+/*
+ * Code picked from earlier governer implementations
+ */
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
+{
+	u64 idle_time;
+	u64 cur_wall_time;
+	u64 busy_time;
+
+	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+
+	idle_time = cur_wall_time - busy_time;
+	if (wall)
+		*wall = jiffies_to_usecs(cur_wall_time);
+
+	return jiffies_to_usecs(idle_time);
+}
+
+cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
+{
+	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
+
+	if (idle_time == -1ULL)
+		return get_cpu_idle_time_jiffy(cpu, wall);
+	else
+		idle_time += get_cpu_iowait_time_us(cpu, wall);
+
+	return idle_time;
+}
+EXPORT_SYMBOL_GPL(get_cpu_idle_time);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 396322f2a83f..d7f774bb49dd 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -119,40 +119,6 @@ static struct dbs_tuners {
 	.powersave_bias = 0,
 };
 
-static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
-{
-	u64 idle_time;
-	u64 cur_wall_time;
-	u64 busy_time;
-
-	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-
-	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
-
-	idle_time = cur_wall_time - busy_time;
-	if (wall)
-		*wall = jiffies_to_usecs(cur_wall_time);
-
-	return jiffies_to_usecs(idle_time);
-}
-
-static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
-{
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
-
-	if (idle_time == -1ULL)
-		return get_cpu_idle_time_jiffy(cpu, wall);
-	else
-		idle_time += get_cpu_iowait_time_us(cpu, wall);
-
-	return idle_time;
-}
-
 static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
 {
 	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index fc4b78510151..d03c21993052 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -11,6 +11,7 @@
 #ifndef _LINUX_CPUFREQ_H
 #define _LINUX_CPUFREQ_H
 
+#include <asm/cputime.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/threads.h>
@@ -407,5 +408,9 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 
+/*********************************************************************
+ *                     Governor Helpers				     *
+ *********************************************************************/
+cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall);
 
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3-55-g7522


From 4471a34f9a1f2da220272e823bdb8e8fa83a7661 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Fri, 26 Oct 2012 00:47:42 +0200
Subject: cpufreq: governors: remove redundant code

Initially ondemand governor was written and then using its code conservative
governor is written. It used a lot of code from ondemand governor, but copy of
code was created instead of using the same routines from both governors. Which
increased code redundancy, which is difficult to manage.

This patch is an attempt to move common part of both the governors to
cpufreq_governor.c file to come over above mentioned issues.

This shouldn't change anything from functionality point of view.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c | 548 ++++++++------------------
 drivers/cpufreq/cpufreq_governor.c     | 276 ++++++++++++-
 drivers/cpufreq/cpufreq_governor.h     | 177 +++++++++
 drivers/cpufreq/cpufreq_ondemand.c     | 698 +++++++++++----------------------
 include/linux/cpufreq.h                |   6 -
 5 files changed, 832 insertions(+), 873 deletions(-)
 create mode 100644 drivers/cpufreq/cpufreq_governor.h

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 181abad07266..64ef737e7e72 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -11,83 +11,30 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/cpu.h>
-#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/kernel_stat.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/hrtimer.h>
-#include <linux/tick.h>
-#include <linux/ktime.h>
-#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/percpu-defs.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
 
-/*
- * dbs is used in this file as a shortform for demandbased switching
- * It helps to keep variable names smaller, simpler
- */
+#include "cpufreq_governor.h"
 
+/* Conservative governor macors */
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
 #define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
-
-/*
- * The polling frequency of this governor depends on the capability of
- * the processor. Default polling frequency is 1000 times the transition
- * latency of the processor. The governor will work on any processor with
- * transition latency <= 10mS, using appropriate sampling
- * rate.
- * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
- * this governor will not work.
- * All times here are in uS.
- */
-#define MIN_SAMPLING_RATE_RATIO			(2)
-
-static unsigned int min_sampling_rate;
-
-#define LATENCY_MULTIPLIER			(1000)
-#define MIN_LATENCY_MULTIPLIER			(100)
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
 #define MAX_SAMPLING_DOWN_FACTOR		(10)
-#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
-
-static void do_dbs_timer(struct work_struct *work);
-
-struct cpu_dbs_info_s {
-	cputime64_t prev_cpu_idle;
-	cputime64_t prev_cpu_wall;
-	cputime64_t prev_cpu_nice;
-	struct cpufreq_policy *cur_policy;
-	struct delayed_work work;
-	unsigned int down_skip;
-	unsigned int requested_freq;
-	int cpu;
-	unsigned int enable:1;
-	/*
-	 * percpu mutex that serializes governor limit change with
-	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
-	 * when user is changing the governor or limits.
-	 */
-	struct mutex timer_mutex;
-};
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);
 
-static unsigned int dbs_enable;	/* number of CPUs using this policy */
+static struct dbs_data cs_dbs_data;
+static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info);
 
-/*
- * dbs_mutex protects dbs_enable in governor start/stop.
- */
-static DEFINE_MUTEX(dbs_mutex);
-
-static struct dbs_tuners {
-	unsigned int sampling_rate;
-	unsigned int sampling_down_factor;
-	unsigned int up_threshold;
-	unsigned int down_threshold;
-	unsigned int ignore_nice;
-	unsigned int freq_step;
-} dbs_tuners_ins = {
+static struct cs_dbs_tuners cs_tuners = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
 	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
@@ -95,61 +42,121 @@ static struct dbs_tuners {
 	.freq_step = 5,
 };
 
-/* keep track of frequency transitions */
-static int
-dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-		     void *data)
+/*
+ * Every sampling_rate, we check, if current idle time is less than 20%
+ * (default), then we try to increase frequency Every sampling_rate *
+ * sampling_down_factor, we check, if current idle time is more than 80%, then
+ * we try to decrease frequency
+ *
+ * Any frequency increase takes it to the maximum frequency. Frequency reduction
+ * happens at minimum steps of 5% (default) of maximum frequency
+ */
+static void cs_check_cpu(int cpu, unsigned int load)
 {
-	struct cpufreq_freqs *freq = data;
-	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
-							freq->cpu);
+	struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
+	struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
+	unsigned int freq_target;
+
+	/*
+	 * break out if we 'cannot' reduce the speed as the user might
+	 * want freq_step to be zero
+	 */
+	if (cs_tuners.freq_step == 0)
+		return;
+
+	/* Check for frequency increase */
+	if (load > cs_tuners.up_threshold) {
+		dbs_info->down_skip = 0;
+
+		/* if we are already at full speed then break out early */
+		if (dbs_info->requested_freq == policy->max)
+			return;
+
+		freq_target = (cs_tuners.freq_step * policy->max) / 100;
+
+		/* max freq cannot be less than 100. But who knows.... */
+		if (unlikely(freq_target == 0))
+			freq_target = 5;
+
+		dbs_info->requested_freq += freq_target;
+		if (dbs_info->requested_freq > policy->max)
+			dbs_info->requested_freq = policy->max;
 
+		__cpufreq_driver_target(policy, dbs_info->requested_freq,
+			CPUFREQ_RELATION_H);
+		return;
+	}
+
+	/*
+	 * The optimal frequency is the frequency that is the lowest that can
+	 * support the current CPU usage without triggering the up policy. To be
+	 * safe, we focus 10 points under the threshold.
+	 */
+	if (load < (cs_tuners.down_threshold - 10)) {
+		freq_target = (cs_tuners.freq_step * policy->max) / 100;
+
+		dbs_info->requested_freq -= freq_target;
+		if (dbs_info->requested_freq < policy->min)
+			dbs_info->requested_freq = policy->min;
+
+		/*
+		 * if we cannot reduce the frequency anymore, break out early
+		 */
+		if (policy->cur == policy->min)
+			return;
+
+		__cpufreq_driver_target(policy, dbs_info->requested_freq,
+				CPUFREQ_RELATION_H);
+		return;
+	}
+}
+
+static void cs_dbs_timer(struct work_struct *work)
+{
+	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
+			struct cs_cpu_dbs_info_s, cdbs.work.work);
+	unsigned int cpu = dbs_info->cdbs.cpu;
+	int delay = delay_for_sampling_rate(cs_tuners.sampling_rate);
+
+	mutex_lock(&dbs_info->cdbs.timer_mutex);
+
+	dbs_check_cpu(&cs_dbs_data, cpu);
+
+	schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
+	mutex_unlock(&dbs_info->cdbs.timer_mutex);
+}
+
+static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+		void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	struct cs_cpu_dbs_info_s *dbs_info =
+					&per_cpu(cs_cpu_dbs_info, freq->cpu);
 	struct cpufreq_policy *policy;
 
-	if (!this_dbs_info->enable)
+	if (!dbs_info->enable)
 		return 0;
 
-	policy = this_dbs_info->cur_policy;
+	policy = dbs_info->cdbs.cur_policy;
 
 	/*
-	 * we only care if our internally tracked freq moves outside
-	 * the 'valid' ranges of freqency available to us otherwise
-	 * we do not change it
+	 * we only care if our internally tracked freq moves outside the 'valid'
+	 * ranges of freqency available to us otherwise we do not change it
 	*/
-	if (this_dbs_info->requested_freq > policy->max
-			|| this_dbs_info->requested_freq < policy->min)
-		this_dbs_info->requested_freq = freq->new;
+	if (dbs_info->requested_freq > policy->max
+			|| dbs_info->requested_freq < policy->min)
+		dbs_info->requested_freq = freq->new;
 
 	return 0;
 }
 
-static struct notifier_block dbs_cpufreq_notifier_block = {
-	.notifier_call = dbs_cpufreq_notifier
-};
-
 /************************** sysfs interface ************************/
 static ssize_t show_sampling_rate_min(struct kobject *kobj,
 				      struct attribute *attr, char *buf)
 {
-	return sprintf(buf, "%u\n", min_sampling_rate);
+	return sprintf(buf, "%u\n", cs_dbs_data.min_sampling_rate);
 }
 
-define_one_global_ro(sampling_rate_min);
-
-/* cpufreq_conservative Governor Tunables */
-#define show_one(file_name, object)					\
-static ssize_t show_##file_name						\
-(struct kobject *kobj, struct attribute *attr, char *buf)		\
-{									\
-	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
-}
-show_one(sampling_rate, sampling_rate);
-show_one(sampling_down_factor, sampling_down_factor);
-show_one(up_threshold, up_threshold);
-show_one(down_threshold, down_threshold);
-show_one(ignore_nice_load, ignore_nice);
-show_one(freq_step, freq_step);
-
 static ssize_t store_sampling_down_factor(struct kobject *a,
 					  struct attribute *b,
 					  const char *buf, size_t count)
@@ -161,7 +168,7 @@ static ssize_t store_sampling_down_factor(struct kobject *a,
 	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
 		return -EINVAL;
 
-	dbs_tuners_ins.sampling_down_factor = input;
+	cs_tuners.sampling_down_factor = input;
 	return count;
 }
 
@@ -175,7 +182,7 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
 	if (ret != 1)
 		return -EINVAL;
 
-	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
+	cs_tuners.sampling_rate = max(input, cs_dbs_data.min_sampling_rate);
 	return count;
 }
 
@@ -186,11 +193,10 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
 	int ret;
 	ret = sscanf(buf, "%u", &input);
 
-	if (ret != 1 || input > 100 ||
-			input <= dbs_tuners_ins.down_threshold)
+	if (ret != 1 || input > 100 || input <= cs_tuners.down_threshold)
 		return -EINVAL;
 
-	dbs_tuners_ins.up_threshold = input;
+	cs_tuners.up_threshold = input;
 	return count;
 }
 
@@ -203,21 +209,19 @@ static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
 
 	/* cannot be lower than 11 otherwise freq will not fall */
 	if (ret != 1 || input < 11 || input > 100 ||
-			input >= dbs_tuners_ins.up_threshold)
+			input >= cs_tuners.up_threshold)
 		return -EINVAL;
 
-	dbs_tuners_ins.down_threshold = input;
+	cs_tuners.down_threshold = input;
 	return count;
 }
 
 static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 				      const char *buf, size_t count)
 {
-	unsigned int input;
+	unsigned int input, j;
 	int ret;
 
-	unsigned int j;
-
 	ret = sscanf(buf, "%u", &input);
 	if (ret != 1)
 		return -EINVAL;
@@ -225,19 +229,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 	if (input > 1)
 		input = 1;
 
-	if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */
+	if (input == cs_tuners.ignore_nice) /* nothing to do */
 		return count;
 
-	dbs_tuners_ins.ignore_nice = input;
+	cs_tuners.ignore_nice = input;
 
 	/* we need to re-evaluate prev_cpu_idle */
 	for_each_online_cpu(j) {
-		struct cpu_dbs_info_s *dbs_info;
+		struct cs_cpu_dbs_info_s *dbs_info;
 		dbs_info = &per_cpu(cs_cpu_dbs_info, j);
-		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
-						&dbs_info->prev_cpu_wall);
-		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
+						&dbs_info->cdbs.prev_cpu_wall);
+		if (cs_tuners.ignore_nice)
+			dbs_info->cdbs.prev_cpu_nice =
+				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
 	return count;
 }
@@ -255,18 +260,28 @@ static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
 	if (input > 100)
 		input = 100;
 
-	/* no need to test here if freq_step is zero as the user might actually
-	 * want this, they would be crazy though :) */
-	dbs_tuners_ins.freq_step = input;
+	/*
+	 * no need to test here if freq_step is zero as the user might actually
+	 * want this, they would be crazy though :)
+	 */
+	cs_tuners.freq_step = input;
 	return count;
 }
 
+show_one(cs, sampling_rate, sampling_rate);
+show_one(cs, sampling_down_factor, sampling_down_factor);
+show_one(cs, up_threshold, up_threshold);
+show_one(cs, down_threshold, down_threshold);
+show_one(cs, ignore_nice_load, ignore_nice);
+show_one(cs, freq_step, freq_step);
+
 define_one_global_rw(sampling_rate);
 define_one_global_rw(sampling_down_factor);
 define_one_global_rw(up_threshold);
 define_one_global_rw(down_threshold);
 define_one_global_rw(ignore_nice_load);
 define_one_global_rw(freq_step);
+define_one_global_ro(sampling_rate_min);
 
 static struct attribute *dbs_attributes[] = {
 	&sampling_rate_min.attr,
@@ -279,283 +294,38 @@ static struct attribute *dbs_attributes[] = {
 	NULL
 };
 
-static struct attribute_group dbs_attr_group = {
+static struct attribute_group cs_attr_group = {
 	.attrs = dbs_attributes,
 	.name = "conservative",
 };
 
 /************************** sysfs end ************************/
 
-static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
-{
-	unsigned int load = 0;
-	unsigned int max_load = 0;
-	unsigned int freq_target;
-
-	struct cpufreq_policy *policy;
-	unsigned int j;
-
-	policy = this_dbs_info->cur_policy;
-
-	/*
-	 * Every sampling_rate, we check, if current idle time is less
-	 * than 20% (default), then we try to increase frequency
-	 * Every sampling_rate*sampling_down_factor, we check, if current
-	 * idle time is more than 80%, then we try to decrease frequency
-	 *
-	 * Any frequency increase takes it to the maximum frequency.
-	 * Frequency reduction happens at minimum steps of
-	 * 5% (default) of maximum frequency
-	 */
-
-	/* Get Absolute Load */
-	for_each_cpu(j, policy->cpus) {
-		struct cpu_dbs_info_s *j_dbs_info;
-		cputime64_t cur_wall_time, cur_idle_time;
-		unsigned int idle_time, wall_time;
-
-		j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
-
-		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
-
-		wall_time = (unsigned int)
-			(cur_wall_time - j_dbs_info->prev_cpu_wall);
-		j_dbs_info->prev_cpu_wall = cur_wall_time;
-
-		idle_time = (unsigned int)
-			(cur_idle_time - j_dbs_info->prev_cpu_idle);
-		j_dbs_info->prev_cpu_idle = cur_idle_time;
-
-		if (dbs_tuners_ins.ignore_nice) {
-			u64 cur_nice;
-			unsigned long cur_nice_jiffies;
-
-			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
-					 j_dbs_info->prev_cpu_nice;
-			/*
-			 * Assumption: nice time between sampling periods will
-			 * be less than 2^32 jiffies for 32 bit sys
-			 */
-			cur_nice_jiffies = (unsigned long)
-					cputime64_to_jiffies64(cur_nice);
-
-			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-			idle_time += jiffies_to_usecs(cur_nice_jiffies);
-		}
-
-		if (unlikely(!wall_time || wall_time < idle_time))
-			continue;
-
-		load = 100 * (wall_time - idle_time) / wall_time;
-
-		if (load > max_load)
-			max_load = load;
-	}
+define_get_cpu_dbs_routines(cs_cpu_dbs_info);
 
-	/*
-	 * break out if we 'cannot' reduce the speed as the user might
-	 * want freq_step to be zero
-	 */
-	if (dbs_tuners_ins.freq_step == 0)
-		return;
-
-	/* Check for frequency increase */
-	if (max_load > dbs_tuners_ins.up_threshold) {
-		this_dbs_info->down_skip = 0;
-
-		/* if we are already at full speed then break out early */
-		if (this_dbs_info->requested_freq == policy->max)
-			return;
-
-		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;
-
-		/* max freq cannot be less than 100. But who knows.... */
-		if (unlikely(freq_target == 0))
-			freq_target = 5;
-
-		this_dbs_info->requested_freq += freq_target;
-		if (this_dbs_info->requested_freq > policy->max)
-			this_dbs_info->requested_freq = policy->max;
-
-		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
-			CPUFREQ_RELATION_H);
-		return;
-	}
-
-	/*
-	 * The optimal frequency is the frequency that is the lowest that
-	 * can support the current CPU usage without triggering the up
-	 * policy. To be safe, we focus 10 points under the threshold.
-	 */
-	if (max_load < (dbs_tuners_ins.down_threshold - 10)) {
-		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;
-
-		this_dbs_info->requested_freq -= freq_target;
-		if (this_dbs_info->requested_freq < policy->min)
-			this_dbs_info->requested_freq = policy->min;
-
-		/*
-		 * if we cannot reduce the frequency anymore, break out early
-		 */
-		if (policy->cur == policy->min)
-			return;
-
-		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
-				CPUFREQ_RELATION_H);
-		return;
-	}
-}
-
-static void do_dbs_timer(struct work_struct *work)
-{
-	struct cpu_dbs_info_s *dbs_info =
-		container_of(work, struct cpu_dbs_info_s, work.work);
-	unsigned int cpu = dbs_info->cpu;
-
-	/* We want all CPUs to do sampling nearly on same jiffy */
-	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
-
-	delay -= jiffies % delay;
-
-	mutex_lock(&dbs_info->timer_mutex);
-
-	dbs_check_cpu(dbs_info);
-
-	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
-	mutex_unlock(&dbs_info->timer_mutex);
-}
-
-static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
-{
-	/* We want all CPUs to do sampling nearly on same jiffy */
-	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
-	delay -= jiffies % delay;
+static struct notifier_block cs_cpufreq_notifier_block = {
+	.notifier_call = dbs_cpufreq_notifier,
+};
 
-	dbs_info->enable = 1;
-	INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer);
-	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
-}
+static struct cs_ops cs_ops = {
+	.notifier_block = &cs_cpufreq_notifier_block,
+};
 
-static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
-{
-	dbs_info->enable = 0;
-	cancel_delayed_work_sync(&dbs_info->work);
-}
+static struct dbs_data cs_dbs_data = {
+	.governor = GOV_CONSERVATIVE,
+	.attr_group = &cs_attr_group,
+	.tuners = &cs_tuners,
+	.get_cpu_cdbs = get_cpu_cdbs,
+	.get_cpu_dbs_info_s = get_cpu_dbs_info_s,
+	.gov_dbs_timer = cs_dbs_timer,
+	.gov_check_cpu = cs_check_cpu,
+	.gov_ops = &cs_ops,
+};
 
-static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
 				   unsigned int event)
 {
-	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_info_s *this_dbs_info;
-	unsigned int j;
-	int rc;
-
-	this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
-
-	switch (event) {
-	case CPUFREQ_GOV_START:
-		if ((!cpu_online(cpu)) || (!policy->cur))
-			return -EINVAL;
-
-		mutex_lock(&dbs_mutex);
-
-		for_each_cpu(j, policy->cpus) {
-			struct cpu_dbs_info_s *j_dbs_info;
-			j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
-			j_dbs_info->cur_policy = policy;
-
-			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
-						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice)
-				j_dbs_info->prev_cpu_nice =
-						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-		}
-		this_dbs_info->cpu = cpu;
-		this_dbs_info->down_skip = 0;
-		this_dbs_info->requested_freq = policy->cur;
-
-		mutex_init(&this_dbs_info->timer_mutex);
-		dbs_enable++;
-		/*
-		 * Start the timerschedule work, when this governor
-		 * is used for first time
-		 */
-		if (dbs_enable == 1) {
-			unsigned int latency;
-			/* policy latency is in nS. Convert it to uS first */
-			latency = policy->cpuinfo.transition_latency / 1000;
-			if (latency == 0)
-				latency = 1;
-
-			rc = sysfs_create_group(cpufreq_global_kobject,
-						&dbs_attr_group);
-			if (rc) {
-				mutex_unlock(&dbs_mutex);
-				return rc;
-			}
-
-			/*
-			 * conservative does not implement micro like ondemand
-			 * governor, thus we are bound to jiffes/HZ
-			 */
-			min_sampling_rate =
-				MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
-			/* Bring kernel and HW constraints together */
-			min_sampling_rate = max(min_sampling_rate,
-					MIN_LATENCY_MULTIPLIER * latency);
-			dbs_tuners_ins.sampling_rate =
-				max(min_sampling_rate,
-				    latency * LATENCY_MULTIPLIER);
-
-			cpufreq_register_notifier(
-					&dbs_cpufreq_notifier_block,
-					CPUFREQ_TRANSITION_NOTIFIER);
-		}
-		mutex_unlock(&dbs_mutex);
-
-		dbs_timer_init(this_dbs_info);
-
-		break;
-
-	case CPUFREQ_GOV_STOP:
-		dbs_timer_exit(this_dbs_info);
-
-		mutex_lock(&dbs_mutex);
-		dbs_enable--;
-		mutex_destroy(&this_dbs_info->timer_mutex);
-
-		/*
-		 * Stop the timerschedule work, when this governor
-		 * is used for first time
-		 */
-		if (dbs_enable == 0)
-			cpufreq_unregister_notifier(
-					&dbs_cpufreq_notifier_block,
-					CPUFREQ_TRANSITION_NOTIFIER);
-
-		mutex_unlock(&dbs_mutex);
-		if (!dbs_enable)
-			sysfs_remove_group(cpufreq_global_kobject,
-					   &dbs_attr_group);
-
-		break;
-
-	case CPUFREQ_GOV_LIMITS:
-		mutex_lock(&this_dbs_info->timer_mutex);
-		if (policy->max < this_dbs_info->cur_policy->cur)
-			__cpufreq_driver_target(
-					this_dbs_info->cur_policy,
-					policy->max, CPUFREQ_RELATION_H);
-		else if (policy->min > this_dbs_info->cur_policy->cur)
-			__cpufreq_driver_target(
-					this_dbs_info->cur_policy,
-					policy->min, CPUFREQ_RELATION_L);
-		dbs_check_cpu(this_dbs_info);
-		mutex_unlock(&this_dbs_info->timer_mutex);
-
-		break;
-	}
-	return 0;
+	return cpufreq_governor_dbs(&cs_dbs_data, policy, event);
 }
 
 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
@@ -563,13 +333,14 @@ static
 #endif
 struct cpufreq_governor cpufreq_gov_conservative = {
 	.name			= "conservative",
-	.governor		= cpufreq_governor_dbs,
+	.governor		= cs_cpufreq_governor_dbs,
 	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
 	.owner			= THIS_MODULE,
 };
 
 static int __init cpufreq_gov_dbs_init(void)
 {
+	mutex_init(&cs_dbs_data.mutex);
 	return cpufreq_register_governor(&cpufreq_gov_conservative);
 }
 
@@ -578,7 +349,6 @@ static void __exit cpufreq_gov_dbs_exit(void)
 	cpufreq_unregister_governor(&cpufreq_gov_conservative);
 }
 
-
 MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
 MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
 		"Low Latency Frequency Transition capable processors "
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 679842a8d34a..5ea2c829a796 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -3,19 +3,31 @@
  *
  * CPUFREQ governors common code
  *
+ * Copyright	(C) 2001 Russell King
+ *		(C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ *		(C) 2003 Jun Nakajima <jun.nakajima@intel.com>
+ *		(C) 2009 Alexander Clouter <alex@digriz.org.uk>
+ *		(c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <asm/cputime.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
 #include <linux/export.h>
 #include <linux/kernel_stat.h>
+#include <linux/mutex.h>
 #include <linux/tick.h>
 #include <linux/types.h>
-/*
- * Code picked from earlier governer implementations
- */
+#include <linux/workqueue.h>
+
+#include "cpufreq_governor.h"
+
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
 	u64 idle_time;
@@ -33,9 +45,9 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 
 	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = cputime_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return cputime_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -50,3 +62,257 @@ cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
 	return idle_time;
 }
 EXPORT_SYMBOL_GPL(get_cpu_idle_time);
+
+void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
+{
+	struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);
+	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+	struct cpufreq_policy *policy;
+	unsigned int max_load = 0;
+	unsigned int ignore_nice;
+	unsigned int j;
+
+	if (dbs_data->governor == GOV_ONDEMAND)
+		ignore_nice = od_tuners->ignore_nice;
+	else
+		ignore_nice = cs_tuners->ignore_nice;
+
+	policy = cdbs->cur_policy;
+
+	/* Get Absolute Load (in terms of freq for ondemand gov) */
+	for_each_cpu(j, policy->cpus) {
+		struct cpu_dbs_common_info *j_cdbs;
+		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
+		unsigned int idle_time, wall_time, iowait_time;
+		unsigned int load;
+
+		j_cdbs = dbs_data->get_cpu_cdbs(j);
+
+		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
+
+		wall_time = (unsigned int)
+			(cur_wall_time - j_cdbs->prev_cpu_wall);
+		j_cdbs->prev_cpu_wall = cur_wall_time;
+
+		idle_time = (unsigned int)
+			(cur_idle_time - j_cdbs->prev_cpu_idle);
+		j_cdbs->prev_cpu_idle = cur_idle_time;
+
+		if (ignore_nice) {
+			u64 cur_nice;
+			unsigned long cur_nice_jiffies;
+
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 cdbs->prev_cpu_nice;
+			/*
+			 * Assumption: nice time between sampling periods will
+			 * be less than 2^32 jiffies for 32 bit sys
+			 */
+			cur_nice_jiffies = (unsigned long)
+					cputime64_to_jiffies64(cur_nice);
+
+			cdbs->prev_cpu_nice =
+				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+			idle_time += jiffies_to_usecs(cur_nice_jiffies);
+		}
+
+		if (dbs_data->governor == GOV_ONDEMAND) {
+			struct od_cpu_dbs_info_s *od_j_dbs_info =
+				dbs_data->get_cpu_dbs_info_s(cpu);
+
+			cur_iowait_time = get_cpu_iowait_time_us(j,
+					&cur_wall_time);
+			if (cur_iowait_time == -1ULL)
+				cur_iowait_time = 0;
+
+			iowait_time = (unsigned int) (cur_iowait_time -
+					od_j_dbs_info->prev_cpu_iowait);
+			od_j_dbs_info->prev_cpu_iowait = cur_iowait_time;
+
+			/*
+			 * For the purpose of ondemand, waiting for disk IO is
+			 * an indication that you're performance critical, and
+			 * not that the system is actually idle. So subtract the
+			 * iowait time from the cpu idle time.
+			 */
+			if (od_tuners->io_is_busy && idle_time >= iowait_time)
+				idle_time -= iowait_time;
+		}
+
+		if (unlikely(!wall_time || wall_time < idle_time))
+			continue;
+
+		load = 100 * (wall_time - idle_time) / wall_time;
+
+		if (dbs_data->governor == GOV_ONDEMAND) {
+			int freq_avg = __cpufreq_driver_getavg(policy, j);
+			if (freq_avg <= 0)
+				freq_avg = policy->cur;
+
+			load *= freq_avg;
+		}
+
+		if (load > max_load)
+			max_load = load;
+	}
+
+	dbs_data->gov_check_cpu(cpu, max_load);
+}
+EXPORT_SYMBOL_GPL(dbs_check_cpu);
+
+static inline void dbs_timer_init(struct dbs_data *dbs_data,
+		struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
+{
+	int delay = delay_for_sampling_rate(sampling_rate);
+
+	INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
+	schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
+}
+
+static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
+{
+	cancel_delayed_work_sync(&cdbs->work);
+}
+
+int cpufreq_governor_dbs(struct dbs_data *dbs_data,
+		struct cpufreq_policy *policy, unsigned int event)
+{
+	struct od_cpu_dbs_info_s *od_dbs_info = NULL;
+	struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
+	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+	struct cpu_dbs_common_info *cpu_cdbs;
+	unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
+	int rc;
+
+	cpu_cdbs = dbs_data->get_cpu_cdbs(cpu);
+
+	if (dbs_data->governor == GOV_CONSERVATIVE) {
+		cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
+		sampling_rate = &cs_tuners->sampling_rate;
+		ignore_nice = cs_tuners->ignore_nice;
+	} else {
+		od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
+		sampling_rate = &od_tuners->sampling_rate;
+		ignore_nice = od_tuners->ignore_nice;
+	}
+
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		if ((!cpu_online(cpu)) || (!policy->cur))
+			return -EINVAL;
+
+		mutex_lock(&dbs_data->mutex);
+
+		dbs_data->enable++;
+		cpu_cdbs->cpu = cpu;
+		for_each_cpu(j, policy->cpus) {
+			struct cpu_dbs_common_info *j_cdbs;
+			j_cdbs = dbs_data->get_cpu_cdbs(j);
+
+			j_cdbs->cur_policy = policy;
+			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
+					&j_cdbs->prev_cpu_wall);
+			if (ignore_nice)
+				j_cdbs->prev_cpu_nice =
+					kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+		}
+
+		/*
+		 * Start the timerschedule work, when this governor is used for
+		 * first time
+		 */
+		if (dbs_data->enable != 1)
+			goto second_time;
+
+		rc = sysfs_create_group(cpufreq_global_kobject,
+				dbs_data->attr_group);
+		if (rc) {
+			mutex_unlock(&dbs_data->mutex);
+			return rc;
+		}
+
+		/* policy latency is in nS. Convert it to uS first */
+		latency = policy->cpuinfo.transition_latency / 1000;
+		if (latency == 0)
+			latency = 1;
+
+		/*
+		 * conservative does not implement micro like ondemand
+		 * governor, thus we are bound to jiffes/HZ
+		 */
+		if (dbs_data->governor == GOV_CONSERVATIVE) {
+			struct cs_ops *ops = dbs_data->gov_ops;
+
+			cpufreq_register_notifier(ops->notifier_block,
+					CPUFREQ_TRANSITION_NOTIFIER);
+
+			dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
+				jiffies_to_usecs(10);
+		} else {
+			struct od_ops *ops = dbs_data->gov_ops;
+
+			od_tuners->io_is_busy = ops->io_busy();
+		}
+
+		/* Bring kernel and HW constraints together */
+		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
+				MIN_LATENCY_MULTIPLIER * latency);
+		*sampling_rate = max(dbs_data->min_sampling_rate, latency *
+				LATENCY_MULTIPLIER);
+
+second_time:
+		if (dbs_data->governor == GOV_CONSERVATIVE) {
+			cs_dbs_info->down_skip = 0;
+			cs_dbs_info->enable = 1;
+			cs_dbs_info->requested_freq = policy->cur;
+		} else {
+			struct od_ops *ops = dbs_data->gov_ops;
+			od_dbs_info->rate_mult = 1;
+			od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
+			ops->powersave_bias_init_cpu(cpu);
+		}
+		mutex_unlock(&dbs_data->mutex);
+
+		mutex_init(&cpu_cdbs->timer_mutex);
+		dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate);
+		break;
+
+	case CPUFREQ_GOV_STOP:
+		if (dbs_data->governor == GOV_CONSERVATIVE)
+			cs_dbs_info->enable = 0;
+
+		dbs_timer_exit(cpu_cdbs);
+
+		mutex_lock(&dbs_data->mutex);
+		mutex_destroy(&cpu_cdbs->timer_mutex);
+		dbs_data->enable--;
+		if (!dbs_data->enable) {
+			struct cs_ops *ops = dbs_data->gov_ops;
+
+			sysfs_remove_group(cpufreq_global_kobject,
+					dbs_data->attr_group);
+			if (dbs_data->governor == GOV_CONSERVATIVE)
+				cpufreq_unregister_notifier(ops->notifier_block,
+						CPUFREQ_TRANSITION_NOTIFIER);
+		}
+		mutex_unlock(&dbs_data->mutex);
+
+		break;
+
+	case CPUFREQ_GOV_LIMITS:
+		mutex_lock(&cpu_cdbs->timer_mutex);
+		if (policy->max < cpu_cdbs->cur_policy->cur)
+			__cpufreq_driver_target(cpu_cdbs->cur_policy,
+					policy->max, CPUFREQ_RELATION_H);
+		else if (policy->min > cpu_cdbs->cur_policy->cur)
+			__cpufreq_driver_target(cpu_cdbs->cur_policy,
+					policy->min, CPUFREQ_RELATION_L);
+		dbs_check_cpu(dbs_data, cpu);
+		mutex_unlock(&cpu_cdbs->timer_mutex);
+		break;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
new file mode 100644
index 000000000000..34e14adfc3f9
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -0,0 +1,177 @@
+/*
+ * drivers/cpufreq/cpufreq_governor.h
+ *
+ * Header file for CPUFreq governors common code
+ *
+ * Copyright	(C) 2001 Russell King
+ *		(C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ *		(C) 2003 Jun Nakajima <jun.nakajima@intel.com>
+ *		(C) 2009 Alexander Clouter <alex@digriz.org.uk>
+ *		(c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _CPUFREQ_GOVERNER_H
+#define _CPUFREQ_GOVERNER_H
+
+#include <asm/cputime.h>
+#include <linux/cpufreq.h>
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+
+/*
+ * The polling frequency depends on the capability of the processor. Default
+ * polling frequency is 1000 times the transition latency of the processor. The
+ * governor will work on any processor with transition latency <= 10mS, using
+ * appropriate sampling rate.
+ *
+ * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
+ * this governor will not work. All times here are in uS.
+ */
+#define MIN_SAMPLING_RATE_RATIO			(2)
+#define LATENCY_MULTIPLIER			(1000)
+#define MIN_LATENCY_MULTIPLIER			(100)
+#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
+
+/* Ondemand Sampling types */
+enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
+
+/* Macro creating sysfs show routines */
+#define show_one(_gov, file_name, object)				\
+static ssize_t show_##file_name						\
+(struct kobject *kobj, struct attribute *attr, char *buf)		\
+{									\
+	return sprintf(buf, "%u\n", _gov##_tuners.object);		\
+}
+
+#define define_get_cpu_dbs_routines(_dbs_info)				\
+static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu)		\
+{									\
+	return &per_cpu(_dbs_info, cpu).cdbs;				\
+}									\
+									\
+static void *get_cpu_dbs_info_s(int cpu)				\
+{									\
+	return &per_cpu(_dbs_info, cpu);				\
+}
+
+/*
+ * Abbreviations:
+ * dbs: used as a shortform for demand based switching It helps to keep variable
+ *	names smaller, simpler
+ * cdbs: common dbs
+ * on_*: On-demand governor
+ * cs_*: Conservative governor
+ */
+
+/* Per cpu structures */
+struct cpu_dbs_common_info {
+	int cpu;
+	cputime64_t prev_cpu_idle;
+	cputime64_t prev_cpu_wall;
+	cputime64_t prev_cpu_nice;
+	struct cpufreq_policy *cur_policy;
+	struct delayed_work work;
+	/*
+	 * percpu mutex that serializes governor limit change with gov_dbs_timer
+	 * invocation. We do not want gov_dbs_timer to run when user is changing
+	 * the governor or limits.
+	 */
+	struct mutex timer_mutex;
+};
+
+struct od_cpu_dbs_info_s {
+	struct cpu_dbs_common_info cdbs;
+	cputime64_t prev_cpu_iowait;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int freq_lo;
+	unsigned int freq_lo_jiffies;
+	unsigned int freq_hi_jiffies;
+	unsigned int rate_mult;
+	unsigned int sample_type:1;
+};
+
+struct cs_cpu_dbs_info_s {
+	struct cpu_dbs_common_info cdbs;
+	unsigned int down_skip;
+	unsigned int requested_freq;
+	unsigned int enable:1;
+};
+
+/* Governers sysfs tunables */
+struct od_dbs_tuners {
+	unsigned int ignore_nice;
+	unsigned int sampling_rate;
+	unsigned int sampling_down_factor;
+	unsigned int up_threshold;
+	unsigned int down_differential;
+	unsigned int powersave_bias;
+	unsigned int io_is_busy;
+};
+
+struct cs_dbs_tuners {
+	unsigned int ignore_nice;
+	unsigned int sampling_rate;
+	unsigned int sampling_down_factor;
+	unsigned int up_threshold;
+	unsigned int down_threshold;
+	unsigned int freq_step;
+};
+
+/* Per Governer data */
+struct dbs_data {
+	/* Common across governors */
+	#define GOV_ONDEMAND		0
+	#define GOV_CONSERVATIVE	1
+	int governor;
+	unsigned int min_sampling_rate;
+	unsigned int enable; /* number of CPUs using this policy */
+	struct attribute_group *attr_group;
+	void *tuners;
+
+	/* dbs_mutex protects dbs_enable in governor start/stop */
+	struct mutex mutex;
+
+	struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu);
+	void *(*get_cpu_dbs_info_s)(int cpu);
+	void (*gov_dbs_timer)(struct work_struct *work);
+	void (*gov_check_cpu)(int cpu, unsigned int load);
+
+	/* Governor specific ops, see below */
+	void *gov_ops;
+};
+
+/* Governor specific ops, will be passed to dbs_data->gov_ops */
+struct od_ops {
+	int (*io_busy)(void);
+	void (*powersave_bias_init_cpu)(int cpu);
+	unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
+			unsigned int freq_next, unsigned int relation);
+	void (*freq_increase)(struct cpufreq_policy *p, unsigned int freq);
+};
+
+struct cs_ops {
+	struct notifier_block *notifier_block;
+};
+
+static inline int delay_for_sampling_rate(unsigned int sampling_rate)
+{
+	int delay = usecs_to_jiffies(sampling_rate);
+
+	/* We want all CPUs to do sampling nearly on same jiffy */
+	if (num_online_cpus() > 1)
+		delay -= jiffies % delay;
+
+	return delay;
+}
+
+cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall);
+void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
+int cpufreq_governor_dbs(struct dbs_data *dbs_data,
+		struct cpufreq_policy *policy, unsigned int event);
+#endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index d7f774bb49dd..bdaab9206303 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -10,24 +10,23 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cpufreq.h>
-#include <linux/cpu.h>
-#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/kernel_stat.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/hrtimer.h>
+#include <linux/percpu-defs.h>
+#include <linux/sysfs.h>
 #include <linux/tick.h>
-#include <linux/ktime.h>
-#include <linux/sched.h>
+#include <linux/types.h>
 
-/*
- * dbs is used in this file as a shortform for demandbased switching
- * It helps to keep variable names smaller, simpler
- */
+#include "cpufreq_governor.h"
 
+/* On-demand governor macors */
 #define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
@@ -38,80 +37,10 @@
 #define MIN_FREQUENCY_UP_THRESHOLD		(11)
 #define MAX_FREQUENCY_UP_THRESHOLD		(100)
 
-/*
- * The polling frequency of this governor depends on the capability of
- * the processor. Default polling frequency is 1000 times the transition
- * latency of the processor. The governor will work on any processor with
- * transition latency <= 10mS, using appropriate sampling
- * rate.
- * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
- * this governor will not work.
- * All times here are in uS.
- */
-#define MIN_SAMPLING_RATE_RATIO			(2)
-
-static unsigned int min_sampling_rate;
-
-#define LATENCY_MULTIPLIER			(1000)
-#define MIN_LATENCY_MULTIPLIER			(100)
-#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
-
-static void do_dbs_timer(struct work_struct *work);
-static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
-				unsigned int event);
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
-static
-#endif
-struct cpufreq_governor cpufreq_gov_ondemand = {
-       .name                   = "ondemand",
-       .governor               = cpufreq_governor_dbs,
-       .max_transition_latency = TRANSITION_LATENCY_LIMIT,
-       .owner                  = THIS_MODULE,
-};
-
-/* Sampling types */
-enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
-
-struct cpu_dbs_info_s {
-	cputime64_t prev_cpu_idle;
-	cputime64_t prev_cpu_iowait;
-	cputime64_t prev_cpu_wall;
-	cputime64_t prev_cpu_nice;
-	struct cpufreq_policy *cur_policy;
-	struct delayed_work work;
-	struct cpufreq_frequency_table *freq_table;
-	unsigned int freq_lo;
-	unsigned int freq_lo_jiffies;
-	unsigned int freq_hi_jiffies;
-	unsigned int rate_mult;
-	int cpu;
-	unsigned int sample_type:1;
-	/*
-	 * percpu mutex that serializes governor limit change with
-	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
-	 * when user is changing the governor or limits.
-	 */
-	struct mutex timer_mutex;
-};
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
-
-static unsigned int dbs_enable;	/* number of CPUs using this policy */
+static struct dbs_data od_dbs_data;
+static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
 
-/*
- * dbs_mutex protects dbs_enable in governor start/stop.
- */
-static DEFINE_MUTEX(dbs_mutex);
-
-static struct dbs_tuners {
-	unsigned int sampling_rate;
-	unsigned int up_threshold;
-	unsigned int down_differential;
-	unsigned int ignore_nice;
-	unsigned int sampling_down_factor;
-	unsigned int powersave_bias;
-	unsigned int io_is_busy;
-} dbs_tuners_ins = {
+static struct od_dbs_tuners od_tuners = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
 	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
@@ -119,14 +48,35 @@ static struct dbs_tuners {
 	.powersave_bias = 0,
 };
 
-static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
+static void ondemand_powersave_bias_init_cpu(int cpu)
 {
-	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
+	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
 
-	if (iowait_time == -1ULL)
-		return 0;
+	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
+	dbs_info->freq_lo = 0;
+}
 
-	return iowait_time;
+/*
+ * Not all CPUs want IO time to be accounted as busy; this depends on how
+ * efficient idling at a higher frequency/voltage is.
+ * Pavel Machek says this is not so for various generations of AMD and old
+ * Intel systems.
+ * Mike Chan (androidlcom) calis this is also not true for ARM.
+ * Because of this, whitelist specific known (series) of CPUs by default, and
+ * leave all others up to the user.
+ */
+static int should_io_be_busy(void)
+{
+#if defined(CONFIG_X86)
+	/*
+	 * For Intel, Core 2 (model 15) andl later have an efficient idle.
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+			boot_cpu_data.x86 == 6 &&
+			boot_cpu_data.x86_model >= 15)
+		return 1;
+#endif
+	return 0;
 }
 
 /*
@@ -135,14 +85,13 @@ static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wal
  * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
  */
 static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
-					  unsigned int freq_next,
-					  unsigned int relation)
+		unsigned int freq_next, unsigned int relation)
 {
 	unsigned int freq_req, freq_reduc, freq_avg;
 	unsigned int freq_hi, freq_lo;
 	unsigned int index = 0;
 	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
-	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
+	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
 						   policy->cpu);
 
 	if (!dbs_info->freq_table) {
@@ -154,7 +103,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
 	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
 			relation, &index);
 	freq_req = dbs_info->freq_table[index].frequency;
-	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+	freq_reduc = freq_req * od_tuners.powersave_bias / 1000;
 	freq_avg = freq_req - freq_reduc;
 
 	/* Find freq bounds for freq_avg in freq_table */
@@ -173,7 +122,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
 		dbs_info->freq_lo_jiffies = 0;
 		return freq_lo;
 	}
-	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	jiffies_total = usecs_to_jiffies(od_tuners.sampling_rate);
 	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
 	jiffies_hi += ((freq_hi - freq_lo) / 2);
 	jiffies_hi /= (freq_hi - freq_lo);
@@ -184,13 +133,6 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
 	return freq_hi;
 }
 
-static void ondemand_powersave_bias_init_cpu(int cpu)
-{
-	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
-	dbs_info->freq_lo = 0;
-}
-
 static void ondemand_powersave_bias_init(void)
 {
 	int i;
@@ -199,53 +141,138 @@ static void ondemand_powersave_bias_init(void)
 	}
 }
 
-/************************** sysfs interface ************************/
+static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
+{
+	if (od_tuners.powersave_bias)
+		freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+	else if (p->cur == p->max)
+		return;
 
-static ssize_t show_sampling_rate_min(struct kobject *kobj,
-				      struct attribute *attr, char *buf)
+	__cpufreq_driver_target(p, freq, od_tuners.powersave_bias ?
+			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
+}
+
+/*
+ * Every sampling_rate, we check, if current idle time is less than 20%
+ * (default), then we try to increase frequency Every sampling_rate, we look for
+ * a the lowest frequency which can sustain the load while keeping idle time
+ * over 30%. If such a frequency exist, we try to decrease to this frequency.
+ *
+ * Any frequency increase takes it to the maximum frequency. Frequency reduction
+ * happens at minimum steps of 5% (default) of current frequency
+ */
+static void od_check_cpu(int cpu, unsigned int load_freq)
 {
-	return sprintf(buf, "%u\n", min_sampling_rate);
+	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+	struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
+
+	dbs_info->freq_lo = 0;
+
+	/* Check for frequency increase */
+	if (load_freq > od_tuners.up_threshold * policy->cur) {
+		/* If switching to max speed, apply sampling_down_factor */
+		if (policy->cur < policy->max)
+			dbs_info->rate_mult =
+				od_tuners.sampling_down_factor;
+		dbs_freq_increase(policy, policy->max);
+		return;
+	}
+
+	/* Check for frequency decrease */
+	/* if we cannot reduce the frequency anymore, break out early */
+	if (policy->cur == policy->min)
+		return;
+
+	/*
+	 * The optimal frequency is the frequency that is the lowest that can
+	 * support the current CPU usage without triggering the up policy. To be
+	 * safe, we focus 10 points under the threshold.
+	 */
+	if (load_freq < (od_tuners.up_threshold - od_tuners.down_differential) *
+			policy->cur) {
+		unsigned int freq_next;
+		freq_next = load_freq / (od_tuners.up_threshold -
+				od_tuners.down_differential);
+
+		/* No longer fully busy, reset rate_mult */
+		dbs_info->rate_mult = 1;
+
+		if (freq_next < policy->min)
+			freq_next = policy->min;
+
+		if (!od_tuners.powersave_bias) {
+			__cpufreq_driver_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+		} else {
+			int freq = powersave_bias_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+			__cpufreq_driver_target(policy, freq,
+					CPUFREQ_RELATION_L);
+		}
+	}
 }
 
-define_one_global_ro(sampling_rate_min);
+static void od_dbs_timer(struct work_struct *work)
+{
+	struct od_cpu_dbs_info_s *dbs_info =
+		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
+	unsigned int cpu = dbs_info->cdbs.cpu;
+	int delay, sample_type = dbs_info->sample_type;
 
-/* cpufreq_ondemand Governor Tunables */
-#define show_one(file_name, object)					\
-static ssize_t show_##file_name						\
-(struct kobject *kobj, struct attribute *attr, char *buf)              \
-{									\
-	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
+	mutex_lock(&dbs_info->cdbs.timer_mutex);
+
+	/* Common NORMAL_SAMPLE setup */
+	dbs_info->sample_type = OD_NORMAL_SAMPLE;
+	if (sample_type == OD_SUB_SAMPLE) {
+		delay = dbs_info->freq_lo_jiffies;
+		__cpufreq_driver_target(dbs_info->cdbs.cur_policy,
+			dbs_info->freq_lo, CPUFREQ_RELATION_H);
+	} else {
+		dbs_check_cpu(&od_dbs_data, cpu);
+		if (dbs_info->freq_lo) {
+			/* Setup timer for SUB_SAMPLE */
+			dbs_info->sample_type = OD_SUB_SAMPLE;
+			delay = dbs_info->freq_hi_jiffies;
+		} else {
+			delay = delay_for_sampling_rate(dbs_info->rate_mult);
+		}
+	}
+
+	schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
+	mutex_unlock(&dbs_info->cdbs.timer_mutex);
+}
+
+/************************** sysfs interface ************************/
+
+static ssize_t show_sampling_rate_min(struct kobject *kobj,
+				      struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", od_dbs_data.min_sampling_rate);
 }
-show_one(sampling_rate, sampling_rate);
-show_one(io_is_busy, io_is_busy);
-show_one(up_threshold, up_threshold);
-show_one(sampling_down_factor, sampling_down_factor);
-show_one(ignore_nice_load, ignore_nice);
-show_one(powersave_bias, powersave_bias);
 
 /**
  * update_sampling_rate - update sampling rate effective immediately if needed.
  * @new_rate: new sampling rate
  *
  * If new rate is smaller than the old, simply updaing
- * dbs_tuners_int.sampling_rate might not be appropriate. For example,
- * if the original sampling_rate was 1 second and the requested new sampling
- * rate is 10 ms because the user needs immediate reaction from ondemand
- * governor, but not sure if higher frequency will be required or not,
- * then, the governor may change the sampling rate too late; up to 1 second
- * later. Thus, if we are reducing the sampling rate, we need to make the
- * new value effective immediately.
+ * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
+ * original sampling_rate was 1 second and the requested new sampling rate is 10
+ * ms because the user needs immediate reaction from ondemand governor, but not
+ * sure if higher frequency will be required or not, then, the governor may
+ * change the sampling rate too late; up to 1 second later. Thus, if we are
+ * reducing the sampling rate, we need to make the new value effective
+ * immediately.
  */
 static void update_sampling_rate(unsigned int new_rate)
 {
 	int cpu;
 
-	dbs_tuners_ins.sampling_rate = new_rate
-				     = max(new_rate, min_sampling_rate);
+	od_tuners.sampling_rate = new_rate = max(new_rate,
+			od_dbs_data.min_sampling_rate);
 
 	for_each_online_cpu(cpu) {
 		struct cpufreq_policy *policy;
-		struct cpu_dbs_info_s *dbs_info;
+		struct od_cpu_dbs_info_s *dbs_info;
 		unsigned long next_sampling, appointed_at;
 
 		policy = cpufreq_cpu_get(cpu);
@@ -254,28 +281,28 @@ static void update_sampling_rate(unsigned int new_rate)
 		dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
 		cpufreq_cpu_put(policy);
 
-		mutex_lock(&dbs_info->timer_mutex);
+		mutex_lock(&dbs_info->cdbs.timer_mutex);
 
-		if (!delayed_work_pending(&dbs_info->work)) {
-			mutex_unlock(&dbs_info->timer_mutex);
+		if (!delayed_work_pending(&dbs_info->cdbs.work)) {
+			mutex_unlock(&dbs_info->cdbs.timer_mutex);
 			continue;
 		}
 
-		next_sampling  = jiffies + usecs_to_jiffies(new_rate);
-		appointed_at = dbs_info->work.timer.expires;
-
+		next_sampling = jiffies + usecs_to_jiffies(new_rate);
+		appointed_at = dbs_info->cdbs.work.timer.expires;
 
 		if (time_before(next_sampling, appointed_at)) {
 
-			mutex_unlock(&dbs_info->timer_mutex);
-			cancel_delayed_work_sync(&dbs_info->work);
-			mutex_lock(&dbs_info->timer_mutex);
+			mutex_unlock(&dbs_info->cdbs.timer_mutex);
+			cancel_delayed_work_sync(&dbs_info->cdbs.work);
+			mutex_lock(&dbs_info->cdbs.timer_mutex);
 
-			schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work,
-						 usecs_to_jiffies(new_rate));
+			schedule_delayed_work_on(dbs_info->cdbs.cpu,
+					&dbs_info->cdbs.work,
+					usecs_to_jiffies(new_rate));
 
 		}
-		mutex_unlock(&dbs_info->timer_mutex);
+		mutex_unlock(&dbs_info->cdbs.timer_mutex);
 	}
 }
 
@@ -300,7 +327,7 @@ static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
 	ret = sscanf(buf, "%u", &input);
 	if (ret != 1)
 		return -EINVAL;
-	dbs_tuners_ins.io_is_busy = !!input;
+	od_tuners.io_is_busy = !!input;
 	return count;
 }
 
@@ -315,7 +342,7 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
 			input < MIN_FREQUENCY_UP_THRESHOLD) {
 		return -EINVAL;
 	}
-	dbs_tuners_ins.up_threshold = input;
+	od_tuners.up_threshold = input;
 	return count;
 }
 
@@ -328,12 +355,12 @@ static ssize_t store_sampling_down_factor(struct kobject *a,
 
 	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
 		return -EINVAL;
-	dbs_tuners_ins.sampling_down_factor = input;
+	od_tuners.sampling_down_factor = input;
 
 	/* Reset down sampling multiplier in case it was active */
 	for_each_online_cpu(j) {
-		struct cpu_dbs_info_s *dbs_info;
-		dbs_info = &per_cpu(od_cpu_dbs_info, j);
+		struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
+				j);
 		dbs_info->rate_mult = 1;
 	}
 	return count;
@@ -354,19 +381,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 	if (input > 1)
 		input = 1;
 
-	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
+	if (input == od_tuners.ignore_nice) { /* nothing to do */
 		return count;
 	}
-	dbs_tuners_ins.ignore_nice = input;
+	od_tuners.ignore_nice = input;
 
 	/* we need to re-evaluate prev_cpu_idle */
 	for_each_online_cpu(j) {
-		struct cpu_dbs_info_s *dbs_info;
+		struct od_cpu_dbs_info_s *dbs_info;
 		dbs_info = &per_cpu(od_cpu_dbs_info, j);
-		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
-						&dbs_info->prev_cpu_wall);
-		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
+						&dbs_info->cdbs.prev_cpu_wall);
+		if (od_tuners.ignore_nice)
+			dbs_info->cdbs.prev_cpu_nice =
+				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
 	}
 	return count;
@@ -385,17 +413,25 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
 	if (input > 1000)
 		input = 1000;
 
-	dbs_tuners_ins.powersave_bias = input;
+	od_tuners.powersave_bias = input;
 	ondemand_powersave_bias_init();
 	return count;
 }
 
+show_one(od, sampling_rate, sampling_rate);
+show_one(od, io_is_busy, io_is_busy);
+show_one(od, up_threshold, up_threshold);
+show_one(od, sampling_down_factor, sampling_down_factor);
+show_one(od, ignore_nice_load, ignore_nice);
+show_one(od, powersave_bias, powersave_bias);
+
 define_one_global_rw(sampling_rate);
 define_one_global_rw(io_is_busy);
 define_one_global_rw(up_threshold);
 define_one_global_rw(sampling_down_factor);
 define_one_global_rw(ignore_nice_load);
 define_one_global_rw(powersave_bias);
+define_one_global_ro(sampling_rate_min);
 
 static struct attribute *dbs_attributes[] = {
 	&sampling_rate_min.attr,
@@ -408,354 +444,71 @@ static struct attribute *dbs_attributes[] = {
 	NULL
 };
 
-static struct attribute_group dbs_attr_group = {
+static struct attribute_group od_attr_group = {
 	.attrs = dbs_attributes,
 	.name = "ondemand",
 };
 
 /************************** sysfs end ************************/
 
-static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
-{
-	if (dbs_tuners_ins.powersave_bias)
-		freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
-	else if (p->cur == p->max)
-		return;
-
-	__cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
-			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
-}
-
-static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
-{
-	unsigned int max_load_freq;
-
-	struct cpufreq_policy *policy;
-	unsigned int j;
-
-	this_dbs_info->freq_lo = 0;
-	policy = this_dbs_info->cur_policy;
-
-	/*
-	 * Every sampling_rate, we check, if current idle time is less
-	 * than 20% (default), then we try to increase frequency
-	 * Every sampling_rate, we look for a the lowest
-	 * frequency which can sustain the load while keeping idle time over
-	 * 30%. If such a frequency exist, we try to decrease to this frequency.
-	 *
-	 * Any frequency increase takes it to the maximum frequency.
-	 * Frequency reduction happens at minimum steps of
-	 * 5% (default) of current frequency
-	 */
-
-	/* Get Absolute Load - in terms of freq */
-	max_load_freq = 0;
-
-	for_each_cpu(j, policy->cpus) {
-		struct cpu_dbs_info_s *j_dbs_info;
-		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
-		unsigned int idle_time, wall_time, iowait_time;
-		unsigned int load, load_freq;
-		int freq_avg;
-
-		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
-
-		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
-		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
-
-		wall_time = (unsigned int)
-			(cur_wall_time - j_dbs_info->prev_cpu_wall);
-		j_dbs_info->prev_cpu_wall = cur_wall_time;
-
-		idle_time = (unsigned int)
-			(cur_idle_time - j_dbs_info->prev_cpu_idle);
-		j_dbs_info->prev_cpu_idle = cur_idle_time;
-
-		iowait_time = (unsigned int)
-			(cur_iowait_time - j_dbs_info->prev_cpu_iowait);
-		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
-
-		if (dbs_tuners_ins.ignore_nice) {
-			u64 cur_nice;
-			unsigned long cur_nice_jiffies;
-
-			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
-					 j_dbs_info->prev_cpu_nice;
-			/*
-			 * Assumption: nice time between sampling periods will
-			 * be less than 2^32 jiffies for 32 bit sys
-			 */
-			cur_nice_jiffies = (unsigned long)
-					cputime64_to_jiffies64(cur_nice);
-
-			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-			idle_time += jiffies_to_usecs(cur_nice_jiffies);
-		}
-
-		/*
-		 * For the purpose of ondemand, waiting for disk IO is an
-		 * indication that you're performance critical, and not that
-		 * the system is actually idle. So subtract the iowait time
-		 * from the cpu idle time.
-		 */
-
-		if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
-			idle_time -= iowait_time;
+define_get_cpu_dbs_routines(od_cpu_dbs_info);
 
-		if (unlikely(!wall_time || wall_time < idle_time))
-			continue;
-
-		load = 100 * (wall_time - idle_time) / wall_time;
-
-		freq_avg = __cpufreq_driver_getavg(policy, j);
-		if (freq_avg <= 0)
-			freq_avg = policy->cur;
-
-		load_freq = load * freq_avg;
-		if (load_freq > max_load_freq)
-			max_load_freq = load_freq;
-	}
-
-	/* Check for frequency increase */
-	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
-		/* If switching to max speed, apply sampling_down_factor */
-		if (policy->cur < policy->max)
-			this_dbs_info->rate_mult =
-				dbs_tuners_ins.sampling_down_factor;
-		dbs_freq_increase(policy, policy->max);
-		return;
-	}
-
-	/* Check for frequency decrease */
-	/* if we cannot reduce the frequency anymore, break out early */
-	if (policy->cur == policy->min)
-		return;
-
-	/*
-	 * The optimal frequency is the frequency that is the lowest that
-	 * can support the current CPU usage without triggering the up
-	 * policy. To be safe, we focus 10 points under the threshold.
-	 */
-	if (max_load_freq <
-	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
-	     policy->cur) {
-		unsigned int freq_next;
-		freq_next = max_load_freq /
-				(dbs_tuners_ins.up_threshold -
-				 dbs_tuners_ins.down_differential);
-
-		/* No longer fully busy, reset rate_mult */
-		this_dbs_info->rate_mult = 1;
-
-		if (freq_next < policy->min)
-			freq_next = policy->min;
-
-		if (!dbs_tuners_ins.powersave_bias) {
-			__cpufreq_driver_target(policy, freq_next,
-					CPUFREQ_RELATION_L);
-		} else {
-			int freq = powersave_bias_target(policy, freq_next,
-					CPUFREQ_RELATION_L);
-			__cpufreq_driver_target(policy, freq,
-				CPUFREQ_RELATION_L);
-		}
-	}
-}
-
-static void do_dbs_timer(struct work_struct *work)
-{
-	struct cpu_dbs_info_s *dbs_info =
-		container_of(work, struct cpu_dbs_info_s, work.work);
-	unsigned int cpu = dbs_info->cpu;
-	int sample_type = dbs_info->sample_type;
-
-	int delay;
-
-	mutex_lock(&dbs_info->timer_mutex);
-
-	/* Common NORMAL_SAMPLE setup */
-	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
-	if (!dbs_tuners_ins.powersave_bias ||
-	    sample_type == DBS_NORMAL_SAMPLE) {
-		dbs_check_cpu(dbs_info);
-		if (dbs_info->freq_lo) {
-			/* Setup timer for SUB_SAMPLE */
-			dbs_info->sample_type = DBS_SUB_SAMPLE;
-			delay = dbs_info->freq_hi_jiffies;
-		} else {
-			/* We want all CPUs to do sampling nearly on
-			 * same jiffy
-			 */
-			delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
-				* dbs_info->rate_mult);
-
-			if (num_online_cpus() > 1)
-				delay -= jiffies % delay;
-		}
-	} else {
-		__cpufreq_driver_target(dbs_info->cur_policy,
-			dbs_info->freq_lo, CPUFREQ_RELATION_H);
-		delay = dbs_info->freq_lo_jiffies;
-	}
-	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
-	mutex_unlock(&dbs_info->timer_mutex);
-}
-
-static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
-{
-	/* We want all CPUs to do sampling nearly on same jiffy */
-	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
-
-	if (num_online_cpus() > 1)
-		delay -= jiffies % delay;
+static struct od_ops od_ops = {
+	.io_busy = should_io_be_busy,
+	.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
+	.powersave_bias_target = powersave_bias_target,
+	.freq_increase = dbs_freq_increase,
+};
 
-	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
-	INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer);
-	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
-}
+static struct dbs_data od_dbs_data = {
+	.governor = GOV_ONDEMAND,
+	.attr_group = &od_attr_group,
+	.tuners = &od_tuners,
+	.get_cpu_cdbs = get_cpu_cdbs,
+	.get_cpu_dbs_info_s = get_cpu_dbs_info_s,
+	.gov_dbs_timer = od_dbs_timer,
+	.gov_check_cpu = od_check_cpu,
+	.gov_ops = &od_ops,
+};
 
-static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
+		unsigned int event)
 {
-	cancel_delayed_work_sync(&dbs_info->work);
+	return cpufreq_governor_dbs(&od_dbs_data, policy, event);
 }
 
-/*
- * Not all CPUs want IO time to be accounted as busy; this dependson how
- * efficient idling at a higher frequency/voltage is.
- * Pavel Machek says this is not so for various generations of AMD and old
- * Intel systems.
- * Mike Chan (androidlcom) calis this is also not true for ARM.
- * Because of this, whitelist specific known (series) of CPUs by default, and
- * leave all others up to the user.
- */
-static int should_io_be_busy(void)
-{
-#if defined(CONFIG_X86)
-	/*
-	 * For Intel, Core 2 (model 15) andl later have an efficient idle.
-	 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_data.x86 == 6 &&
-	    boot_cpu_data.x86_model >= 15)
-		return 1;
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
+static
 #endif
-	return 0;
-}
-
-static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
-				   unsigned int event)
-{
-	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_info_s *this_dbs_info;
-	unsigned int j;
-	int rc;
-
-	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-
-	switch (event) {
-	case CPUFREQ_GOV_START:
-		if ((!cpu_online(cpu)) || (!policy->cur))
-			return -EINVAL;
-
-		mutex_lock(&dbs_mutex);
-
-		dbs_enable++;
-		for_each_cpu(j, policy->cpus) {
-			struct cpu_dbs_info_s *j_dbs_info;
-			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
-			j_dbs_info->cur_policy = policy;
-
-			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
-						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice)
-				j_dbs_info->prev_cpu_nice =
-						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-		}
-		this_dbs_info->cpu = cpu;
-		this_dbs_info->rate_mult = 1;
-		ondemand_powersave_bias_init_cpu(cpu);
-		/*
-		 * Start the timerschedule work, when this governor
-		 * is used for first time
-		 */
-		if (dbs_enable == 1) {
-			unsigned int latency;
-
-			rc = sysfs_create_group(cpufreq_global_kobject,
-						&dbs_attr_group);
-			if (rc) {
-				mutex_unlock(&dbs_mutex);
-				return rc;
-			}
-
-			/* policy latency is in nS. Convert it to uS first */
-			latency = policy->cpuinfo.transition_latency / 1000;
-			if (latency == 0)
-				latency = 1;
-			/* Bring kernel and HW constraints together */
-			min_sampling_rate = max(min_sampling_rate,
-					MIN_LATENCY_MULTIPLIER * latency);
-			dbs_tuners_ins.sampling_rate =
-				max(min_sampling_rate,
-				    latency * LATENCY_MULTIPLIER);
-			dbs_tuners_ins.io_is_busy = should_io_be_busy();
-		}
-		mutex_unlock(&dbs_mutex);
-
-		mutex_init(&this_dbs_info->timer_mutex);
-		dbs_timer_init(this_dbs_info);
-		break;
-
-	case CPUFREQ_GOV_STOP:
-		dbs_timer_exit(this_dbs_info);
-
-		mutex_lock(&dbs_mutex);
-		mutex_destroy(&this_dbs_info->timer_mutex);
-		dbs_enable--;
-		mutex_unlock(&dbs_mutex);
-		if (!dbs_enable)
-			sysfs_remove_group(cpufreq_global_kobject,
-					   &dbs_attr_group);
-
-		break;
-
-	case CPUFREQ_GOV_LIMITS:
-		mutex_lock(&this_dbs_info->timer_mutex);
-		if (policy->max < this_dbs_info->cur_policy->cur)
-			__cpufreq_driver_target(this_dbs_info->cur_policy,
-				policy->max, CPUFREQ_RELATION_H);
-		else if (policy->min > this_dbs_info->cur_policy->cur)
-			__cpufreq_driver_target(this_dbs_info->cur_policy,
-				policy->min, CPUFREQ_RELATION_L);
-		dbs_check_cpu(this_dbs_info);
-		mutex_unlock(&this_dbs_info->timer_mutex);
-		break;
-	}
-	return 0;
-}
+struct cpufreq_governor cpufreq_gov_ondemand = {
+	.name			= "ondemand",
+	.governor		= od_cpufreq_governor_dbs,
+	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
+	.owner			= THIS_MODULE,
+};
 
 static int __init cpufreq_gov_dbs_init(void)
 {
 	u64 idle_time;
 	int cpu = get_cpu();
 
+	mutex_init(&od_dbs_data.mutex);
 	idle_time = get_cpu_idle_time_us(cpu, NULL);
 	put_cpu();
 	if (idle_time != -1ULL) {
 		/* Idle micro accounting is supported. Use finer thresholds */
-		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
-		dbs_tuners_ins.down_differential =
-					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
+		od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
+		od_tuners.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
 		/*
 		 * In nohz/micro accounting case we set the minimum frequency
 		 * not depending on HZ, but fixed (very low). The deferred
 		 * timer might skip some samples if idle/sleeping as needed.
 		*/
-		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
+		od_dbs_data.min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
 	} else {
 		/* For correct statistics, we need 10 ticks for each measure */
-		min_sampling_rate =
-			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
+		od_dbs_data.min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
+			jiffies_to_usecs(10);
 	}
 
 	return cpufreq_register_governor(&cpufreq_gov_ondemand);
@@ -766,7 +519,6 @@ static void __exit cpufreq_gov_dbs_exit(void)
 	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
 }
 
-
 MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
 MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
 MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index d03c21993052..a55b88eaf96a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -407,10 +407,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
-
-/*********************************************************************
- *                     Governor Helpers				     *
- *********************************************************************/
-cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall);
-
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3-55-g7522


From 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 Mon Sep 17 00:00:00 2001
From: Youquan Song
Date: Fri, 26 Oct 2012 12:26:41 +0200
Subject: cpuidle: Quickly notice prediction failure for repeat mode

The prediction for future is difficult and when the cpuidle governor prediction
fails and govenor possibly choose the shallower C-state than it should. How to
quickly notice and find the failure becomes important for power saving.

cpuidle menu governor has a method to predict the repeat pattern if there are 8
C-states residency which are continuous and the same or very close, so it will
predict the next C-states residency will keep same residency time.

There is a real case that turbostat utility (tools/power/x86/turbostat)
at kernel 3.3 or early. turbostat utility will read 10 registers one by one at
Sandybridge, so it will generate 10 IPIs to wake up idle CPUs. So cpuidle menu
 governor will predict it is repeat mode and there is another IPI wake up idle
 CPU soon, so it keeps idle CPU stay at C1 state even though CPU is totally
idle. However, in the turbostat, following 10 registers reading is sleep 5
seconds by default, so the idle CPU will keep at C1 for a long time though it is
 idle until break event occurs.
In a idle Sandybridge system, run "./turbostat -v", we will notice that deep
C-state dangles between "70% ~ 99%". After patched the kernel, we will notice
deep C-state stays at >99.98%.

In the patch, a timer is added when menu governor detects a repeat mode and
choose a shallow C-state. The timer is set to a time out value that greater
than predicted time, and we conclude repeat mode prediction failure if timer is
triggered. When repeat mode happens as expected, the timer is not triggered
and CPU waken up from C-states and it will cancel the timer initiatively.
When repeat mode does not happen, the timer will be time out and menu governor
will quickly notice that the repeat mode prediction fails and then re-evaluates
deeper C-states possibility.

Below is another case which will clearly show the patch much benefit:

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
#include <time.h>
#include <pthread.h>

volatile int * shutdown;
volatile long * count;
int delay = 20;
int loop = 8;

void usage(void)
{
	fprintf(stderr,
		"Usage: idle_predict [options]\n"
		"  --help	-h  Print this help\n"
		"  --thread	-n  Thread number\n"
		"  --loop     	-l  Loop times in shallow Cstate\n"
		"  --delay	-t  Sleep time (uS)in shallow Cstate\n");
}

void *simple_loop() {
	int idle_num = 1;
	while (!(*shutdown)) {
		*count = *count + 1;

		if (idle_num % loop)
			usleep(delay);
		else {
			/* sleep 1 second */
			usleep(1000000);
			idle_num = 0;
		}
		idle_num++;
	}

}

static void sighand(int sig)
{
	*shutdown = 1;
}

int main(int argc, char *argv[])
{
	sigset_t sigset;
	int signum = SIGALRM;
	int i, c, er = 0, thread_num = 8;
	pthread_t pt[1024];

	static char optstr[] = "n:l:t:h:";

	while ((c = getopt(argc, argv, optstr)) != EOF)
		switch (c) {
			case 'n':
				thread_num = atoi(optarg);
				break;
			case 'l':
				loop = atoi(optarg);
				break;
			case 't':
				delay = atoi(optarg);
				break;
			case 'h':
			default:
				usage();
				exit(1);
		}

	printf("thread=%d,loop=%d,delay=%d\n",thread_num,loop,delay);
	count = malloc(sizeof(long));
	shutdown = malloc(sizeof(int));
	*count = 0;
	*shutdown = 0;

	sigemptyset(&sigset);
	sigaddset(&sigset, signum);
	sigprocmask (SIG_BLOCK, &sigset, NULL);
	signal(SIGINT, sighand);
	signal(SIGTERM, sighand);

	for(i = 0; i < thread_num ; i++)
		pthread_create(&pt[i], NULL, simple_loop, NULL);

	for (i = 0; i < thread_num; i++)
		pthread_join(pt[i], NULL);

	exit(0);
}

Get powertop V2 from git://github.com/fenrus75/powertop, build powertop.
After build the above test application, then run it.
Test plaform can be Intel Sandybridge or other recent platforms.
#./idle_predict -l 10 &
#./powertop

We will find that deep C-state will dangle between 40%~100% and much time spent
on C1 state. It is because menu governor wrongly predict that repeat mode
is kept, so it will choose the C1 shallow C-state even though it has chance to
sleep 1 second in deep C-state.

While after patched the kernel, we find that deep C-state will keep >99.6%.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 75 +++++++++++++++++++++++++++++++++++++---
 include/linux/tick.h             |  6 ++++
 kernel/time/tick-sched.c         |  4 +++
 3 files changed, 80 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 5b1f2c372c1f..37c0ff6c805c 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -28,6 +28,13 @@
 #define MAX_INTERESTING 50000
 #define STDDEV_THRESH 400
 
+/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
+#define MAX_DEVIATION 60
+
+static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
+static DEFINE_PER_CPU(int, hrtimer_status);
+/* menu hrtimer mode */
+enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT};
 
 /*
  * Concepts and ideas behind the menu governor
@@ -191,17 +198,42 @@ static u64 div_round64(u64 dividend, u32 divisor)
 	return div_u64(dividend + (divisor / 2), divisor);
 }
 
+/* Cancel the hrtimer if it is not triggered yet */
+void menu_hrtimer_cancel(void)
+{
+	int cpu = smp_processor_id();
+	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
+
+	/* The timer is still not time out*/
+	if (per_cpu(hrtimer_status, cpu)) {
+		hrtimer_cancel(hrtmr);
+		per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
+	}
+}
+EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
+
+/* Call back for hrtimer is triggered */
+static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
+
+	return HRTIMER_NORESTART;
+}
+
 /*
  * Try detecting repeating patterns by keeping track of the last 8
  * intervals, and checking if the standard deviation of that set
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static void detect_repeating_patterns(struct menu_device *data)
+static int detect_repeating_patterns(struct menu_device *data)
 {
 	int i;
 	uint64_t avg = 0;
 	uint64_t stddev = 0; /* contains the square of the std deviation */
+	int ret = 0;
 
 	/* first calculate average and standard deviation of the past */
 	for (i = 0; i < INTERVALS; i++)
@@ -210,7 +242,7 @@ static void detect_repeating_patterns(struct menu_device *data)
 
 	/* if the avg is beyond the known next tick, it's worthless */
 	if (avg > data->expected_us)
-		return;
+		return 0;
 
 	for (i = 0; i < INTERVALS; i++)
 		stddev += (data->intervals[i] - avg) *
@@ -223,8 +255,12 @@ static void detect_repeating_patterns(struct menu_device *data)
 	 * repeating pattern and predict we keep doing this.
 	 */
 
-	if (avg && stddev < STDDEV_THRESH)
+	if (avg && stddev < STDDEV_THRESH) {
 		data->predicted_us = avg;
+		ret = 1;
+	}
+
+	return ret;
 }
 
 /**
@@ -240,6 +276,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	int i;
 	int multiplier;
 	struct timespec t;
+	int repeat = 0, low_predicted = 0;
+	int cpu = smp_processor_id();
+	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -274,7 +313,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
-	detect_repeating_patterns(data);
+	repeat = detect_repeating_patterns(data);
 
 	/*
 	 * We want to default to C1 (hlt), not to busy polling
@@ -295,8 +334,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		if (s->disabled || su->disable)
 			continue;
-		if (s->target_residency > data->predicted_us)
+		if (s->target_residency > data->predicted_us) {
+			low_predicted = 1;
 			continue;
+		}
 		if (s->exit_latency > latency_req)
 			continue;
 		if (s->exit_latency * multiplier > data->predicted_us)
@@ -309,6 +350,27 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		}
 	}
 
+	/* not deepest C-state chosen for low predicted residency */
+	if (low_predicted) {
+		unsigned int timer_us = 0;
+
+		/*
+		 * Set a timer to detect whether this sleep is much
+		 * longer than repeat mode predicted.  If the timer
+		 * triggers, the code will evaluate whether to put
+		 * the CPU into a deeper C-state.
+		 * The timer is cancelled on CPU wakeup.
+		 */
+		timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
+
+		if (repeat && (4 * timer_us < data->expected_us)) {
+			hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us),
+				HRTIMER_MODE_REL_PINNED);
+			/* In repeat case, menu hrtimer is started */
+			per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
+		}
+	}
+
 	return data->last_state_idx;
 }
 
@@ -399,6 +461,9 @@ static int menu_enable_device(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev)
 {
 	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+	struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
+	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	t->function = menu_hrtimer_notify;
 
 	memset(data, 0, sizeof(struct menu_device));
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index f37fceb69b73..1a6567b48492 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -142,4 +142,10 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !NO_HZ */
 
+# ifdef CONFIG_CPU_IDLE_GOV_MENU
+extern void menu_hrtimer_cancel(void);
+# else
+static inline void menu_hrtimer_cancel(void) {}
+# endif /* CONFIG_CPU_IDLE_GOV_MENU */
+
 #endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a40260885265..6f337068dc4c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -526,6 +526,8 @@ void tick_nohz_irq_exit(void)
 	if (!ts->inidle)
 		return;
 
+	/* Cancel the timer because CPU already waken up from the C-states*/
+	menu_hrtimer_cancel();
 	__tick_nohz_idle_enter(ts);
 }
 
@@ -621,6 +623,8 @@ void tick_nohz_idle_exit(void)
 
 	ts->inidle = 0;
 
+	/* Cancel the timer because CPU already waken up from the C-states*/
+	menu_hrtimer_cancel();
 	if (ts->idle_active || ts->tick_stopped)
 		now = ktime_get();
 
-- 
cgit v1.2.3-55-g7522


From 349631e0e411fefa2fed7e0a30b97704562dbd6b Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Wed, 31 Oct 2012 01:05:16 +0100
Subject: cpuidle / sysfs: move structure declaration into the sysfs.c file

The structure cpuidle_state_kobj is not used anywhere except
in the sysfs.c file. The definition of this structure is not
needed in the cpuidle header file. This patch moves it to the
sysfs.c file in order to encapsulate the code a bit more.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/sysfs.c | 7 +++++++
 include/linux/cpuidle.h | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index ed87399bb02b..f15c1e56e16f 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -297,6 +297,13 @@ static struct attribute *cpuidle_state_default_attrs[] = {
 	NULL
 };
 
+struct cpuidle_state_kobj {
+	struct cpuidle_state *state;
+	struct cpuidle_state_usage *state_usage;
+	struct completion kobj_unregister;
+	struct kobject kobj;
+};
+
 #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
 #define kobj_to_state(k) (kobj_to_state_obj(k)->state)
 #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 279b1eaa8b73..7daf0e3b93bd 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -82,13 +82,6 @@ cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data)
 	st_usage->driver_data = data;
 }
 
-struct cpuidle_state_kobj {
-	struct cpuidle_state *state;
-	struct cpuidle_state_usage *state_usage;
-	struct completion kobj_unregister;
-	struct kobject kobj;
-};
-
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
-- 
cgit v1.2.3-55-g7522


From 42f67f2acab2b7179c0d1ab234869e391448dfa6 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Wed, 31 Oct 2012 16:44:45 +0000
Subject: cpuidle: move driver's refcount to cpuidle

We want to support different cpuidle drivers co-existing together.
In this case we should move the refcount to the cpuidle_driver
structure to handle several drivers at a time.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/driver.c | 13 ++++++++-----
 include/linux/cpuidle.h  |  1 +
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 87db3877fead..39ba8e181e96 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -16,7 +16,6 @@
 
 static struct cpuidle_driver *cpuidle_curr_driver;
 DEFINE_SPINLOCK(cpuidle_driver_lock);
-int cpuidle_driver_refcount;
 
 static void set_power_states(struct cpuidle_driver *drv)
 {
@@ -61,6 +60,8 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
 	if (!drv->power_specified)
 		set_power_states(drv);
 
+	drv->refcnt = 0;
+
 	cpuidle_curr_driver = drv;
 
 	spin_unlock(&cpuidle_driver_lock);
@@ -92,7 +93,7 @@ void cpuidle_unregister_driver(struct cpuidle_driver *drv)
 
 	spin_lock(&cpuidle_driver_lock);
 
-	if (!WARN_ON(cpuidle_driver_refcount > 0))
+	if (!WARN_ON(drv->refcnt > 0))
 		cpuidle_curr_driver = NULL;
 
 	spin_unlock(&cpuidle_driver_lock);
@@ -106,7 +107,7 @@ struct cpuidle_driver *cpuidle_driver_ref(void)
 	spin_lock(&cpuidle_driver_lock);
 
 	drv = cpuidle_curr_driver;
-	cpuidle_driver_refcount++;
+	drv->refcnt++;
 
 	spin_unlock(&cpuidle_driver_lock);
 	return drv;
@@ -114,10 +115,12 @@ struct cpuidle_driver *cpuidle_driver_ref(void)
 
 void cpuidle_driver_unref(void)
 {
+	struct cpuidle_driver *drv = cpuidle_curr_driver;
+
 	spin_lock(&cpuidle_driver_lock);
 
-	if (!WARN_ON(cpuidle_driver_refcount <= 0))
-		cpuidle_driver_refcount--;
+	if (drv && !WARN_ON(drv->refcnt <= 0))
+		drv->refcnt--;
 
 	spin_unlock(&cpuidle_driver_lock);
 }
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 7daf0e3b93bd..d08e1afa4919 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -124,6 +124,7 @@ static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
 struct cpuidle_driver {
 	const char		*name;
 	struct module 		*owner;
+	int                     refcnt;
 
 	unsigned int		power_specified:1;
 	/* set to 1 to use the core cpuidle time keeping (for all states). */
-- 
cgit v1.2.3-55-g7522


From bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Wed, 31 Oct 2012 16:44:48 +0000
Subject: cpuidle: support multiple drivers

With the tegra3 and the big.LITTLE [1] new architectures, several cpus
with different characteristics (latencies and states) can co-exists on the
system.

The cpuidle framework has the limitation of handling only identical cpus.

This patch removes this limitation by introducing the multiple driver support
for cpuidle.

This option is configurable at compile time and should be enabled for the
architectures mentioned above. So there is no impact for the other platforms
if the option is disabled. The option defaults to 'n'. Note the multiple drivers
support is also compatible with the existing drivers, even if just one driver is
needed, all the cpu will be tied to this driver using an extra small chunk of
processor memory.

The multiple driver support use a per-cpu driver pointer instead of a global
variable and the accessor to this variable are done from a cpu context.

In order to keep the compatibility with the existing drivers, the function
'cpuidle_register_driver' and 'cpuidle_unregister_driver' will register
the specified driver for all the cpus.

The semantic for the output of /sys/devices/system/cpu/cpuidle/current_driver
remains the same except the driver name will be related to the current cpu.

The /sys/devices/system/cpu/cpu[0-9]/cpuidle/driver/name files are added
allowing to read the per cpu driver name.

[1] http://lwn.net/Articles/481055/

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/Kconfig   |   9 +++
 drivers/cpuidle/cpuidle.c |  36 ++++++----
 drivers/cpuidle/cpuidle.h |   4 +-
 drivers/cpuidle/driver.c  | 166 ++++++++++++++++++++++++++++++++++++++-----
 drivers/cpuidle/sysfs.c   | 174 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/cpuidle.h   |   7 +-
 6 files changed, 356 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index a76b689e553b..234ae651b38f 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -9,6 +9,15 @@ config CPU_IDLE
 
 	  If you're using an ACPI-enabled platform, you should say Y here.
 
+config CPU_IDLE_MULTIPLE_DRIVERS
+        bool "Support multiple cpuidle drivers"
+        depends on CPU_IDLE
+        default n
+        help
+         Allows the cpuidle framework to use different drivers for each CPU.
+         This is useful if you have a system with different CPU latencies and
+         states. If unsure say N.
+
 config CPU_IDLE_GOV_LADDER
 	bool
 	depends on CPU_IDLE
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index ce4cac706dd1..711dd83fd3ba 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -68,7 +68,7 @@ static cpuidle_enter_t cpuidle_enter_ops;
 int cpuidle_play_dead(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int i, dead_state = -1;
 	int power_usage = -1;
 
@@ -128,7 +128,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 int cpuidle_idle_call(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv;
 	int next_state, entered_state;
 
 	if (off)
@@ -141,6 +141,8 @@ int cpuidle_idle_call(void)
 	if (!dev || !dev->enabled)
 		return -EBUSY;
 
+	drv = cpuidle_get_cpu_driver(dev);
+
 	/* ask the governor for the next state */
 	next_state = cpuidle_curr_governor->select(drv, dev);
 	if (need_resched()) {
@@ -312,15 +314,19 @@ static void poll_idle_init(struct cpuidle_driver *drv) {}
 int cpuidle_enable_device(struct cpuidle_device *dev)
 {
 	int ret, i;
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv;
 
 	if (!dev)
 		return -EINVAL;
 
 	if (dev->enabled)
 		return 0;
+
+	drv = cpuidle_get_cpu_driver(dev);
+
 	if (!drv || !cpuidle_curr_governor)
 		return -EIO;
+
 	if (!dev->state_count)
 		dev->state_count = drv->state_count;
 
@@ -335,7 +341,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
 
 	poll_idle_init(drv);
 
-	if ((ret = cpuidle_add_state_sysfs(dev)))
+	ret = cpuidle_add_device_sysfs(dev);
+	if (ret)
 		return ret;
 
 	if (cpuidle_curr_governor->enable &&
@@ -356,7 +363,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
 	return 0;
 
 fail_sysfs:
-	cpuidle_remove_state_sysfs(dev);
+	cpuidle_remove_device_sysfs(dev);
 
 	return ret;
 }
@@ -372,17 +379,20 @@ EXPORT_SYMBOL_GPL(cpuidle_enable_device);
  */
 void cpuidle_disable_device(struct cpuidle_device *dev)
 {
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
 	if (!dev || !dev->enabled)
 		return;
-	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
+
+	if (!drv || !cpuidle_curr_governor)
 		return;
 
 	dev->enabled = 0;
 
 	if (cpuidle_curr_governor->disable)
-		cpuidle_curr_governor->disable(cpuidle_get_driver(), dev);
+		cpuidle_curr_governor->disable(drv, dev);
 
-	cpuidle_remove_state_sysfs(dev);
+	cpuidle_remove_device_sysfs(dev);
 	enabled_devices--;
 }
 
@@ -398,9 +408,9 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device);
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
 	int ret;
-	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 
-	if (!try_module_get(cpuidle_driver->owner))
+	if (!try_module_get(drv->owner))
 		return -EINVAL;
 
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
@@ -421,7 +431,7 @@ err_coupled:
 err_sysfs:
 	list_del(&dev->device_list);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
-	module_put(cpuidle_driver->owner);
+	module_put(drv->owner);
 	return ret;
 }
 
@@ -460,7 +470,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device);
  */
 void cpuidle_unregister_device(struct cpuidle_device *dev)
 {
-	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 
 	if (dev->registered == 0)
 		return;
@@ -477,7 +487,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
 
 	cpuidle_resume_and_unlock();
 
-	module_put(cpuidle_driver->owner);
+	module_put(drv->owner);
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index f6b0923c2253..ee97e9672ecf 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -28,8 +28,8 @@ struct device;
 
 extern int cpuidle_add_interface(struct device *dev);
 extern void cpuidle_remove_interface(struct device *dev);
-extern int cpuidle_add_state_sysfs(struct cpuidle_device *device);
-extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
+extern int cpuidle_add_device_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_device_sysfs(struct cpuidle_device *device);
 extern int cpuidle_add_sysfs(struct cpuidle_device *dev);
 extern void cpuidle_remove_sysfs(struct cpuidle_device *dev);
 
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 8246662f594a..3af841fb397a 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -14,9 +14,11 @@
 
 #include "cpuidle.h"
 
-static struct cpuidle_driver *cpuidle_curr_driver;
 DEFINE_SPINLOCK(cpuidle_driver_lock);
 
+static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu);
+static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu);
+
 static void set_power_states(struct cpuidle_driver *drv)
 {
 	int i;
@@ -47,12 +49,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 		set_power_states(drv);
 }
 
-static void cpuidle_set_driver(struct cpuidle_driver *drv)
-{
-	cpuidle_curr_driver = drv;
-}
-
-static int __cpuidle_register_driver(struct cpuidle_driver *drv)
+static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
 {
 	if (!drv || !drv->state_count)
 		return -EINVAL;
@@ -60,23 +57,84 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv)
 	if (cpuidle_disabled())
 		return -ENODEV;
 
-	if (cpuidle_get_driver())
+	if (__cpuidle_get_cpu_driver(cpu))
 		return -EBUSY;
 
 	__cpuidle_driver_init(drv);
 
-	cpuidle_set_driver(drv);
+	__cpuidle_set_cpu_driver(drv, cpu);
 
 	return 0;
 }
 
-static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
+static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu)
 {
-	if (drv != cpuidle_get_driver())
+	if (drv != __cpuidle_get_cpu_driver(cpu))
 		return;
 
 	if (!WARN_ON(drv->refcnt > 0))
-		cpuidle_set_driver(NULL);
+		__cpuidle_set_cpu_driver(NULL, cpu);
+}
+
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+
+static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
+
+static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
+{
+	per_cpu(cpuidle_drivers, cpu) = drv;
+}
+
+static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
+{
+	return per_cpu(cpuidle_drivers, cpu);
+}
+
+static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv)
+{
+	int cpu;
+	for_each_present_cpu(cpu)
+		__cpuidle_unregister_driver(drv, cpu);
+}
+
+static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
+{
+	int ret = 0;
+	int i, cpu;
+
+	for_each_present_cpu(cpu) {
+		ret = __cpuidle_register_driver(drv, cpu);
+		if (ret)
+			break;
+	}
+
+	if (ret)
+		for_each_present_cpu(i) {
+			if (i == cpu)
+				break;
+			__cpuidle_unregister_driver(drv, i);
+		}
+
+
+	return ret;
+}
+
+int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu)
+{
+	int ret;
+
+	spin_lock(&cpuidle_driver_lock);
+	ret = __cpuidle_register_driver(drv, cpu);
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
+}
+
+void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu)
+{
+	spin_lock(&cpuidle_driver_lock);
+	__cpuidle_unregister_driver(drv, cpu);
+	spin_unlock(&cpuidle_driver_lock);
 }
 
 /**
@@ -88,7 +146,7 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
 	int ret;
 
 	spin_lock(&cpuidle_driver_lock);
-	ret = __cpuidle_register_driver(drv);
+	ret = __cpuidle_register_all_cpu_driver(drv);
 	spin_unlock(&cpuidle_driver_lock);
 
 	return ret;
@@ -96,13 +154,48 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
 EXPORT_SYMBOL_GPL(cpuidle_register_driver);
 
 /**
- * cpuidle_get_driver - return the current driver
+ * cpuidle_unregister_driver - unregisters a driver
+ * @drv: the driver
  */
-struct cpuidle_driver *cpuidle_get_driver(void)
+void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+	spin_lock(&cpuidle_driver_lock);
+	__cpuidle_unregister_all_cpu_driver(drv);
+	spin_unlock(&cpuidle_driver_lock);
+}
+EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
+
+#else
+
+static struct cpuidle_driver *cpuidle_curr_driver;
+
+static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
+{
+	cpuidle_curr_driver = drv;
+}
+
+static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
 {
 	return cpuidle_curr_driver;
 }
-EXPORT_SYMBOL_GPL(cpuidle_get_driver);
+
+/**
+ * cpuidle_register_driver - registers a driver
+ * @drv: the driver
+ */
+int cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+	int ret, cpu;
+
+	cpu = get_cpu();
+	spin_lock(&cpuidle_driver_lock);
+	ret = __cpuidle_register_driver(drv, cpu);
+	spin_unlock(&cpuidle_driver_lock);
+	put_cpu();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpuidle_register_driver);
 
 /**
  * cpuidle_unregister_driver - unregisters a driver
@@ -110,11 +203,50 @@ EXPORT_SYMBOL_GPL(cpuidle_get_driver);
  */
 void cpuidle_unregister_driver(struct cpuidle_driver *drv)
 {
+	int cpu;
+
+	cpu = get_cpu();
 	spin_lock(&cpuidle_driver_lock);
-	__cpuidle_unregister_driver(drv);
+	__cpuidle_unregister_driver(drv, cpu);
 	spin_unlock(&cpuidle_driver_lock);
+	put_cpu();
 }
 EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
+#endif
+
+/**
+ * cpuidle_get_driver - return the current driver
+ */
+struct cpuidle_driver *cpuidle_get_driver(void)
+{
+	struct cpuidle_driver *drv;
+	int cpu;
+
+	cpu = get_cpu();
+	drv = __cpuidle_get_cpu_driver(cpu);
+	put_cpu();
+
+	return drv;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_driver);
+
+/**
+ * cpuidle_get_cpu_driver - return the driver tied with a cpu
+ */
+struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
+{
+	struct cpuidle_driver *drv;
+
+	if (!dev)
+		return NULL;
+
+	spin_lock(&cpuidle_driver_lock);
+	drv = __cpuidle_get_cpu_driver(dev->cpu);
+	spin_unlock(&cpuidle_driver_lock);
+
+	return drv;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
 
 struct cpuidle_driver *cpuidle_driver_ref(void)
 {
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 49b1f4bcc1b3..340942946106 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -364,17 +364,17 @@ static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
 }
 
 /**
- * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes
+ * cpuidle_add_state_sysfs - adds cpuidle states sysfs attributes
  * @device: the target device
  */
-int cpuidle_add_state_sysfs(struct cpuidle_device *device)
+static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
 {
 	int i, ret = -ENOMEM;
 	struct cpuidle_state_kobj *kobj;
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
 
 	/* state statistics */
-	for (i = 0; i < device->state_count; i++) {
+	for (i = 0; i < drv->state_count; i++) {
 		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
 		if (!kobj)
 			goto error_state;
@@ -401,10 +401,10 @@ error_state:
 }
 
 /**
- * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes
+ * cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes
  * @device: the target device
  */
-void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
+static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
 {
 	int i;
 
@@ -412,6 +412,168 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
 		cpuidle_free_state_kobj(device, i);
 }
 
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj)
+#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr)
+
+#define define_one_driver_ro(_name, show)                       \
+	static struct cpuidle_driver_attr attr_driver_##_name = \
+		__ATTR(_name, 0644, show, NULL)
+
+struct cpuidle_driver_kobj {
+	struct cpuidle_driver *drv;
+	struct completion kobj_unregister;
+	struct kobject kobj;
+};
+
+struct cpuidle_driver_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_driver *, char *);
+	ssize_t (*store)(struct cpuidle_driver *, const char *, size_t);
+};
+
+static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf)
+{
+	ssize_t ret;
+
+	spin_lock(&cpuidle_driver_lock);
+	ret = sprintf(buf, "%s\n", drv ? drv->name : "none");
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
+}
+
+static void cpuidle_driver_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
+	complete(&driver_kobj->kobj_unregister);
+}
+
+static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr,
+				   char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
+	struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr);
+
+	if (dattr->show)
+		ret = dattr->show(driver_kobj->drv, buf);
+
+	return ret;
+}
+
+static ssize_t cpuidle_driver_store(struct kobject *kobj, struct attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret = -EIO;
+	struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
+	struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr);
+
+	if (dattr->store)
+		ret = dattr->store(driver_kobj->drv, buf, size);
+
+	return ret;
+}
+
+define_one_driver_ro(name, show_driver_name);
+
+static const struct sysfs_ops cpuidle_driver_sysfs_ops = {
+	.show = cpuidle_driver_show,
+	.store = cpuidle_driver_store,
+};
+
+static struct attribute *cpuidle_driver_default_attrs[] = {
+	&attr_driver_name.attr,
+	NULL
+};
+
+static struct kobj_type ktype_driver_cpuidle = {
+	.sysfs_ops = &cpuidle_driver_sysfs_ops,
+	.default_attrs = cpuidle_driver_default_attrs,
+	.release = cpuidle_driver_sysfs_release,
+};
+
+/**
+ * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute
+ * @device: the target device
+ */
+static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
+{
+	struct cpuidle_driver_kobj *kdrv;
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+	int ret;
+
+	kdrv = kzalloc(sizeof(*kdrv), GFP_KERNEL);
+	if (!kdrv)
+		return -ENOMEM;
+
+	kdrv->drv = drv;
+	init_completion(&kdrv->kobj_unregister);
+
+	ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle,
+				   &dev->kobj, "driver");
+	if (ret) {
+		kfree(kdrv);
+		return ret;
+	}
+
+	kobject_uevent(&kdrv->kobj, KOBJ_ADD);
+	dev->kobj_driver = kdrv;
+
+	return ret;
+}
+
+/**
+ * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute
+ * @device: the target device
+ */
+static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
+{
+	struct cpuidle_driver_kobj *kdrv = dev->kobj_driver;
+	kobject_put(&kdrv->kobj);
+	wait_for_completion(&kdrv->kobj_unregister);
+	kfree(kdrv);
+}
+#else
+static inline int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
+{
+	return 0;
+}
+
+static inline void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
+{
+	;
+}
+#endif
+
+/**
+ * cpuidle_add_device_sysfs - adds device specific sysfs attributes
+ * @device: the target device
+ */
+int cpuidle_add_device_sysfs(struct cpuidle_device *device)
+{
+	int ret;
+
+	ret = cpuidle_add_state_sysfs(device);
+	if (ret)
+		return ret;
+
+	ret = cpuidle_add_driver_sysfs(device);
+	if (ret)
+		cpuidle_remove_state_sysfs(device);
+	return ret;
+}
+
+/**
+ * cpuidle_remove_device_sysfs : removes device specific sysfs attributes
+ * @device : the target device
+ */
+void cpuidle_remove_device_sysfs(struct cpuidle_device *device)
+{
+	cpuidle_remove_driver_sysfs(device);
+	cpuidle_remove_state_sysfs(device);
+}
+
 /**
  * cpuidle_add_sysfs - creates a sysfs instance for the target device
  * @dev: the target device
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index d08e1afa4919..3711b34dc4f9 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -91,7 +91,7 @@ struct cpuidle_device {
 	int			state_count;
 	struct cpuidle_state_usage	states_usage[CPUIDLE_STATE_MAX];
 	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
-
+	struct cpuidle_driver_kobj *kobj_driver;
 	struct list_head 	device_list;
 	struct kobject		kobj;
 	struct completion	kobj_unregister;
@@ -157,6 +157,10 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev,
 					struct cpuidle_driver *drv, int index));
 extern int cpuidle_play_dead(void);
 
+extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
+extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu);
+extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu);
+
 #else
 static inline void disable_cpuidle(void) { }
 static inline int cpuidle_idle_call(void) { return -ENODEV; }
@@ -183,7 +187,6 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
 					struct cpuidle_driver *drv, int index))
 { return -ENODEV; }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
-
 #endif
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
-- 
cgit v1.2.3-55-g7522


From 7e6fdd4bad033fa2d73716377b184fa975b0d985 Mon Sep 17 00:00:00 2001
From: Rajagopal Venkat
Date: Fri, 26 Oct 2012 01:50:09 +0200
Subject: PM / devfreq: Core updates to support devices which can idle

Prepare devfreq core framework to support devices which
can idle. When device idleness is detected perhaps through
runtime-pm, need some mechanism to suspend devfreq load
monitoring and resume back when device is online. Present
code continues monitoring unless device is removed from
devfreq core.

This patch introduces following design changes,

 - use per device work instead of global work to monitor device
   load. This enables suspend/resume of device devfreq and
   reduces monitoring code complexity.
 - decouple delayed work based load monitoring logic from core
   by introducing helpers functions to be used by governors. This
   provides flexibility for governors either to use delayed work
   based monitoring functions or to implement their own mechanism.
 - devfreq core interacts with governors via events to perform
   specific actions. These events include start/stop devfreq.
   This sets ground for adding suspend/resume events.

The devfreq apis are not modified and are kept intact.

Signed-off-by: Rajagopal Venkat <rajagopal.venkat@linaro.org>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/ABI/testing/sysfs-class-devfreq |   8 -
 drivers/devfreq/devfreq.c                     | 442 +++++++++++---------------
 drivers/devfreq/governor.h                    |  11 +
 drivers/devfreq/governor_performance.c        |  16 +-
 drivers/devfreq/governor_powersave.c          |  16 +-
 drivers/devfreq/governor_simpleondemand.c     |  24 ++
 drivers/devfreq/governor_userspace.c          |  23 +-
 include/linux/devfreq.h                       |  34 +-
 8 files changed, 278 insertions(+), 296 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 23d78b5aab11..89283b1b0240 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -21,14 +21,6 @@ Description:
 		The /sys/class/devfreq/.../cur_freq shows the current
 		frequency of the corresponding devfreq object.
 
-What:		/sys/class/devfreq/.../central_polling
-Date:		September 2011
-Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
-Description:
-		The /sys/class/devfreq/.../central_polling shows whether
-		the devfreq ojbect is using devfreq-provided central
-		polling mechanism or not.
-
 What:		/sys/class/devfreq/.../polling_interval
 Date:		September 2011
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index b146d76f04cf..1aaf1aeb1f1d 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -30,17 +30,11 @@
 struct class *devfreq_class;
 
 /*
- * devfreq_work periodically monitors every registered device.
- * The minimum polling interval is one jiffy. The polling interval is
- * determined by the minimum polling period among all polling devfreq
- * devices. The resolution of polling interval is one jiffy.
+ * devfreq core provides delayed work based load monitoring helper
+ * functions. Governors can use these or can implement their own
+ * monitoring mechanism.
  */
-static bool polling;
 static struct workqueue_struct *devfreq_wq;
-static struct delayed_work devfreq_work;
-
-/* wait removing if this is to be removed */
-static struct devfreq *wait_remove_device;
 
 /* The list of all device-devfreq */
 static LIST_HEAD(devfreq_list);
@@ -72,6 +66,8 @@ static struct devfreq *find_device_devfreq(struct device *dev)
 	return ERR_PTR(-ENODEV);
 }
 
+/* Load monitoring helper functions for governors use */
+
 /**
  * update_devfreq() - Reevaluate the device and configure frequency.
  * @devfreq:	the devfreq instance.
@@ -120,6 +116,152 @@ int update_devfreq(struct devfreq *devfreq)
 	return err;
 }
 
+/**
+ * devfreq_monitor() - Periodically poll devfreq objects.
+ * @work:	the work struct used to run devfreq_monitor periodically.
+ *
+ */
+static void devfreq_monitor(struct work_struct *work)
+{
+	int err;
+	struct devfreq *devfreq = container_of(work,
+					struct devfreq, work.work);
+
+	mutex_lock(&devfreq->lock);
+	err = update_devfreq(devfreq);
+	if (err)
+		dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err);
+
+	queue_delayed_work(devfreq_wq, &devfreq->work,
+				msecs_to_jiffies(devfreq->profile->polling_ms));
+	mutex_unlock(&devfreq->lock);
+}
+
+/**
+ * devfreq_monitor_start() - Start load monitoring of devfreq instance
+ * @devfreq:	the devfreq instance.
+ *
+ * Helper function for starting devfreq device load monitoing. By
+ * default delayed work based monitoring is supported. Function
+ * to be called from governor in response to DEVFREQ_GOV_START
+ * event when device is added to devfreq framework.
+ */
+void devfreq_monitor_start(struct devfreq *devfreq)
+{
+	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
+	if (devfreq->profile->polling_ms)
+		queue_delayed_work(devfreq_wq, &devfreq->work,
+			msecs_to_jiffies(devfreq->profile->polling_ms));
+}
+
+/**
+ * devfreq_monitor_stop() - Stop load monitoring of a devfreq instance
+ * @devfreq:	the devfreq instance.
+ *
+ * Helper function to stop devfreq device load monitoing. Function
+ * to be called from governor in response to DEVFREQ_GOV_STOP
+ * event when device is removed from devfreq framework.
+ */
+void devfreq_monitor_stop(struct devfreq *devfreq)
+{
+	cancel_delayed_work_sync(&devfreq->work);
+}
+
+/**
+ * devfreq_monitor_suspend() - Suspend load monitoring of a devfreq instance
+ * @devfreq:	the devfreq instance.
+ *
+ * Helper function to suspend devfreq device load monitoing. Function
+ * to be called from governor in response to DEVFREQ_GOV_SUSPEND
+ * event or when polling interval is set to zero.
+ *
+ * Note: Though this function is same as devfreq_monitor_stop(),
+ * intentionally kept separate to provide hooks for collecting
+ * transition statistics.
+ */
+void devfreq_monitor_suspend(struct devfreq *devfreq)
+{
+	mutex_lock(&devfreq->lock);
+	if (devfreq->stop_polling) {
+		mutex_unlock(&devfreq->lock);
+		return;
+	}
+
+	devfreq->stop_polling = true;
+	mutex_unlock(&devfreq->lock);
+	cancel_delayed_work_sync(&devfreq->work);
+}
+
+/**
+ * devfreq_monitor_resume() - Resume load monitoring of a devfreq instance
+ * @devfreq:    the devfreq instance.
+ *
+ * Helper function to resume devfreq device load monitoing. Function
+ * to be called from governor in response to DEVFREQ_GOV_RESUME
+ * event or when polling interval is set to non-zero.
+ */
+void devfreq_monitor_resume(struct devfreq *devfreq)
+{
+	mutex_lock(&devfreq->lock);
+	if (!devfreq->stop_polling)
+		goto out;
+
+	if (!delayed_work_pending(&devfreq->work) &&
+			devfreq->profile->polling_ms)
+		queue_delayed_work(devfreq_wq, &devfreq->work,
+			msecs_to_jiffies(devfreq->profile->polling_ms));
+	devfreq->stop_polling = false;
+
+out:
+	mutex_unlock(&devfreq->lock);
+}
+
+/**
+ * devfreq_interval_update() - Update device devfreq monitoring interval
+ * @devfreq:    the devfreq instance.
+ * @delay:      new polling interval to be set.
+ *
+ * Helper function to set new load monitoring polling interval. Function
+ * to be called from governor in response to DEVFREQ_GOV_INTERVAL event.
+ */
+void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay)
+{
+	unsigned int cur_delay = devfreq->profile->polling_ms;
+	unsigned int new_delay = *delay;
+
+	mutex_lock(&devfreq->lock);
+	devfreq->profile->polling_ms = new_delay;
+
+	if (devfreq->stop_polling)
+		goto out;
+
+	/* if new delay is zero, stop polling */
+	if (!new_delay) {
+		mutex_unlock(&devfreq->lock);
+		cancel_delayed_work_sync(&devfreq->work);
+		return;
+	}
+
+	/* if current delay is zero, start polling with new delay */
+	if (!cur_delay) {
+		queue_delayed_work(devfreq_wq, &devfreq->work,
+			msecs_to_jiffies(devfreq->profile->polling_ms));
+		goto out;
+	}
+
+	/* if current delay is greater than new delay, restart polling */
+	if (cur_delay > new_delay) {
+		mutex_unlock(&devfreq->lock);
+		cancel_delayed_work_sync(&devfreq->work);
+		mutex_lock(&devfreq->lock);
+		if (!devfreq->stop_polling)
+			queue_delayed_work(devfreq_wq, &devfreq->work,
+			      msecs_to_jiffies(devfreq->profile->polling_ms));
+	}
+out:
+	mutex_unlock(&devfreq->lock);
+}
+
 /**
  * devfreq_notifier_call() - Notify that the device frequency requirements
  *			   has been changed out of devfreq framework.
@@ -143,59 +285,32 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
 }
 
 /**
- * _remove_devfreq() - Remove devfreq from the device.
+ * _remove_devfreq() - Remove devfreq from the list and release its resources.
  * @devfreq:	the devfreq struct
  * @skip:	skip calling device_unregister().
- *
- * Note that the caller should lock devfreq->lock before calling
- * this. _remove_devfreq() will unlock it and free devfreq
- * internally. devfreq_list_lock should be locked by the caller
- * as well (not relased at return)
- *
- * Lock usage:
- * devfreq->lock: locked before call.
- *		  unlocked at return (and freed)
- * devfreq_list_lock: locked before call.
- *		      kept locked at return.
- *		      if devfreq is centrally polled.
- *
- * Freed memory:
- * devfreq
  */
 static void _remove_devfreq(struct devfreq *devfreq, bool skip)
 {
-	if (!mutex_is_locked(&devfreq->lock)) {
-		WARN(true, "devfreq->lock must be locked by the caller.\n");
-		return;
-	}
-	if (!devfreq->governor->no_central_polling &&
-	    !mutex_is_locked(&devfreq_list_lock)) {
-		WARN(true, "devfreq_list_lock must be locked by the caller.\n");
+	mutex_lock(&devfreq_list_lock);
+	if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) {
+		mutex_unlock(&devfreq_list_lock);
+		dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n");
 		return;
 	}
+	list_del(&devfreq->node);
+	mutex_unlock(&devfreq_list_lock);
 
-	if (devfreq->being_removed)
-		return;
-
-	devfreq->being_removed = true;
+	devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP, NULL);
 
 	if (devfreq->profile->exit)
 		devfreq->profile->exit(devfreq->dev.parent);
 
-	if (devfreq->governor->exit)
-		devfreq->governor->exit(devfreq);
-
 	if (!skip && get_device(&devfreq->dev)) {
 		device_unregister(&devfreq->dev);
 		put_device(&devfreq->dev);
 	}
 
-	if (!devfreq->governor->no_central_polling)
-		list_del(&devfreq->node);
-
-	mutex_unlock(&devfreq->lock);
 	mutex_destroy(&devfreq->lock);
-
 	kfree(devfreq);
 }
 
@@ -210,130 +325,8 @@ static void _remove_devfreq(struct devfreq *devfreq, bool skip)
 static void devfreq_dev_release(struct device *dev)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
-	bool central_polling = !devfreq->governor->no_central_polling;
-
-	/*
-	 * If devfreq_dev_release() was called by device_unregister() of
-	 * _remove_devfreq(), we cannot mutex_lock(&devfreq->lock) and
-	 * being_removed is already set. This also partially checks the case
-	 * where devfreq_dev_release() is called from a thread other than
-	 * the one called _remove_devfreq(); however, this case is
-	 * dealt completely with another following being_removed check.
-	 *
-	 * Because being_removed is never being
-	 * unset, we do not need to worry about race conditions on
-	 * being_removed.
-	 */
-	if (devfreq->being_removed)
-		return;
 
-	if (central_polling)
-		mutex_lock(&devfreq_list_lock);
-
-	mutex_lock(&devfreq->lock);
-
-	/*
-	 * Check being_removed flag again for the case where
-	 * devfreq_dev_release() was called in a thread other than the one
-	 * possibly called _remove_devfreq().
-	 */
-	if (devfreq->being_removed) {
-		mutex_unlock(&devfreq->lock);
-		goto out;
-	}
-
-	/* devfreq->lock is unlocked and removed in _removed_devfreq() */
 	_remove_devfreq(devfreq, true);
-
-out:
-	if (central_polling)
-		mutex_unlock(&devfreq_list_lock);
-}
-
-/**
- * devfreq_monitor() - Periodically poll devfreq objects.
- * @work: the work struct used to run devfreq_monitor periodically.
- *
- */
-static void devfreq_monitor(struct work_struct *work)
-{
-	static unsigned long last_polled_at;
-	struct devfreq *devfreq, *tmp;
-	int error;
-	unsigned long jiffies_passed;
-	unsigned long next_jiffies = ULONG_MAX, now = jiffies;
-	struct device *dev;
-
-	/* Initially last_polled_at = 0, polling every device at bootup */
-	jiffies_passed = now - last_polled_at;
-	last_polled_at = now;
-	if (jiffies_passed == 0)
-		jiffies_passed = 1;
-
-	mutex_lock(&devfreq_list_lock);
-	list_for_each_entry_safe(devfreq, tmp, &devfreq_list, node) {
-		mutex_lock(&devfreq->lock);
-		dev = devfreq->dev.parent;
-
-		/* Do not remove tmp for a while */
-		wait_remove_device = tmp;
-
-		if (devfreq->governor->no_central_polling ||
-		    devfreq->next_polling == 0) {
-			mutex_unlock(&devfreq->lock);
-			continue;
-		}
-		mutex_unlock(&devfreq_list_lock);
-
-		/*
-		 * Reduce more next_polling if devfreq_wq took an extra
-		 * delay. (i.e., CPU has been idled.)
-		 */
-		if (devfreq->next_polling <= jiffies_passed) {
-			error = update_devfreq(devfreq);
-
-			/* Remove a devfreq with an error. */
-			if (error && error != -EAGAIN) {
-
-				dev_err(dev, "Due to update_devfreq error(%d), devfreq(%s) is removed from the device\n",
-					error, devfreq->governor->name);
-
-				/*
-				 * Unlock devfreq before locking the list
-				 * in order to avoid deadlock with
-				 * find_device_devfreq or others
-				 */
-				mutex_unlock(&devfreq->lock);
-				mutex_lock(&devfreq_list_lock);
-				/* Check if devfreq is already removed */
-				if (IS_ERR(find_device_devfreq(dev)))
-					continue;
-				mutex_lock(&devfreq->lock);
-				/* This unlocks devfreq->lock and free it */
-				_remove_devfreq(devfreq, false);
-				continue;
-			}
-			devfreq->next_polling = devfreq->polling_jiffies;
-		} else {
-			devfreq->next_polling -= jiffies_passed;
-		}
-
-		if (devfreq->next_polling)
-			next_jiffies = (next_jiffies > devfreq->next_polling) ?
-					devfreq->next_polling : next_jiffies;
-
-		mutex_unlock(&devfreq->lock);
-		mutex_lock(&devfreq_list_lock);
-	}
-	wait_remove_device = NULL;
-	mutex_unlock(&devfreq_list_lock);
-
-	if (next_jiffies > 0 && next_jiffies < ULONG_MAX) {
-		polling = true;
-		queue_delayed_work(devfreq_wq, &devfreq_work, next_jiffies);
-	} else {
-		polling = false;
-	}
 }
 
 /**
@@ -357,16 +350,13 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		return ERR_PTR(-EINVAL);
 	}
 
-
-	if (!governor->no_central_polling) {
-		mutex_lock(&devfreq_list_lock);
-		devfreq = find_device_devfreq(dev);
-		mutex_unlock(&devfreq_list_lock);
-		if (!IS_ERR(devfreq)) {
-			dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__);
-			err = -EINVAL;
-			goto err_out;
-		}
+	mutex_lock(&devfreq_list_lock);
+	devfreq = find_device_devfreq(dev);
+	mutex_unlock(&devfreq_list_lock);
+	if (!IS_ERR(devfreq)) {
+		dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__);
+		err = -EINVAL;
+		goto err_out;
 	}
 
 	devfreq = kzalloc(sizeof(struct devfreq), GFP_KERNEL);
@@ -386,48 +376,41 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->governor = governor;
 	devfreq->previous_freq = profile->initial_freq;
 	devfreq->data = data;
-	devfreq->next_polling = devfreq->polling_jiffies
-			      = msecs_to_jiffies(devfreq->profile->polling_ms);
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
 	dev_set_name(&devfreq->dev, dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
 		put_device(&devfreq->dev);
+		mutex_unlock(&devfreq->lock);
 		goto err_dev;
 	}
 
-	if (governor->init)
-		err = governor->init(devfreq);
-	if (err)
-		goto err_init;
-
 	mutex_unlock(&devfreq->lock);
 
-	if (governor->no_central_polling)
-		goto out;
-
 	mutex_lock(&devfreq_list_lock);
-
 	list_add(&devfreq->node, &devfreq_list);
+	mutex_unlock(&devfreq_list_lock);
 
-	if (devfreq_wq && devfreq->next_polling && !polling) {
-		polling = true;
-		queue_delayed_work(devfreq_wq, &devfreq_work,
-				   devfreq->next_polling);
+	err = devfreq->governor->event_handler(devfreq,
+				DEVFREQ_GOV_START, NULL);
+	if (err) {
+		dev_err(dev, "%s: Unable to start governor for the device\n",
+			__func__);
+		goto err_init;
 	}
-	mutex_unlock(&devfreq_list_lock);
-out:
+
 	return devfreq;
 
 err_init:
+	list_del(&devfreq->node);
 	device_unregister(&devfreq->dev);
 err_dev:
-	mutex_unlock(&devfreq->lock);
 	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(devfreq_add_device);
 
 /**
  * devfreq_remove_device() - Remove devfreq feature from a device.
@@ -435,30 +418,14 @@ err_out:
  */
 int devfreq_remove_device(struct devfreq *devfreq)
 {
-	bool central_polling;
-
 	if (!devfreq)
 		return -EINVAL;
 
-	central_polling = !devfreq->governor->no_central_polling;
-
-	if (central_polling) {
-		mutex_lock(&devfreq_list_lock);
-		while (wait_remove_device == devfreq) {
-			mutex_unlock(&devfreq_list_lock);
-			schedule();
-			mutex_lock(&devfreq_list_lock);
-		}
-	}
-
-	mutex_lock(&devfreq->lock);
-	_remove_devfreq(devfreq, false); /* it unlocks devfreq->lock */
-
-	if (central_polling)
-		mutex_unlock(&devfreq_list_lock);
+	_remove_devfreq(devfreq, false);
 
 	return 0;
 }
+EXPORT_SYMBOL(devfreq_remove_device);
 
 static ssize_t show_governor(struct device *dev,
 			     struct device_attribute *attr, char *buf)
@@ -490,35 +457,13 @@ static ssize_t store_polling_interval(struct device *dev,
 	if (ret != 1)
 		goto out;
 
-	mutex_lock(&df->lock);
-	df->profile->polling_ms = value;
-	df->next_polling = df->polling_jiffies
-			 = msecs_to_jiffies(value);
-	mutex_unlock(&df->lock);
-
+	df->governor->event_handler(df, DEVFREQ_GOV_INTERVAL, &value);
 	ret = count;
 
-	if (df->governor->no_central_polling)
-		goto out;
-
-	mutex_lock(&devfreq_list_lock);
-	if (df->next_polling > 0 && !polling) {
-		polling = true;
-		queue_delayed_work(devfreq_wq, &devfreq_work,
-				   df->next_polling);
-	}
-	mutex_unlock(&devfreq_list_lock);
 out:
 	return ret;
 }
 
-static ssize_t show_central_polling(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n",
-		       !to_devfreq(dev)->governor->no_central_polling);
-}
-
 static ssize_t store_min_freq(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -590,7 +535,6 @@ static ssize_t show_max_freq(struct device *dev, struct device_attribute *attr,
 static struct device_attribute devfreq_attrs[] = {
 	__ATTR(governor, S_IRUGO, show_governor, NULL),
 	__ATTR(cur_freq, S_IRUGO, show_freq, NULL),
-	__ATTR(central_polling, S_IRUGO, show_central_polling, NULL),
 	__ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval,
 	       store_polling_interval),
 	__ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq),
@@ -598,23 +542,6 @@ static struct device_attribute devfreq_attrs[] = {
 	{ },
 };
 
-/**
- * devfreq_start_polling() - Initialize data structure for devfreq framework and
- *			   start polling registered devfreq devices.
- */
-static int __init devfreq_start_polling(void)
-{
-	mutex_lock(&devfreq_list_lock);
-	polling = false;
-	devfreq_wq = create_freezable_workqueue("devfreq_wq");
-	INIT_DEFERRABLE_WORK(&devfreq_work, devfreq_monitor);
-	mutex_unlock(&devfreq_list_lock);
-
-	devfreq_monitor(&devfreq_work.work);
-	return 0;
-}
-late_initcall(devfreq_start_polling);
-
 static int __init devfreq_init(void)
 {
 	devfreq_class = class_create(THIS_MODULE, "devfreq");
@@ -622,7 +549,15 @@ static int __init devfreq_init(void)
 		pr_err("%s: couldn't create class\n", __FILE__);
 		return PTR_ERR(devfreq_class);
 	}
+
+	devfreq_wq = create_freezable_workqueue("devfreq_wq");
+	if (IS_ERR(devfreq_wq)) {
+		class_destroy(devfreq_class);
+		pr_err("%s: couldn't create workqueue\n", __FILE__);
+		return PTR_ERR(devfreq_wq);
+	}
 	devfreq_class->dev_attrs = devfreq_attrs;
+
 	return 0;
 }
 subsys_initcall(devfreq_init);
@@ -630,6 +565,7 @@ subsys_initcall(devfreq_init);
 static void __exit devfreq_exit(void)
 {
 	class_destroy(devfreq_class);
+	destroy_workqueue(devfreq_wq);
 }
 module_exit(devfreq_exit);
 
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index ea7f13c58ded..bb3aff32d627 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -18,7 +18,18 @@
 
 #define to_devfreq(DEV)	container_of((DEV), struct devfreq, dev)
 
+/* Devfreq events */
+#define DEVFREQ_GOV_START			0x1
+#define DEVFREQ_GOV_STOP			0x2
+#define DEVFREQ_GOV_INTERVAL			0x3
+
 /* Caution: devfreq->lock must be locked before calling update_devfreq */
 extern int update_devfreq(struct devfreq *devfreq);
 
+extern void devfreq_monitor_start(struct devfreq *devfreq);
+extern void devfreq_monitor_stop(struct devfreq *devfreq);
+extern void devfreq_monitor_suspend(struct devfreq *devfreq);
+extern void devfreq_monitor_resume(struct devfreq *devfreq);
+extern void devfreq_interval_update(struct devfreq *devfreq,
+					unsigned int *delay);
 #endif /* _GOVERNOR_H */
diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c
index af75ddd4f158..eea3f9bd7894 100644
--- a/drivers/devfreq/governor_performance.c
+++ b/drivers/devfreq/governor_performance.c
@@ -26,14 +26,22 @@ static int devfreq_performance_func(struct devfreq *df,
 	return 0;
 }
 
-static int performance_init(struct devfreq *devfreq)
+static int devfreq_performance_handler(struct devfreq *devfreq,
+				unsigned int event, void *data)
 {
-	return update_devfreq(devfreq);
+	int ret = 0;
+
+	if (event == DEVFREQ_GOV_START) {
+		mutex_lock(&devfreq->lock);
+		ret = update_devfreq(devfreq);
+		mutex_unlock(&devfreq->lock);
+	}
+
+	return ret;
 }
 
 const struct devfreq_governor devfreq_performance = {
 	.name = "performance",
-	.init = performance_init,
 	.get_target_freq = devfreq_performance_func,
-	.no_central_polling = true,
+	.event_handler = devfreq_performance_handler,
 };
diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c
index fec0cdbd2477..2868d98ed3e2 100644
--- a/drivers/devfreq/governor_powersave.c
+++ b/drivers/devfreq/governor_powersave.c
@@ -23,14 +23,22 @@ static int devfreq_powersave_func(struct devfreq *df,
 	return 0;
 }
 
-static int powersave_init(struct devfreq *devfreq)
+static int devfreq_powersave_handler(struct devfreq *devfreq,
+				unsigned int event, void *data)
 {
-	return update_devfreq(devfreq);
+	int ret = 0;
+
+	if (event == DEVFREQ_GOV_START) {
+		mutex_lock(&devfreq->lock);
+		ret = update_devfreq(devfreq);
+		mutex_unlock(&devfreq->lock);
+	}
+
+	return ret;
 }
 
 const struct devfreq_governor devfreq_powersave = {
 	.name = "powersave",
-	.init = powersave_init,
 	.get_target_freq = devfreq_powersave_func,
-	.no_central_polling = true,
+	.event_handler = devfreq_powersave_handler,
 };
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index a2e3eae79011..3716a659122b 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -12,6 +12,7 @@
 #include <linux/errno.h>
 #include <linux/devfreq.h>
 #include <linux/math64.h>
+#include "governor.h"
 
 /* Default constants for DevFreq-Simple-Ondemand (DFSO) */
 #define DFSO_UPTHRESHOLD	(90)
@@ -88,7 +89,30 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	return 0;
 }
 
+static int devfreq_simple_ondemand_handler(struct devfreq *devfreq,
+				unsigned int event, void *data)
+{
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		devfreq_monitor_start(devfreq);
+		break;
+
+	case DEVFREQ_GOV_STOP:
+		devfreq_monitor_stop(devfreq);
+		break;
+
+	case DEVFREQ_GOV_INTERVAL:
+		devfreq_interval_update(devfreq, (unsigned int *)data);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 const struct devfreq_governor devfreq_simple_ondemand = {
 	.name = "simple_ondemand",
 	.get_target_freq = devfreq_simple_ondemand_func,
+	.event_handler = devfreq_simple_ondemand_handler,
 };
diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 0681246fc89d..7067555bd444 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -116,10 +116,27 @@ static void userspace_exit(struct devfreq *devfreq)
 	devfreq->data = NULL;
 }
 
+static int devfreq_userspace_handler(struct devfreq *devfreq,
+			unsigned int event, void *data)
+{
+	int ret = 0;
+
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		ret = userspace_init(devfreq);
+		break;
+	case DEVFREQ_GOV_STOP:
+		userspace_exit(devfreq);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 const struct devfreq_governor devfreq_userspace = {
 	.name = "userspace",
 	.get_target_freq = devfreq_userspace_func,
-	.init = userspace_init,
-	.exit = userspace_exit,
-	.no_central_polling = true,
+	.event_handler = devfreq_userspace_handler,
 };
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 281c72a3b9d5..9cdffde74bb5 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -91,25 +91,18 @@ struct devfreq_dev_profile {
  *			status of the device (load = busy_time / total_time).
  *			If no_central_polling is set, this callback is called
  *			only with update_devfreq() notified by OPP.
- * @init		Called when the devfreq is being attached to a device
- * @exit		Called when the devfreq is being removed from a
- *			device. Governor should stop any internal routines
- *			before return because related data may be
- *			freed after exit().
- * @no_central_polling	Do not use devfreq's central polling mechanism.
- *			When this is set, devfreq will not call
- *			get_target_freq with devfreq_monitor(). However,
- *			devfreq will call get_target_freq with
- *			devfreq_update() notified by OPP framework.
+ * @event_handler       Callback for devfreq core framework to notify events
+ *                      to governors. Events include per device governor
+ *                      init and exit, opp changes out of devfreq, suspend
+ *                      and resume of per device devfreq during device idle.
  *
  * Note that the callbacks are called with devfreq->lock locked by devfreq.
  */
 struct devfreq_governor {
 	const char name[DEVFREQ_NAME_LEN];
 	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
-	int (*init)(struct devfreq *this);
-	void (*exit)(struct devfreq *this);
-	const bool no_central_polling;
+	int (*event_handler)(struct devfreq *devfreq,
+				unsigned int event, void *data);
 };
 
 /**
@@ -124,18 +117,13 @@ struct devfreq_governor {
  * @nb		notifier block used to notify devfreq object that it should
  *		reevaluate operable frequencies. Devfreq users may use
  *		devfreq.nb to the corresponding register notifier call chain.
- * @polling_jiffies	interval in jiffies.
+ * @work	delayed work for load monitoring.
  * @previous_freq	previously configured frequency value.
- * @next_polling	the number of remaining jiffies to poll with
- *			"devfreq_monitor" executions to reevaluate
- *			frequency/voltage of the device. Set by
- *			profile's polling_ms interval.
  * @data	Private data of the governor. The devfreq framework does not
  *		touch this.
- * @being_removed	a flag to mark that this object is being removed in
- *			order to prevent trying to remove the object multiple times.
  * @min_freq	Limit minimum frequency requested by user (0: none)
  * @max_freq	Limit maximum frequency requested by user (0: none)
+ * @stop_polling	 devfreq polling status of a device.
  *
  * This structure stores the devfreq information for a give device.
  *
@@ -153,17 +141,15 @@ struct devfreq {
 	struct devfreq_dev_profile *profile;
 	const struct devfreq_governor *governor;
 	struct notifier_block nb;
+	struct delayed_work work;
 
-	unsigned long polling_jiffies;
 	unsigned long previous_freq;
-	unsigned int next_polling;
 
 	void *data; /* private data for governors */
 
-	bool being_removed;
-
 	unsigned long min_freq;
 	unsigned long max_freq;
+	bool stop_polling;
 };
 
 #if defined(CONFIG_PM_DEVFREQ)
-- 
cgit v1.2.3-55-g7522


From 206c30cfeb7c05dfb9fdfd81b1deb933627e43c1 Mon Sep 17 00:00:00 2001
From: Rajagopal Venkat
Date: Fri, 26 Oct 2012 01:50:18 +0200
Subject: PM / devfreq: Add suspend and resume apis

Add devfreq suspend/resume apis for devfreq users. This patch
supports suspend and resume of devfreq load monitoring, required
for devices which can idle.

Signed-off-by: Rajagopal Venkat <rajagopal.venkat@linaro.org>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/devfreq/devfreq.c                 | 28 ++++++++++++++++++++++++++++
 drivers/devfreq/governor.h                |  2 ++
 drivers/devfreq/governor_simpleondemand.c |  9 +++++++++
 include/linux/devfreq.h                   | 12 ++++++++++++
 4 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 1aaf1aeb1f1d..999600da21c7 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -427,6 +427,34 @@ int devfreq_remove_device(struct devfreq *devfreq)
 }
 EXPORT_SYMBOL(devfreq_remove_device);
 
+/**
+ * devfreq_suspend_device() - Suspend devfreq of a device.
+ * @devfreq: the devfreq instance to be suspended
+ */
+int devfreq_suspend_device(struct devfreq *devfreq)
+{
+	if (!devfreq)
+		return -EINVAL;
+
+	return devfreq->governor->event_handler(devfreq,
+				DEVFREQ_GOV_SUSPEND, NULL);
+}
+EXPORT_SYMBOL(devfreq_suspend_device);
+
+/**
+ * devfreq_resume_device() - Resume devfreq of a device.
+ * @devfreq: the devfreq instance to be resumed
+ */
+int devfreq_resume_device(struct devfreq *devfreq)
+{
+	if (!devfreq)
+		return -EINVAL;
+
+	return devfreq->governor->event_handler(devfreq,
+				DEVFREQ_GOV_RESUME, NULL);
+}
+EXPORT_SYMBOL(devfreq_resume_device);
+
 static ssize_t show_governor(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index bb3aff32d627..26432ac0a398 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -22,6 +22,8 @@
 #define DEVFREQ_GOV_START			0x1
 #define DEVFREQ_GOV_STOP			0x2
 #define DEVFREQ_GOV_INTERVAL			0x3
+#define DEVFREQ_GOV_SUSPEND			0x4
+#define DEVFREQ_GOV_RESUME			0x5
 
 /* Caution: devfreq->lock must be locked before calling update_devfreq */
 extern int update_devfreq(struct devfreq *devfreq);
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index 3716a659122b..b5cf0fb24efe 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -104,6 +104,15 @@ static int devfreq_simple_ondemand_handler(struct devfreq *devfreq,
 	case DEVFREQ_GOV_INTERVAL:
 		devfreq_interval_update(devfreq, (unsigned int *)data);
 		break;
+
+	case DEVFREQ_GOV_SUSPEND:
+		devfreq_monitor_suspend(devfreq);
+		break;
+
+	case DEVFREQ_GOV_RESUME:
+		devfreq_monitor_resume(devfreq);
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 9cdffde74bb5..ee243a3229b8 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -158,6 +158,8 @@ extern struct devfreq *devfreq_add_device(struct device *dev,
 				  const struct devfreq_governor *governor,
 				  void *data);
 extern int devfreq_remove_device(struct devfreq *devfreq);
+extern int devfreq_suspend_device(struct devfreq *devfreq);
+extern int devfreq_resume_device(struct devfreq *devfreq);
 
 /* Helper functions for devfreq user device driver with OPP. */
 extern struct opp *devfreq_recommended_opp(struct device *dev,
@@ -211,6 +213,16 @@ static int devfreq_remove_device(struct devfreq *devfreq)
 	return 0;
 }
 
+static int devfreq_suspend_device(struct devfreq *devfreq)
+{
+	return 0;
+}
+
+static int devfreq_resume_device(struct devfreq *devfreq)
+{
+	return 0;
+}
+
 static struct opp *devfreq_recommended_opp(struct device *dev,
 					   unsigned long *freq, u32 flags)
 {
-- 
cgit v1.2.3-55-g7522


From 7f98a905dca6e4f144cdd4462edeac00c2bdc379 Mon Sep 17 00:00:00 2001
From: Rajagopal Venkat
Date: Fri, 26 Oct 2012 01:50:26 +0200
Subject: PM / devfreq: Add current freq callback in device profile

Devfreq returns governor predicted frequency as current frequency
via sysfs interface. But device may not support all frequencies
that governor predicts. So add a callback in device profile to get
current freq from driver. Also add a new sysfs node to expose
governor predicted next target frequency.

Signed-off-by: Rajagopal Venkat <rajagopal.venkat@linaro.org>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/ABI/testing/sysfs-class-devfreq | 11 ++++++++++-
 drivers/devfreq/devfreq.c                     | 14 ++++++++++++++
 include/linux/devfreq.h                       |  3 +++
 3 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 89283b1b0240..e6cf08e6734d 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -19,7 +19,16 @@ Date:		September 2011
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
 Description:
 		The /sys/class/devfreq/.../cur_freq shows the current
-		frequency of the corresponding devfreq object.
+		frequency of the corresponding devfreq object. Same as
+		target_freq when get_cur_freq() is not implemented by
+		devfreq driver.
+
+What:		/sys/class/devfreq/.../target_freq
+Date:		September 2012
+Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
+Description:
+		The /sys/class/devfreq/.../target_freq shows the next governor
+		predicted target frequency of the corresponding devfreq object.
 
 What:		/sys/class/devfreq/.../polling_interval
 Date:		September 2011
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 999600da21c7..f2f8a976c465 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -463,6 +463,19 @@ static ssize_t show_governor(struct device *dev,
 
 static ssize_t show_freq(struct device *dev,
 			 struct device_attribute *attr, char *buf)
+{
+	unsigned long freq;
+	struct devfreq *devfreq = to_devfreq(dev);
+
+	if (devfreq->profile->get_cur_freq &&
+		!devfreq->profile->get_cur_freq(devfreq->dev.parent, &freq))
+			return sprintf(buf, "%lu\n", freq);
+
+	return sprintf(buf, "%lu\n", devfreq->previous_freq);
+}
+
+static ssize_t show_target_freq(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%lu\n", to_devfreq(dev)->previous_freq);
 }
@@ -563,6 +576,7 @@ static ssize_t show_max_freq(struct device *dev, struct device_attribute *attr,
 static struct device_attribute devfreq_attrs[] = {
 	__ATTR(governor, S_IRUGO, show_governor, NULL),
 	__ATTR(cur_freq, S_IRUGO, show_freq, NULL),
+	__ATTR(target_freq, S_IRUGO, show_target_freq, NULL),
 	__ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval,
 	       store_polling_interval),
 	__ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq),
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index ee243a3229b8..7e2e2ea4a70f 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -66,6 +66,8 @@ struct devfreq_dev_status {
  *			explained above with "DEVFREQ_FLAG_*" macros.
  * @get_dev_status	The device should provide the current performance
  *			status to devfreq, which is used by governors.
+ * @get_cur_freq	The device should provide the current frequency
+ *			at which it is operating.
  * @exit		An optional callback that is called when devfreq
  *			is removing the devfreq object due to error or
  *			from devfreq_remove_device() call. If the user
@@ -79,6 +81,7 @@ struct devfreq_dev_profile {
 	int (*target)(struct device *dev, unsigned long *freq, u32 flags);
 	int (*get_dev_status)(struct device *dev,
 			      struct devfreq_dev_status *stat);
+	int (*get_cur_freq)(struct device *dev, unsigned long *freq);
 	void (*exit)(struct device *dev);
 };
 
-- 
cgit v1.2.3-55-g7522


From aa0010f880ab542da3ad0e72992f2dc518ac68a0 Mon Sep 17 00:00:00 2001
From: Amerigo Wang
Date: Sun, 11 Nov 2012 21:52:33 +0000
Subject: net: convert __IPTUNNEL_XMIT() to an inline function

__IPTUNNEL_XMIT() is an ugly macro, convert it to a static
inline function, so make it more readable.

IPTUNNEL_XMIT() is unused, just remove it.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c       |  2 +-
 include/linux/if_tunnel.h | 10 ++++++++++
 include/net/ipip.h        | 40 +++++++++++++++++++++-------------------
 net/ipv4/ip_gre.c         | 14 +-------------
 net/ipv4/ip_vti.c         |  9 ---------
 net/ipv4/ipip.c           | 14 +-------------
 net/ipv6/ip6_gre.c        |  9 ---------
 net/ipv6/ip6_tunnel.c     |  8 --------
 net/ipv6/sit.c            | 14 +-------------
 9 files changed, 35 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 8aca888734da..9814d67237f1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -769,7 +769,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	vxlan_set_owner(dev, skb);
 
-	/* See __IPTUNNEL_XMIT */
+	/* See iptunnel_xmit() */
 	skb->ip_summed = CHECKSUM_NONE;
 	ip_select_ident(iph, &rt->dst, NULL);
 
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 1cc595a67cc9..06d1d5badd36 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -4,5 +4,15 @@
 #include <linux/ip.h>
 #include <linux/in6.h>
 #include <uapi/linux/if_tunnel.h>
+#include <linux/u64_stats_sync.h>
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	struct u64_stats_sync	syncp;
+};
 
 #endif /* _IF_TUNNEL_H_ */
diff --git a/include/net/ipip.h b/include/net/ipip.h
index ddc077c51f32..21947cf4fa46 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -48,25 +48,27 @@ struct ip_tunnel_prl_entry {
 	struct rcu_head			rcu_head;
 };
 
-#define __IPTUNNEL_XMIT(stats1, stats2) do {				\
-	int err;							\
-	int pkt_len = skb->len - skb_transport_offset(skb);		\
-									\
-	skb->ip_summed = CHECKSUM_NONE;					\
-	ip_select_ident(iph, &rt->dst, NULL);				\
-									\
-	err = ip_local_out(skb);					\
-	if (likely(net_xmit_eval(err) == 0)) {				\
-		u64_stats_update_begin(&(stats1)->syncp);		\
-		(stats1)->tx_bytes += pkt_len;				\
-		(stats1)->tx_packets++;					\
-		u64_stats_update_end(&(stats1)->syncp);			\
-	} else {							\
-		(stats2)->tx_errors++;					\
-		(stats2)->tx_aborted_errors++;				\
-	}								\
-} while (0)
+static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+	struct iphdr *iph = ip_hdr(skb);
+	int pkt_len = skb->len - skb_transport_offset(skb);
+	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
 
-#define IPTUNNEL_XMIT() __IPTUNNEL_XMIT(txq, stats)
+	nf_reset(skb);
+	skb->ip_summed = CHECKSUM_NONE;
+	ip_select_ident(iph, skb_dst(skb), NULL);
+
+	err = ip_local_out(skb);
+	if (likely(net_xmit_eval(err) == 0)) {
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += pkt_len;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
+	}
+}
 
 #endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7240f8e2dd45..37000ae24c55 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -171,15 +171,6 @@ struct ipgre_net {
 #define for_each_ip_tunnel_rcu(start) \
 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
-	u64	rx_packets;
-	u64	rx_bytes;
-	u64	tx_packets;
-	u64	tx_bytes;
-	struct u64_stats_sync	syncp;
-};
-
 static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
 						   struct rtnl_link_stats64 *tot)
 {
@@ -753,7 +744,6 @@ drop:
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct pcpu_tstats *tstats;
 	const struct iphdr  *old_iph = ip_hdr(skb);
 	const struct iphdr  *tiph;
 	struct flowi4 fl4;
@@ -977,9 +967,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 	}
 
-	nf_reset(skb);
-	tstats = this_cpu_ptr(dev->tstats);
-	__IPTUNNEL_XMIT(tstats, &dev->stats);
+	iptunnel_xmit(skb, dev);
 	return NETDEV_TX_OK;
 
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1831092f999f..e0f2c88f03c1 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -71,15 +71,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev);
 #define for_each_ip_tunnel_rcu(start) \
 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
-	u64	rx_packets;
-	u64	rx_bytes;
-	u64	tx_packets;
-	u64	tx_bytes;
-	struct	u64_stats_sync	syncp;
-};
-
 #define VTI_XMIT(stats1, stats2) do {				\
 	int err;						\
 	int pkt_len = skb->len;					\
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 720855e41100..3a4ad7d82f67 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -147,15 +147,6 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
 #define for_each_ip_tunnel_rcu(start) \
 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
-	u64	rx_packets;
-	u64	rx_bytes;
-	u64	tx_packets;
-	u64	tx_bytes;
-	struct u64_stats_sync	syncp;
-};
-
 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
 						  struct rtnl_link_stats64 *tot)
 {
@@ -465,7 +456,6 @@ drop:
 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct pcpu_tstats *tstats;
 	const struct iphdr  *tiph = &tunnel->parms.iph;
 	u8     tos = tunnel->parms.iph.tos;
 	__be16 df = tiph->frag_off;
@@ -592,9 +582,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if ((iph->ttl = tiph->ttl) == 0)
 		iph->ttl	=	old_iph->ttl;
 
-	nf_reset(skb);
-	tstats = this_cpu_ptr(dev->tstats);
-	__IPTUNNEL_XMIT(tstats, &dev->stats);
+	iptunnel_xmit(skb, dev);
 	return NETDEV_TX_OK;
 
 tx_error_icmp:
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 12aa473e9793..672101db71ee 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -116,15 +116,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
 #define for_each_ip_tunnel_rcu(start) \
 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
-	u64	rx_packets;
-	u64	rx_bytes;
-	u64	tx_packets;
-	u64	tx_bytes;
-	struct u64_stats_sync	syncp;
-};
-
 static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
 		struct rtnl_link_stats64 *tot)
 {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 424ed45ef122..8db4d9b7ab14 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -95,14 +95,6 @@ struct ip6_tnl_net {
 	struct ip6_tnl __rcu **tnls[2];
 };
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
-	unsigned long	rx_packets;
-	unsigned long	rx_bytes;
-	unsigned long	tx_packets;
-	unsigned long	tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
-
 static struct net_device_stats *ip6_get_stats(struct net_device *dev)
 {
 	struct pcpu_tstats sum = { 0 };
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b543c56cad28..ffe83ef70cf7 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -88,15 +88,6 @@ struct sit_net {
 #define for_each_ip_tunnel_rcu(start) \
 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
-	u64	rx_packets;
-	u64	rx_bytes;
-	u64	tx_packets;
-	u64	tx_bytes;
-	struct u64_stats_sync	syncp;
-};
-
 static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
 						   struct rtnl_link_stats64 *tot)
 {
@@ -685,7 +676,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct pcpu_tstats *tstats;
 	const struct iphdr  *tiph = &tunnel->parms.iph;
 	const struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
@@ -866,9 +856,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	if ((iph->ttl = tiph->ttl) == 0)
 		iph->ttl	=	iph6->hop_limit;
 
-	nf_reset(skb);
-	tstats = this_cpu_ptr(dev->tstats);
-	__IPTUNNEL_XMIT(tstats, &dev->stats);
+	iptunnel_xmit(skb, dev);
 	return NETDEV_TX_OK;
 
 tx_error_icmp:
-- 
cgit v1.2.3-55-g7522


From e086cadc08e259150b2ab8f7f4a16dbf9e3c2f22 Mon Sep 17 00:00:00 2001
From: Amerigo Wang
Date: Sun, 11 Nov 2012 21:52:34 +0000
Subject: net: unify for_each_ip_tunnel_rcu()

The defitions of for_each_ip_tunnel_rcu() are same,
so unify it. Also, don't hide the parameter 't'.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h |  7 +++++++
 net/ipv4/ip_gre.c         | 14 ++++----------
 net/ipv4/ip_vti.c         | 13 ++++---------
 net/ipv4/ipip.c           | 13 +++----------
 net/ipv6/ip6_gre.c        | 14 ++++----------
 net/ipv6/sit.c            | 13 +++----------
 6 files changed, 25 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 06d1d5badd36..f4e56ecd0b1a 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -6,6 +6,13 @@
 #include <uapi/linux/if_tunnel.h>
 #include <linux/u64_stats_sync.h>
 
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(pos, start) \
+	for (pos = rcu_dereference(start); pos; pos = rcu_dereference(pos->next))
+
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_tstats {
 	u64	rx_packets;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 37000ae24c55..127f2a1e67f5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,12 +164,6 @@ struct ipgre_net {
 #define tunnels_r	tunnels[2]
 #define tunnels_l	tunnels[1]
 #define tunnels_wc	tunnels[0]
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
-	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
 static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
 						   struct rtnl_link_stats64 *tot)
@@ -241,7 +235,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
 		       ARPHRD_ETHER : ARPHRD_IPGRE;
 	int score, cand_score = 4;
 
-	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
 		if (local != t->parms.iph.saddr ||
 		    remote != t->parms.iph.daddr ||
 		    !(t->dev->flags & IFF_UP))
@@ -268,7 +262,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
 		}
 	}
 
-	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
 		if (remote != t->parms.iph.daddr ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
@@ -294,7 +288,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
 		}
 	}
 
-	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
 		if ((local != t->parms.iph.saddr &&
 		     (local != t->parms.iph.daddr ||
 		      !ipv4_is_multicast(local))) ||
@@ -322,7 +316,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
 		}
 	}
 
-	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
 		if (t->parms.i_key != key ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e0f2c88f03c1..516188b0dc1e 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -66,11 +66,6 @@ static void vti_tunnel_setup(struct net_device *dev);
 static void vti_dev_free(struct net_device *dev);
 static int vti_tunnel_bind_dev(struct net_device *dev);
 
-/* Locking : hash tables are protected by RCU and RTNL */
-
-#define for_each_ip_tunnel_rcu(start) \
-	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
 #define VTI_XMIT(stats1, stats2) do {				\
 	int err;						\
 	int pkt_len = skb->len;					\
@@ -133,19 +128,19 @@ static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
 	struct ip_tunnel *t;
 	struct vti_net *ipn = net_generic(net, vti_net_id);
 
-	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
-	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 
-	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 			return t;
 
-	for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
 		if (t && (t->dev->flags&IFF_UP))
 			return t;
 	return NULL;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3a4ad7d82f67..099fc1c428b4 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -140,13 +140,6 @@ static void ipip_tunnel_setup(struct net_device *dev);
 static void ipip_dev_free(struct net_device *dev);
 static struct rtnl_link_ops ipip_link_ops __read_mostly;
 
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
-	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
 						  struct rtnl_link_stats64 *tot)
 {
@@ -189,16 +182,16 @@ static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
 	struct ip_tunnel *t;
 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
 
-	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 
-	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 
-	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
+	for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 			return t;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 672101db71ee..823fd64d0136 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -109,12 +109,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
 #define tunnels_r	tunnels[2]
 #define tunnels_l	tunnels[1]
 #define tunnels_wc	tunnels[0]
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
-	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
 static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
 		struct rtnl_link_stats64 *tot)
@@ -172,7 +166,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 		       ARPHRD_ETHER : ARPHRD_IP6GRE;
 	int score, cand_score = 4;
 
-	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
 		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
 		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
 		    key != t->parms.i_key ||
@@ -197,7 +191,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 		}
 	}
 
-	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
 		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
 		    key != t->parms.i_key ||
 		    !(t->dev->flags & IFF_UP))
@@ -221,7 +215,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 		}
 	}
 
-	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
 		if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
 			  (!ipv6_addr_equal(local, &t->parms.raddr) ||
 				 !ipv6_addr_is_multicast(local))) ||
@@ -247,7 +241,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 		}
 	}
 
-	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+	for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
 		if (t->parms.i_key != key ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ffe83ef70cf7..5bce2f698044 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -81,13 +81,6 @@ struct sit_net {
 	struct net_device *fb_tunnel_dev;
 };
 
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
-	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
 static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
 						   struct rtnl_link_stats64 *tot)
 {
@@ -133,20 +126,20 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
 	struct ip_tunnel *t;
 	struct sit_net *sitn = net_generic(net, sit_net_id);
 
-	for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
+	for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr &&
 		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
+	for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
 		if (remote == t->parms.iph.daddr &&
 		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
+	for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
 		if (local == t->parms.iph.saddr &&
 		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
-- 
cgit v1.2.3-55-g7522


From 1cf3d8b3d24cd383ddfd5442c83ec5c355ffc2f7 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot
Date: Tue, 2 Oct 2012 16:57:57 +0000
Subject: powerpc+of: Add of node/property notification chain for adds and
 removes

This patch moves the notification chain for updates to the device tree
from the powerpc/pseries code to the base OF code. This makes this
functionality available to all architectures.

Additionally the notification chain is updated to allow notifications
for property add/remove/update. To make this work a pointer to a new
struct (of_prop_reconfig) is passed to the routines in the notification chain.
The of_prop_reconfig property contains a pointer to the node containing the
property and a pointer to the property itself. In the case of property
updates, the property pointer refers to the new property.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pSeries_reconfig.h     | 32 -----------
 arch/powerpc/kernel/prom.c                      |  6 +-
 arch/powerpc/platforms/pseries/dlpar.c          | 14 +++--
 arch/powerpc/platforms/pseries/hotplug-cpu.c    |  8 +--
 arch/powerpc/platforms/pseries/hotplug-memory.c | 60 ++++++++++++++------
 arch/powerpc/platforms/pseries/iommu.c          |  6 +-
 arch/powerpc/platforms/pseries/reconfig.c       | 68 ++---------------------
 arch/powerpc/platforms/pseries/setup.c          |  6 +-
 drivers/crypto/nx/nx-842.c                      | 20 +++----
 drivers/crypto/nx/nx.c                          |  1 -
 drivers/of/base.c                               | 74 +++++++++++++++++++++++--
 include/linux/of.h                              | 22 ++++++--
 12 files changed, 163 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index c07edfe98b98..adc00d2e75b0 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -2,43 +2,11 @@
 #define _PPC64_PSERIES_RECONFIG_H
 #ifdef __KERNEL__
 
-#include <linux/notifier.h>
-
-/*
- * Use this API if your code needs to know about OF device nodes being
- * added or removed on pSeries systems.
- */
-
-#define PSERIES_RECONFIG_ADD		0x0001
-#define PSERIES_RECONFIG_REMOVE		0x0002
-#define PSERIES_DRCONF_MEM_ADD		0x0003
-#define PSERIES_DRCONF_MEM_REMOVE	0x0004
-#define PSERIES_UPDATE_PROPERTY		0x0005
-
-/**
- * pSeries_reconfig_notify - Notifier value structure for OFDT property updates
- *
- * @node: Device tree node which owns the property being updated
- * @property: Updated property
- */
-struct pSeries_reconfig_prop_update {
-	struct device_node *node;
-	struct property *property;
-};
-
 #ifdef CONFIG_PPC_PSERIES
-extern int pSeries_reconfig_notifier_register(struct notifier_block *);
-extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
-extern int pSeries_reconfig_notify(unsigned long action, void *p);
 /* Not the best place to put this, will be fixed when we move some
  * of the rtas suspend-me stuff to pseries */
 extern void pSeries_coalesce_init(void);
 #else /* !CONFIG_PPC_PSERIES */
-static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
 static inline void pSeries_coalesce_init(void) { }
 #endif /* CONFIG_PPC_PSERIES */
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 37725e86651e..6feb60c3c6e3 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -32,6 +32,7 @@
 #include <linux/debugfs.h>
 #include <linux/irq.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -49,7 +50,6 @@
 #include <asm/btext.h>
 #include <asm/sections.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/pci-bridge.h>
 #include <asm/kexec.h>
 #include <asm/opal.h>
@@ -802,7 +802,7 @@ static int prom_reconfig_notifier(struct notifier_block *nb,
 	int err;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = of_finish_dynamic_node(node);
 		if (err < 0)
 			printk(KERN_ERR "finish_node returned %d\n", err);
@@ -821,7 +821,7 @@ static struct notifier_block prom_reconfig_nb = {
 
 static int __init prom_reconfig_setup(void)
 {
-	return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
+	return of_reconfig_notifier_register(&prom_reconfig_nb);
 }
 __initcall(prom_reconfig_setup);
 #endif
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index e36789bd4e6c..a1a7b9a67ffd 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -16,13 +16,13 @@
 #include <linux/spinlock.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 #include "offline_states.h"
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/uaccess.h>
 #include <asm/rtas.h>
-#include <asm/pSeries_reconfig.h>
 
 struct cc_workarea {
 	u32	drc_index;
@@ -262,24 +262,26 @@ int dlpar_attach_node(struct device_node *dn)
 	if (!dn->parent)
 		return -ENOMEM;
 
-	rc = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, dn);
+	rc = of_attach_node(dn);
 	if (rc) {
 		printk(KERN_ERR "Failed to add device node %s\n",
 		       dn->full_name);
 		return rc;
 	}
 
-	of_attach_node(dn);
 	of_node_put(dn->parent);
 	return 0;
 }
 
 int dlpar_detach_node(struct device_node *dn)
 {
-	pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, dn);
-	of_detach_node(dn);
-	of_node_put(dn); /* Must decrement the refcount */
+	int rc;
+
+	rc = of_detach_node(dn);
+	if (rc)
+		return rc;
 
+	of_node_put(dn); /* Must decrement the refcount */
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 64c97d8ac0c5..a38956269fbf 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -23,12 +23,12 @@
 #include <linux/delay.h>
 #include <linux/sched.h>	/* for idle_task_exit */
 #include <linux/cpu.h>
+#include <linux/of.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/firmware.h>
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/xics.h>
 #include "plpar_wrappers.h"
 #include "offline_states.h"
@@ -333,10 +333,10 @@ static int pseries_smp_notifier(struct notifier_block *nb,
 	int err = 0;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = pseries_add_processor(node);
 		break;
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		pseries_remove_processor(node);
 		break;
 	}
@@ -399,7 +399,7 @@ static int __init pseries_cpu_hotplug_init(void)
 
 	/* Processors can be added/removed only on LPAR */
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
-		pSeries_reconfig_notifier_register(&pseries_smp_nb);
+		of_reconfig_notifier_register(&pseries_smp_nb);
 		cpu_maps_update_begin();
 		if (cede_offline_enabled && parse_cede_parameters() == 0) {
 			default_offline_state = CPU_STATE_INACTIVE;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ecdb0a6b3171..2372c609fa2b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -16,7 +16,6 @@
 
 #include <asm/firmware.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/sparsemem.h>
 
 static unsigned long get_memblock_size(void)
@@ -187,42 +186,69 @@ static int pseries_add_memory(struct device_node *np)
 	return (ret < 0) ? -EINVAL : 0;
 }
 
-static int pseries_drconf_memory(unsigned long *base, unsigned int action)
+static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
 {
+	struct of_drconf_cell *new_drmem, *old_drmem;
 	unsigned long memblock_size;
-	int rc;
+	u32 entries;
+	u32 *p;
+	int i, rc = -EINVAL;
 
 	memblock_size = get_memblock_size();
 	if (!memblock_size)
 		return -EINVAL;
 
-	if (action == PSERIES_DRCONF_MEM_ADD) {
-		rc = memblock_add(*base, memblock_size);
-		rc = (rc < 0) ? -EINVAL : 0;
-	} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
-		rc = pseries_remove_memblock(*base, memblock_size);
-	} else {
-		rc = -EINVAL;
+	p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
+	if (!p)
+		return -EINVAL;
+
+	/* The first int of the property is the number of lmb's described
+	 * by the property. This is followed by an array of of_drconf_cell
+	 * entries. Get the niumber of entries and skip to the array of
+	 * of_drconf_cell's.
+	 */
+	entries = *p++;
+	old_drmem = (struct of_drconf_cell *)p;
+
+	p = (u32 *)pr->prop->value;
+	p++;
+	new_drmem = (struct of_drconf_cell *)p;
+
+	for (i = 0; i < entries; i++) {
+		if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) &&
+		    (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) {
+			rc = pseries_remove_memblock(old_drmem[i].base_addr,
+						     memblock_size);
+			break;
+		} else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) &&
+			   (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) {
+			rc = memblock_add(old_drmem[i].base_addr,
+					  memblock_size);
+			rc = (rc < 0) ? -EINVAL : 0;
+			break;
+		}
 	}
 
 	return rc;
 }
 
 static int pseries_memory_notifier(struct notifier_block *nb,
-				unsigned long action, void *node)
+				   unsigned long action, void *node)
 {
+	struct of_prop_reconfig *pr;
 	int err = 0;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = pseries_add_memory(node);
 		break;
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		err = pseries_remove_memory(node);
 		break;
-	case PSERIES_DRCONF_MEM_ADD:
-	case PSERIES_DRCONF_MEM_REMOVE:
-		err = pseries_drconf_memory(node, action);
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		pr = (struct of_prop_reconfig *)node;
+		if (!strcmp(pr->prop->name, "ibm,dynamic-memory"))
+			err = pseries_update_drconf_memory(pr);
 		break;
 	}
 	return notifier_from_errno(err);
@@ -235,7 +261,7 @@ static struct notifier_block pseries_mem_nb = {
 static int __init pseries_memory_hotplug_init(void)
 {
 	if (firmware_has_feature(FW_FEATURE_LPAR))
-		pSeries_reconfig_notifier_register(&pseries_mem_nb);
+		of_reconfig_notifier_register(&pseries_mem_nb);
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6153eea27ce7..da5594c441e4 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -36,13 +36,13 @@
 #include <linux/dma-mapping.h>
 #include <linux/crash_dump.h>
 #include <linux/memory.h>
+#include <linux/of.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/firmware.h>
 #include <asm/tce.h>
 #include <asm/ppc-pci.h>
@@ -1294,7 +1294,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 	struct direct_window *window;
 
 	switch (action) {
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		if (pci && pci->iommu_table)
 			iommu_free_table(pci->iommu_table, np->full_name);
 
@@ -1357,7 +1357,7 @@ void iommu_init_early_pSeries(void)
 	}
 
 
-	pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
+	of_reconfig_notifier_register(&iommu_reconfig_nb);
 	register_memory_notifier(&iommu_mem_nb);
 
 	set_pci_dma_ops(&dma_iommu_ops);
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index f99f1ca8035b..720a0cc2e69f 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -16,11 +16,11 @@
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/uaccess.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/mmu.h>
 
 /**
@@ -55,28 +55,6 @@ static struct device_node *derive_parent(const char *path)
 	return parent;
 }
 
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
-
-int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register);
-
-void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
-{
-	blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister);
-
-int pSeries_reconfig_notify(unsigned long action, void *p)
-{
-	int err = blocking_notifier_call_chain(&pSeries_reconfig_chain,
-						action, p);
-
-	return notifier_to_errno(err);
-}
-
 static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
 {
 	struct device_node *np;
@@ -100,13 +78,12 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist
 		goto out_err;
 	}
 
-	err = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, np);
+	err = of_attach_node(np);
 	if (err) {
 		printk(KERN_ERR "Failed to add device node %s\n", path);
 		goto out_err;
 	}
 
-	of_attach_node(np);
 	of_node_put(np->parent);
 
 	return 0;
@@ -134,9 +111,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
 		return -EBUSY;
 	}
 
-	pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, np);
 	of_detach_node(np);
-
 	of_node_put(parent);
 	of_node_put(np); /* Must decrement the refcount */
 	return 0;
@@ -381,10 +356,9 @@ static int do_remove_property(char *buf, size_t bufsize)
 static int do_update_property(char *buf, size_t bufsize)
 {
 	struct device_node *np;
-	struct pSeries_reconfig_prop_update upd_value;
 	unsigned char *value;
 	char *name, *end, *next_prop;
-	int rc, length;
+	int length;
 	struct property *newprop;
 	buf = parse_node(buf, bufsize, &np);
 	end = buf + bufsize;
@@ -406,41 +380,7 @@ static int do_update_property(char *buf, size_t bufsize)
 	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
 		slb_set_size(*(int *)value);
 
-	upd_value.node = np;
-	upd_value.property = newprop;
-	pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value);
-
-	rc = prom_update_property(np, newprop);
-	if (rc)
-		return rc;
-
-	/* For memory under the ibm,dynamic-reconfiguration-memory node
-	 * of the device tree, adding and removing memory is just an update
-	 * to the ibm,dynamic-memory property instead of adding/removing a
-	 * memory node in the device tree.  For these cases we still need to
-	 * involve the notifier chain.
-	 */
-	if (!strcmp(name, "ibm,dynamic-memory")) {
-		int action;
-
-		next_prop = parse_next_property(next_prop, end, &name,
-						&length, &value);
-		if (!next_prop)
-			return -EINVAL;
-
-		if (!strcmp(name, "add"))
-			action = PSERIES_DRCONF_MEM_ADD;
-		else
-			action = PSERIES_DRCONF_MEM_REMOVE;
-
-		rc = pSeries_reconfig_notify(action, value);
-		if (rc) {
-			prom_update_property(np, newprop);
-			return rc;
-		}
-	}
-
-	return 0;
+	return prom_update_property(np, newprop);
 }
 
 /**
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e3cb7ae61658..e1a5b8a32d25 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -40,6 +40,7 @@
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/cpuidle.h>
+#include <linux/of.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -63,7 +64,6 @@
 #include <asm/smp.h>
 #include <asm/firmware.h>
 #include <asm/eeh.h>
-#include <asm/pSeries_reconfig.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -258,7 +258,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
 	int err = NOTIFY_OK;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		pci = np->parent->data;
 		if (pci) {
 			update_dn_pci_info(np, pci->phb);
@@ -389,7 +389,7 @@ static void __init pSeries_setup_arch(void)
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
 	find_and_init_phbs();
-	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+	of_reconfig_notifier_register(&pci_dn_reconfig_nb);
 
 	pSeries_nvram_init();
 
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 0ce625738677..6c4c000671c5 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 
 #include <asm/page.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/vio.h>
 
 #include "nx_csbcpb.h" /* struct nx_csbcpb */
@@ -1014,26 +1013,23 @@ error_out:
  *	NOTIFY_BAD encoded with error number on failure, use
  *		notifier_to_errno() to decode this value
  */
-static int nx842_OF_notifier(struct notifier_block *np,
-					unsigned long action,
-					void *update)
+static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
+			     void *update)
 {
-	struct pSeries_reconfig_prop_update *upd;
+	struct of_prop_reconfig *upd = update;
 	struct nx842_devdata *local_devdata;
 	struct device_node *node = NULL;
 
-	upd = (struct pSeries_reconfig_prop_update *)update;
-
 	rcu_read_lock();
 	local_devdata = rcu_dereference(devdata);
 	if (local_devdata)
 		node = local_devdata->dev->of_node;
 
 	if (local_devdata &&
-			action == PSERIES_UPDATE_PROPERTY &&
-			!strcmp(upd->node->name, node->name)) {
+			action == OF_RECONFIG_UPDATE_PROPERTY &&
+			!strcmp(upd->dn->name, node->name)) {
 		rcu_read_unlock();
-		nx842_OF_upd(upd->property);
+		nx842_OF_upd(upd->prop);
 	} else
 		rcu_read_unlock();
 
@@ -1182,7 +1178,7 @@ static int __init nx842_probe(struct vio_dev *viodev,
 	synchronize_rcu();
 	kfree(old_devdata);
 
-	pSeries_reconfig_notifier_register(&nx842_of_nb);
+	of_reconfig_notifier_register(&nx842_of_nb);
 
 	ret = nx842_OF_upd(NULL);
 	if (ret && ret != -ENODEV) {
@@ -1228,7 +1224,7 @@ static int __exit nx842_remove(struct vio_dev *viodev)
 	spin_lock_irqsave(&devdata_mutex, flags);
 	old_devdata = rcu_dereference_check(devdata,
 			lockdep_is_held(&devdata_mutex));
-	pSeries_reconfig_notifier_unregister(&nx842_of_nb);
+	of_reconfig_notifier_unregister(&nx842_of_nb);
 	rcu_assign_pointer(devdata, NULL);
 	spin_unlock_irqrestore(&devdata_mutex, flags);
 	synchronize_rcu();
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 638110efae9b..f7a8a16aa7d3 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -33,7 +33,6 @@
 #include <linux/scatterlist.h>
 #include <linux/device.h>
 #include <linux/of.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/hvcall.h>
 #include <asm/vio.h>
 
diff --git a/drivers/of/base.c b/drivers/of/base.c
index bbd073f53c9f..87b63850e8dc 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1028,6 +1028,24 @@ int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
 }
 EXPORT_SYMBOL(of_parse_phandle_with_args);
 
+#if defined(CONFIG_OF_DYNAMIC)
+static int of_property_notify(int action, struct device_node *np,
+			      struct property *prop)
+{
+	struct of_prop_reconfig pr;
+
+	pr.dn = np;
+	pr.prop = prop;
+	return of_reconfig_notify(action, &pr);
+}
+#else
+static int of_property_notify(int action, struct device_node *np,
+			      struct property *prop)
+{
+	return 0;
+}
+#endif
+
 /**
  * prom_add_property - Add a property to a node
  */
@@ -1035,6 +1053,11 @@ int prom_add_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
+	int rc;
+
+	rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
+	if (rc)
+		return rc;
 
 	prop->next = NULL;
 	write_lock_irqsave(&devtree_lock, flags);
@@ -1072,6 +1095,11 @@ int prom_remove_property(struct device_node *np, struct property *prop)
 	struct property **next;
 	unsigned long flags;
 	int found = 0;
+	int rc;
+
+	rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
@@ -1114,7 +1142,11 @@ int prom_update_property(struct device_node *np,
 {
 	struct property **next, *oldprop;
 	unsigned long flags;
-	int found = 0;
+	int rc, found = 0;
+
+	rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
+	if (rc)
+		return rc;
 
 	if (!newprop->name)
 		return -EINVAL;
@@ -1160,6 +1192,26 @@ int prom_update_property(struct device_node *np,
  * device tree nodes.
  */
 
+static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
+
+int of_reconfig_notifier_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&of_reconfig_chain, nb);
+}
+
+int of_reconfig_notifier_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
+}
+
+int of_reconfig_notify(unsigned long action, void *p)
+{
+	int rc;
+
+	rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
+	return notifier_to_errno(rc);
+}
+
 #ifdef CONFIG_PROC_DEVICETREE
 static void of_add_proc_dt_entry(struct device_node *dn)
 {
@@ -1179,9 +1231,14 @@ static void of_add_proc_dt_entry(struct device_node *dn)
 /**
  * of_attach_node - Plug a device node into the tree and global list.
  */
-void of_attach_node(struct device_node *np)
+int of_attach_node(struct device_node *np)
 {
 	unsigned long flags;
+	int rc;
+
+	rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 	np->sibling = np->parent->child;
@@ -1191,6 +1248,7 @@ void of_attach_node(struct device_node *np)
 	write_unlock_irqrestore(&devtree_lock, flags);
 
 	of_add_proc_dt_entry(np);
+	return 0;
 }
 
 #ifdef CONFIG_PROC_DEVICETREE
@@ -1220,23 +1278,28 @@ static void of_remove_proc_dt_entry(struct device_node *dn)
  * The caller must hold a reference to the node.  The memory associated with
  * the node is not freed until its refcount goes to zero.
  */
-void of_detach_node(struct device_node *np)
+int of_detach_node(struct device_node *np)
 {
 	struct device_node *parent;
 	unsigned long flags;
+	int rc = 0;
+
+	rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 
 	if (of_node_check_flag(np, OF_DETACHED)) {
 		/* someone already detached it */
 		write_unlock_irqrestore(&devtree_lock, flags);
-		return;
+		return rc;
 	}
 
 	parent = np->parent;
 	if (!parent) {
 		write_unlock_irqrestore(&devtree_lock, flags);
-		return;
+		return rc;
 	}
 
 	if (allnodes == np)
@@ -1265,6 +1328,7 @@ void of_detach_node(struct device_node *np)
 	write_unlock_irqrestore(&devtree_lock, flags);
 
 	of_remove_proc_dt_entry(np);
+	return rc;
 }
 #endif /* defined(CONFIG_OF_DYNAMIC) */
 
diff --git a/include/linux/of.h b/include/linux/of.h
index 72843b72a2b2..fb5d87b66e3e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -22,6 +22,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/spinlock.h>
 #include <linux/topology.h>
+#include <linux/notifier.h>
 
 #include <asm/byteorder.h>
 #include <asm/errno.h>
@@ -272,11 +273,24 @@ extern int prom_remove_property(struct device_node *np, struct property *prop);
 extern int prom_update_property(struct device_node *np,
 				struct property *newprop);
 
-#if defined(CONFIG_OF_DYNAMIC)
 /* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(struct device_node *);
-#endif
+#define OF_RECONFIG_ATTACH_NODE		0x0001
+#define OF_RECONFIG_DETACH_NODE		0x0002
+#define OF_RECONFIG_ADD_PROPERTY	0x0003
+#define OF_RECONFIG_REMOVE_PROPERTY	0x0004
+#define OF_RECONFIG_UPDATE_PROPERTY	0x0005
+
+struct of_prop_reconfig {
+	struct device_node	*dn;
+	struct property		*prop;
+};
+
+extern int of_reconfig_notifier_register(struct notifier_block *);
+extern int of_reconfig_notifier_unregister(struct notifier_block *);
+extern int of_reconfig_notify(unsigned long, void *);
+
+extern int of_attach_node(struct device_node *);
+extern int of_detach_node(struct device_node *);
 
 #define of_match_ptr(_ptr)	(_ptr)
 
-- 
cgit v1.2.3-55-g7522


From 79d1c712958f94372482ad74578b00f44e744c12 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot
Date: Tue, 2 Oct 2012 16:58:46 +0000
Subject: powerpc+of: Rename the drivers/of prom_* functions to of_*

Rename the prom_*_property routines of the generic OF code to of_*_property.
This brings them in line with the naming used by the rest of the OF code.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Acked-by: Geoff Levand <geoff@infradead.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/arm/mach-mxs/mach-mxs.c              |  2 +-
 arch/powerpc/kernel/machine_kexec.c       | 14 +++++++-------
 arch/powerpc/kernel/machine_kexec_64.c    |  8 ++++----
 arch/powerpc/kernel/pci_32.c              |  2 +-
 arch/powerpc/platforms/85xx/p1022_ds.c    |  6 +++---
 arch/powerpc/platforms/ps3/os-area.c      |  6 +++---
 arch/powerpc/platforms/pseries/iommu.c    |  4 ++--
 arch/powerpc/platforms/pseries/mobility.c |  4 ++--
 arch/powerpc/platforms/pseries/reconfig.c |  6 +++---
 drivers/macintosh/smu.c                   |  2 +-
 drivers/of/base.c                         | 15 +++++++--------
 include/linux/of.h                        |  7 +++----
 12 files changed, 37 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 4748ec551a68..d61b915ce52c 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -211,7 +211,7 @@ static void __init update_fec_mac_prop(enum mac_oui oui)
 		macaddr[4] = (val >> 8) & 0xff;
 		macaddr[5] = (val >> 0) & 0xff;
 
-		prom_update_property(np, newmac);
+		of_update_property(np, newmac);
 	}
 }
 
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index fa9f6c72f557..e1ec57e87b3b 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -218,23 +218,23 @@ static void __init export_crashk_values(struct device_node *node)
 	 * be sure what's in them, so remove them. */
 	prop = of_find_property(node, "linux,crashkernel-base", NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	prop = of_find_property(node, "linux,crashkernel-size", NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	if (crashk_res.start != 0) {
-		prom_add_property(node, &crashk_base_prop);
+		of_add_property(node, &crashk_base_prop);
 		crashk_size = resource_size(&crashk_res);
-		prom_add_property(node, &crashk_size_prop);
+		of_add_property(node, &crashk_size_prop);
 	}
 
 	/*
 	 * memory_limit is required by the kexec-tools to limit the
 	 * crash regions to the actual memory used.
 	 */
-	prom_update_property(node, &memory_limit_prop);
+	of_update_property(node, &memory_limit_prop);
 }
 
 static int __init kexec_setup(void)
@@ -249,11 +249,11 @@ static int __init kexec_setup(void)
 	/* remove any stale properties so ours can be found */
 	prop = of_find_property(node, kernel_end_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	/* information needed by userspace when using default_machine_kexec */
 	kernel_end = __pa(_end);
-	prom_add_property(node, &kernel_end_prop);
+	of_add_property(node, &kernel_end_prop);
 
 	export_crashk_values(node);
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index d7f609086a99..7206701b1ff1 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -389,14 +389,14 @@ static int __init export_htab_values(void)
 	/* remove any stale propertys so ours can be found */
 	prop = of_find_property(node, htab_base_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 	prop = of_find_property(node, htab_size_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	htab_base = __pa(htab_address);
-	prom_add_property(node, &htab_base_prop);
-	prom_add_property(node, &htab_size_prop);
+	of_add_property(node, &htab_base_prop);
+	of_add_property(node, &htab_size_prop);
 
 	of_node_put(node);
 	return 0;
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 4b06ec5a502e..64f526a321f5 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -208,7 +208,7 @@ pci_create_OF_bus_map(void)
 		of_prop->name = "pci-OF-bus-map";
 		of_prop->length = 256;
 		of_prop->value = &of_prop[1];
-		prom_add_property(dn, of_prop);
+		of_add_property(dn, of_prop);
 		of_node_put(dn);
 	}
 }
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 848a3e98e1c1..8fb12570b2f5 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -539,7 +539,7 @@ static void __init p1022_ds_setup_arch(void)
 				};
 
 				/*
-				 * prom_update_property() is called before
+				 * of_update_property() is called before
 				 * kmalloc() is available, so the 'new' object
 				 * should be allocated in the global area.
 				 * The easiest way is to do that is to
@@ -548,7 +548,7 @@ static void __init p1022_ds_setup_arch(void)
 				 */
 				pr_info("p1022ds: disabling %s node",
 					np2->full_name);
-				prom_update_property(np2, &nor_status);
+				of_update_property(np2, &nor_status);
 				of_node_put(np2);
 			}
 
@@ -564,7 +564,7 @@ static void __init p1022_ds_setup_arch(void)
 
 				pr_info("p1022ds: disabling %s node",
 					np2->full_name);
-				prom_update_property(np2, &nand_status);
+				of_update_property(np2, &nand_status);
 				of_node_put(np2);
 			}
 
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 56d26bc4fd41..09787139834d 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -280,13 +280,13 @@ static void os_area_set_property(struct device_node *node,
 
 	if (tmp) {
 		pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name);
-		prom_remove_property(node, tmp);
+		of_remove_property(node, tmp);
 	}
 
-	result = prom_add_property(node, prop);
+	result = of_add_property(node, prop);
 
 	if (result)
-		pr_debug("%s:%d prom_set_property failed\n", __func__,
+		pr_debug("%s:%d of_set_property failed\n", __func__,
 			__LINE__);
 }
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index da5594c441e4..e2685badb5db 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -760,7 +760,7 @@ static void remove_ddw(struct device_node *np)
 	__remove_ddw(np, ddw_avail, liobn);
 
 delprop:
-	ret = prom_remove_property(np, win64);
+	ret = of_remove_property(np, win64);
 	if (ret)
 		pr_warning("%s: failed to remove direct window property: %d\n",
 			np->full_name, ret);
@@ -1070,7 +1070,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 		goto out_free_window;
 	}
 
-	ret = prom_add_property(pdn, win64);
+	ret = of_add_property(pdn, win64);
 	if (ret) {
 		dev_err(&dev->dev, "unable to add dma window property for %s: %d",
 			 pdn->full_name, ret);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index dd30b12edfe4..6573808cc5f3 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -116,7 +116,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
 	}
 
 	if (!more) {
-		prom_update_property(dn, new_prop);
+		of_update_property(dn, new_prop);
 		new_prop = NULL;
 	}
 
@@ -172,7 +172,7 @@ static int update_dt_node(u32 phandle)
 
 			case 0x80000000:
 				prop = of_find_property(dn, prop_name, NULL);
-				prom_remove_property(dn, prop);
+				of_remove_property(dn, prop);
 				prop = NULL;
 				break;
 
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 720a0cc2e69f..30b358dc2beb 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -326,7 +326,7 @@ static int do_add_property(char *buf, size_t bufsize)
 	if (!prop)
 		return -ENOMEM;
 
-	prom_add_property(np, prop);
+	of_add_property(np, prop);
 
 	return 0;
 }
@@ -350,7 +350,7 @@ static int do_remove_property(char *buf, size_t bufsize)
 
 	prop = of_find_property(np, buf, NULL);
 
-	return prom_remove_property(np, prop);
+	return of_remove_property(np, prop);
 }
 
 static int do_update_property(char *buf, size_t bufsize)
@@ -380,7 +380,7 @@ static int do_update_property(char *buf, size_t bufsize)
 	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
 		slb_set_size(*(int *)value);
 
-	return prom_update_property(np, newprop);
+	return of_update_property(np, newprop);
 }
 
 /**
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 7d5a6b40b31c..5b939509db3b 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -997,7 +997,7 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id)
 		       "%02x !\n", id, hdr->id);
 		goto failure;
 	}
-	if (prom_add_property(smu->of_node, prop)) {
+	if (of_add_property(smu->of_node, prop)) {
 		printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x "
 		       "property !\n", id);
 		goto failure;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 87b63850e8dc..02d94c4ea83c 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1047,9 +1047,9 @@ static int of_property_notify(int action, struct device_node *np,
 #endif
 
 /**
- * prom_add_property - Add a property to a node
+ * of_add_property - Add a property to a node
  */
-int prom_add_property(struct device_node *np, struct property *prop)
+int of_add_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
@@ -1083,14 +1083,14 @@ int prom_add_property(struct device_node *np, struct property *prop)
 }
 
 /**
- * prom_remove_property - Remove a property from a node.
+ * of_remove_property - Remove a property from a node.
  *
  * Note that we don't actually remove it, since we have given out
  * who-knows-how-many pointers to the data using get-property.
  * Instead we just move the property to the "dead properties"
  * list, so it won't be found any more.
  */
-int prom_remove_property(struct device_node *np, struct property *prop)
+int of_remove_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
@@ -1129,7 +1129,7 @@ int prom_remove_property(struct device_node *np, struct property *prop)
 }
 
 /*
- * prom_update_property - Update a property in a node, if the property does
+ * of_update_property - Update a property in a node, if the property does
  * not exist, add it.
  *
  * Note that we don't actually remove it, since we have given out
@@ -1137,8 +1137,7 @@ int prom_remove_property(struct device_node *np, struct property *prop)
  * Instead we just move the property to the "dead properties" list,
  * and add the new property to the property list
  */
-int prom_update_property(struct device_node *np,
-			 struct property *newprop)
+int of_update_property(struct device_node *np, struct property *newprop)
 {
 	struct property **next, *oldprop;
 	unsigned long flags;
@@ -1153,7 +1152,7 @@ int prom_update_property(struct device_node *np,
 
 	oldprop = of_find_property(np, newprop->name, NULL);
 	if (!oldprop)
-		return prom_add_property(np, newprop);
+		return of_add_property(np, newprop);
 
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
diff --git a/include/linux/of.h b/include/linux/of.h
index fb5d87b66e3e..a093b2fe5dfb 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -268,10 +268,9 @@ extern int of_alias_get_id(struct device_node *np, const char *stem);
 
 extern int of_machine_is_compatible(const char *compat);
 
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop);
+extern int of_add_property(struct device_node *np, struct property *prop);
+extern int of_remove_property(struct device_node *np, struct property *prop);
+extern int of_update_property(struct device_node *np, struct property *newprop);
 
 /* For updating the device tree at runtime */
 #define OF_RECONFIG_ATTACH_NODE		0x0001
-- 
cgit v1.2.3-55-g7522


From 3d567e0e291c4ffd041cf653aea3c38a1d5f4620 Mon Sep 17 00:00:00 2001
From: Nithin Nayak Sujir
Date: Wed, 14 Nov 2012 14:44:26 +0000
Subject: tg3: Set 10_100_ONLY flag for additional 10/100 Mbps devices

- Also refactor the conditional to use the existing tg3_pci_tbl array.
- Set flags in the driver_data field of the pci_device_id structure to
identify these devices.
- Add PCI_DEVICE_SUB() to pci.h to declare PCI 4-part IDs to match these
devices.

Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 87 +++++++++++++++++++++++++------------
 drivers/net/ethernet/broadcom/tg3.h |  5 +++
 include/linux/pci.h                 | 14 ++++++
 3 files changed, 79 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 038ce0215e3e..8d4581bdba3c 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -226,6 +226,9 @@ static int tg3_debug = -1;	/* -1 == use TG3_DEF_MSG_ENABLE as value */
 module_param(tg3_debug, int, 0);
 MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
 
+#define TG3_DRV_DATA_FLAG_10_100_ONLY	0x0001
+#define TG3_DRV_DATA_FLAG_5705_10_100	0x0002
+
 static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701)},
@@ -245,20 +248,28 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY |
+			TG3_DRV_DATA_FLAG_5705_10_100},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY |
+			TG3_DRV_DATA_FLAG_5705_10_100},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY |
+			TG3_DRV_DATA_FLAG_5705_10_100},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)},
@@ -266,8 +277,13 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5756)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)},
+	{PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5787M,
+			PCI_VENDOR_ID_LENOVO,
+			TG3PCI_SUBDEVICE_ID_LENOVO_5787M),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787F),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)},
@@ -286,9 +302,16 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761SE)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5785_G)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5785_F)},
+	{PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780,
+			PCI_VENDOR_ID_AI, TG3PCI_SUBDEVICE_ID_ACER_57780_A),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
+	{PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780,
+			PCI_VENDOR_ID_AI, TG3PCI_SUBDEVICE_ID_ACER_57780_B),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57760)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57788)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5717)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5717_C)},
@@ -297,8 +320,10 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57785)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57761)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57765)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57791)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57795)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57791),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57795),
+	 .driver_data = TG3_DRV_DATA_FLAG_10_100_ONLY},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5719)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5720)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57762)},
@@ -14466,7 +14491,30 @@ static void __devinit tg3_detect_asic_rev(struct tg3 *tp, u32 misc_ctrl_reg)
 		tg3_flag_set(tp, 5705_PLUS);
 }
 
-static int __devinit tg3_get_invariants(struct tg3 *tp)
+static bool tg3_10_100_only_device(struct tg3 *tp,
+				   const struct pci_device_id *ent)
+{
+	u32 grc_misc_cfg = tr32(GRC_MISC_CFG) & GRC_MISC_CFG_BOARD_ID_MASK;
+
+	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
+	    (grc_misc_cfg == 0x8000 || grc_misc_cfg == 0x4000)) ||
+	    (tp->phy_flags & TG3_PHYFLG_IS_FET))
+		return true;
+
+	if (ent->driver_data & TG3_DRV_DATA_FLAG_10_100_ONLY) {
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
+			if (ent->driver_data & TG3_DRV_DATA_FLAG_5705_10_100)
+				return true;
+		} else {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static int __devinit tg3_get_invariants(struct tg3 *tp,
+					const struct pci_device_id *ent)
 {
 	u32 misc_ctrl_reg;
 	u32 pci_state_reg, grc_misc_cfg;
@@ -15145,22 +15193,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	else
 		tp->mac_mode = 0;
 
-	/* these are limited to 10/100 only */
-	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
-	     (grc_misc_cfg == 0x8000 || grc_misc_cfg == 0x4000)) ||
-	    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 &&
-	     tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM &&
-	     (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5901 ||
-	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5901_2 ||
-	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5705F)) ||
-	    (tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM &&
-	     (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F ||
-	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F ||
-	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5787F)) ||
-	    tp->pdev->device == TG3PCI_DEVICE_TIGON3_57790 ||
-	    tp->pdev->device == TG3PCI_DEVICE_TIGON3_57791 ||
-	    tp->pdev->device == TG3PCI_DEVICE_TIGON3_57795 ||
-	    (tp->phy_flags & TG3_PHYFLG_IS_FET))
+	if (tg3_10_100_only_device(tp, ent))
 		tp->phy_flags |= TG3_PHYFLG_10_100_ONLY;
 
 	err = tg3_phy_probe(tp);
@@ -16039,7 +16072,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	dev->netdev_ops = &tg3_netdev_ops;
 	dev->irq = pdev->irq;
 
-	err = tg3_get_invariants(tp);
+	err = tg3_get_invariants(tp, ent);
 	if (err) {
 		dev_err(&pdev->dev,
 			"Problem fetching invariants of chip, aborting\n");
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index b3c2bf2c082f..6ff9964d2ea1 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -44,6 +44,7 @@
 #define  TG3PCI_DEVICE_TIGON3_5761S	 0x1688
 #define  TG3PCI_DEVICE_TIGON3_5761SE	 0x1689
 #define  TG3PCI_DEVICE_TIGON3_57780	 0x1692
+#define  TG3PCI_DEVICE_TIGON3_5787M	 0x1693
 #define  TG3PCI_DEVICE_TIGON3_57760	 0x1690
 #define  TG3PCI_DEVICE_TIGON3_57790	 0x1694
 #define  TG3PCI_DEVICE_TIGON3_57788	 0x1691
@@ -96,6 +97,10 @@
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_NC7780_2	0x0099
 #define TG3PCI_SUBVENDOR_ID_IBM			PCI_VENDOR_ID_IBM
 #define TG3PCI_SUBDEVICE_ID_IBM_5703SAX2	0x0281
+#define TG3PCI_SUBDEVICE_ID_ACER_57780_A	0x0601
+#define TG3PCI_SUBDEVICE_ID_ACER_57780_B	0x0612
+#define TG3PCI_SUBDEVICE_ID_LENOVO_5787M	0x3056
+
 /* 0x30 --> 0x64 unused */
 #define TG3PCI_MSI_DATA			0x00000064
 /* 0x66 --> 0x68 unused */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee2179546c63..9cbd6705d033 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -603,6 +603,20 @@ struct pci_driver {
 	.vendor = (vend), .device = (dev), \
 	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
 
+/**
+ * PCI_DEVICE_SUB - macro used to describe a specific pci device with subsystem
+ * @vend: the 16 bit PCI Vendor ID
+ * @dev: the 16 bit PCI Device ID
+ * @subvend: the 16 bit PCI Subvendor ID
+ * @subdev: the 16 bit PCI Subdevice ID
+ *
+ * This macro is used to create a struct pci_device_id that matches a
+ * specific device with subsystem information.
+ */
+#define PCI_DEVICE_SUB(vend, dev, subvend, subdev) \
+	.vendor = (vend), .device = (dev), \
+	.subvendor = (subvend), .subdevice = (subdev)
+
 /**
  * PCI_DEVICE_CLASS - macro used to describe a specific pci device class
  * @dev_class: the class, subclass, prog-if triple for this device
-- 
cgit v1.2.3-55-g7522


From 549985ee9c723fea8fd7759b5046fb8249896d50 Mon Sep 17 00:00:00 2001
From: Richard Cochran
Date: Wed, 14 Nov 2012 09:07:56 +0000
Subject: cpsw: simplify the setup of the register pointers

Instead of having a host of different register offsets in the device tree,
this patch simplifies the CPSW code by letting the driver set the proper
register offsets automatically, based on the CPSW version.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt |  42 +----
 drivers/net/ethernet/ti/cpsw.c                 | 242 +++++++++++--------------
 include/linux/platform_data/cpsw.h             |  21 +--
 3 files changed, 108 insertions(+), 197 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 221460714c56..6ddd0286a9b7 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -9,15 +9,7 @@ Required properties:
 			  number
 - interrupt-parent	: The parent interrupt controller
 - cpdma_channels 	: Specifies number of channels in CPDMA
-- host_port_no		: Specifies host port shift
-- cpdma_reg_ofs		: Specifies CPDMA submodule register offset
-- cpdma_sram_ofs	: Specifies CPDMA SRAM offset
-- ale_reg_ofs		: Specifies ALE submodule register offset
 - ale_entries		: Specifies No of entries ALE can hold
-- host_port_reg_ofs	: Specifies host port register offset
-- hw_stats_reg_ofs	: Specifies hardware statistics register offset
-- cpts_reg_ofs		: Specifies the offset of the CPTS registers
-- bd_ram_ofs		: Specifies internal desciptor RAM offset
 - bd_ram_size		: Specifies internal descriptor RAM size
 - rx_descs		: Specifies number of Rx descriptors
 - mac_control		: Specifies Default MAC control register content
@@ -26,8 +18,6 @@ Required properties:
 - cpts_active_slave	: Specifies the slave to use for time stamping
 - cpts_clock_mult	: Numerator to convert input clock ticks into nanoseconds
 - cpts_clock_shift	: Denominator to convert input clock ticks into nanoseconds
-- slave_reg_ofs		: Specifies slave register offset
-- sliver_reg_ofs	: Specifies slave sliver register offset
 - phy_id		: Specifies slave phy id
 - mac-address		: Specifies slave MAC address
 
@@ -49,15 +39,7 @@ Examples:
 		interrupts = <55 0x4>;
 		interrupt-parent = <&intc>;
 		cpdma_channels = <8>;
-		host_port_no = <0>;
-		cpdma_reg_ofs = <0x800>;
-		cpdma_sram_ofs = <0xa00>;
-		ale_reg_ofs = <0xd00>;
 		ale_entries = <1024>;
-		host_port_reg_ofs = <0x108>;
-		hw_stats_reg_ofs = <0x900>;
-		cpts_reg_ofs = <0xc00>;
-		bd_ram_ofs = <0x2000>;
 		bd_ram_size = <0x2000>;
 		no_bd_ram = <0>;
 		rx_descs = <64>;
@@ -67,16 +49,12 @@ Examples:
 		cpts_clock_mult = <0x80000000>;
 		cpts_clock_shift = <29>;
 		cpsw_emac0: slave@0 {
-			slave_reg_ofs = <0x200>;
-			sliver_reg_ofs = <0xd80>;
-			phy_id = "davinci_mdio.16:00";
+			phy_id = <&davinci_mdio>, <0>;
 			/* Filled in by U-Boot */
 			mac-address = [ 00 00 00 00 00 00 ];
 		};
 		cpsw_emac1: slave@1 {
-			slave_reg_ofs = <0x300>;
-			sliver_reg_ofs = <0xdc0>;
-			phy_id = "davinci_mdio.16:01";
+			phy_id = <&davinci_mdio>, <1>;
 			/* Filled in by U-Boot */
 			mac-address = [ 00 00 00 00 00 00 ];
 		};
@@ -87,15 +65,7 @@ Examples:
 		compatible = "ti,cpsw";
 		ti,hwmods = "cpgmac0";
 		cpdma_channels = <8>;
-		host_port_no = <0>;
-		cpdma_reg_ofs = <0x800>;
-		cpdma_sram_ofs = <0xa00>;
-		ale_reg_ofs = <0xd00>;
 		ale_entries = <1024>;
-		host_port_reg_ofs = <0x108>;
-		hw_stats_reg_ofs = <0x900>;
-		cpts_reg_ofs = <0xc00>;
-		bd_ram_ofs = <0x2000>;
 		bd_ram_size = <0x2000>;
 		no_bd_ram = <0>;
 		rx_descs = <64>;
@@ -105,16 +75,12 @@ Examples:
 		cpts_clock_mult = <0x80000000>;
 		cpts_clock_shift = <29>;
 		cpsw_emac0: slave@0 {
-			slave_reg_ofs = <0x200>;
-			sliver_reg_ofs = <0xd80>;
-			phy_id = "davinci_mdio.16:00";
+			phy_id = <&davinci_mdio>, <0>;
 			/* Filled in by U-Boot */
 			mac-address = [ 00 00 00 00 00 00 ];
 		};
 		cpsw_emac1: slave@1 {
-			slave_reg_ofs = <0x300>;
-			sliver_reg_ofs = <0xdc0>;
-			phy_id = "davinci_mdio.16:01";
+			phy_id = <&davinci_mdio>, <1>;
 			/* Filled in by U-Boot */
 			mac-address = [ 00 00 00 00 00 00 ];
 		};
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 7007abaf8c82..0da9c753c567 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -80,6 +80,29 @@ do {								\
 
 #define CPSW_VERSION_1		0x19010a
 #define CPSW_VERSION_2		0x19010c
+
+#define HOST_PORT_NUM		0
+#define SLIVER_SIZE		0x40
+
+#define CPSW1_HOST_PORT_OFFSET	0x028
+#define CPSW1_SLAVE_OFFSET	0x050
+#define CPSW1_SLAVE_SIZE	0x040
+#define CPSW1_CPDMA_OFFSET	0x100
+#define CPSW1_STATERAM_OFFSET	0x200
+#define CPSW1_CPTS_OFFSET	0x500
+#define CPSW1_ALE_OFFSET	0x600
+#define CPSW1_SLIVER_OFFSET	0x700
+
+#define CPSW2_HOST_PORT_OFFSET	0x108
+#define CPSW2_SLAVE_OFFSET	0x200
+#define CPSW2_SLAVE_SIZE	0x100
+#define CPSW2_CPDMA_OFFSET	0x800
+#define CPSW2_STATERAM_OFFSET	0xa00
+#define CPSW2_CPTS_OFFSET	0xc00
+#define CPSW2_ALE_OFFSET	0xd00
+#define CPSW2_SLIVER_OFFSET	0xd80
+#define CPSW2_BD_OFFSET		0x2000
+
 #define CPDMA_RXTHRESH		0x0c0
 #define CPDMA_RXFREE		0x0e0
 #define CPDMA_TXHDP		0x00
@@ -87,21 +110,6 @@ do {								\
 #define CPDMA_TXCP		0x40
 #define CPDMA_RXCP		0x60
 
-#define cpsw_dma_regs(base, offset)		\
-	(void __iomem *)((base) + (offset))
-#define cpsw_dma_rxthresh(base, offset)		\
-	(void __iomem *)((base) + (offset) + CPDMA_RXTHRESH)
-#define cpsw_dma_rxfree(base, offset)		\
-	(void __iomem *)((base) + (offset) + CPDMA_RXFREE)
-#define cpsw_dma_txhdp(base, offset)		\
-	(void __iomem *)((base) + (offset) + CPDMA_TXHDP)
-#define cpsw_dma_rxhdp(base, offset)		\
-	(void __iomem *)((base) + (offset) + CPDMA_RXHDP)
-#define cpsw_dma_txcp(base, offset)		\
-	(void __iomem *)((base) + (offset) + CPDMA_TXCP)
-#define cpsw_dma_rxcp(base, offset)		\
-	(void __iomem *)((base) + (offset) + CPDMA_RXCP)
-
 #define CPSW_POLL_WEIGHT	64
 #define CPSW_MIN_PACKET_SIZE	60
 #define CPSW_MAX_PACKET_SIZE	(1500 + 14 + 4 + 4)
@@ -629,8 +637,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 
 	pm_runtime_get_sync(&priv->pdev->dev);
 
-	reg = __raw_readl(&priv->regs->id_ver);
-	priv->version = reg;
+	reg = priv->version;
 
 	dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n",
 		 CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg),
@@ -995,15 +1002,16 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
 	.get_ts_info	= cpsw_get_ts_info,
 };
 
-static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv)
+static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
+			    u32 slave_reg_ofs, u32 sliver_reg_ofs)
 {
 	void __iomem		*regs = priv->regs;
 	int			slave_num = slave->slave_num;
 	struct cpsw_slave_data	*data = priv->data.slave_data + slave_num;
 
 	slave->data	= data;
-	slave->regs	= regs + data->slave_reg_ofs;
-	slave->sliver	= regs + data->sliver_reg_ofs;
+	slave->regs	= regs + slave_reg_ofs;
+	slave->sliver	= regs + sliver_reg_ofs;
 }
 
 static int cpsw_probe_dt(struct cpsw_platform_data *data,
@@ -1051,8 +1059,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		return -EINVAL;
 	}
 
-	data->no_bd_ram = of_property_read_bool(node, "no_bd_ram");
-
 	if (of_property_read_u32(node, "cpdma_channels", &prop)) {
 		pr_err("Missing cpdma_channels property in the DT.\n");
 		ret = -EINVAL;
@@ -1060,34 +1066,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->channels = prop;
 
-	if (of_property_read_u32(node, "host_port_no", &prop)) {
-		pr_err("Missing host_port_no property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->host_port_num = prop;
-
-	if (of_property_read_u32(node, "cpdma_reg_ofs", &prop)) {
-		pr_err("Missing cpdma_reg_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->cpdma_reg_ofs = prop;
-
-	if (of_property_read_u32(node, "cpdma_sram_ofs", &prop)) {
-		pr_err("Missing cpdma_sram_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->cpdma_sram_ofs = prop;
-
-	if (of_property_read_u32(node, "ale_reg_ofs", &prop)) {
-		pr_err("Missing ale_reg_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->ale_reg_ofs = prop;
-
 	if (of_property_read_u32(node, "ale_entries", &prop)) {
 		pr_err("Missing ale_entries property in the DT.\n");
 		ret = -EINVAL;
@@ -1095,34 +1073,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->ale_entries = prop;
 
-	if (of_property_read_u32(node, "host_port_reg_ofs", &prop)) {
-		pr_err("Missing host_port_reg_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->host_port_reg_ofs = prop;
-
-	if (of_property_read_u32(node, "hw_stats_reg_ofs", &prop)) {
-		pr_err("Missing hw_stats_reg_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->hw_stats_reg_ofs = prop;
-
-	if (of_property_read_u32(node, "cpts_reg_ofs", &prop)) {
-		pr_err("Missing cpts_reg_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->cpts_reg_ofs = prop;
-
-	if (of_property_read_u32(node, "bd_ram_ofs", &prop)) {
-		pr_err("Missing bd_ram_ofs property in the DT.\n");
-		ret = -EINVAL;
-		goto error_ret;
-	}
-	data->bd_ram_ofs = prop;
-
 	if (of_property_read_u32(node, "bd_ram_size", &prop)) {
 		pr_err("Missing bd_ram_size property in the DT.\n");
 		ret = -EINVAL;
@@ -1144,33 +1094,34 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->mac_control = prop;
 
+	/*
+	 * Populate all the child nodes here...
+	 */
+	ret = of_platform_populate(node, NULL, NULL, &pdev->dev);
+	/* We do not want to force this, as in some cases may not have child */
+	if (ret)
+		pr_warn("Doesn't have any child node\n");
+
 	for_each_node_by_name(slave_node, "slave") {
 		struct cpsw_slave_data *slave_data = data->slave_data + i;
-		const char *phy_id = NULL;
 		const void *mac_addr = NULL;
-
-		if (of_property_read_string(slave_node, "phy_id", &phy_id)) {
+		u32 phyid;
+		int lenp;
+		const __be32 *parp;
+		struct device_node *mdio_node;
+		struct platform_device *mdio;
+
+		parp = of_get_property(slave_node, "phy_id", &lenp);
+		if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) {
 			pr_err("Missing slave[%d] phy_id property\n", i);
 			ret = -EINVAL;
 			goto error_ret;
 		}
-		slave_data->phy_id = phy_id;
-
-		if (of_property_read_u32(slave_node, "slave_reg_ofs", &prop)) {
-			pr_err("Missing slave[%d] slave_reg_ofs property\n", i);
-			ret = -EINVAL;
-			goto error_ret;
-		}
-		slave_data->slave_reg_ofs = prop;
-
-		if (of_property_read_u32(slave_node, "sliver_reg_ofs",
-					 &prop)) {
-			pr_err("Missing slave[%d] sliver_reg_ofs property\n",
-				i);
-			ret = -EINVAL;
-			goto error_ret;
-		}
-		slave_data->sliver_reg_ofs = prop;
+		mdio_node = of_find_node_by_phandle(be32_to_cpup(parp));
+		phyid = be32_to_cpup(parp+1);
+		mdio = of_find_device_by_node(mdio_node);
+		snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
+			 PHY_ID_FMT, mdio->name, phyid);
 
 		mac_addr = of_get_mac_address(slave_node);
 		if (mac_addr)
@@ -1179,14 +1130,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		i++;
 	}
 
-	/*
-	 * Populate all the child nodes here...
-	 */
-	ret = of_platform_populate(node, NULL, NULL, &pdev->dev);
-	/* We do not want to force this, as in some cases may not have child */
-	if (ret)
-		pr_warn("Doesn't have any child node\n");
-
 	return 0;
 
 error_ret:
@@ -1201,8 +1144,9 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 	struct cpsw_priv		*priv;
 	struct cpdma_params		dma_params;
 	struct cpsw_ale_params		ale_params;
-	void __iomem			*regs;
+	void __iomem			*ss_regs, *wr_regs;
 	struct resource			*res;
+	u32 slave_offset, sliver_offset, slave_size;
 	int ret = 0, i, k = 0;
 
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
@@ -1270,15 +1214,14 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 		ret = -ENXIO;
 		goto clean_clk_ret;
 	}
-	regs = ioremap(priv->cpsw_res->start, resource_size(priv->cpsw_res));
-	if (!regs) {
+	ss_regs = ioremap(priv->cpsw_res->start, resource_size(priv->cpsw_res));
+	if (!ss_regs) {
 		dev_err(priv->dev, "unable to map i/o region\n");
 		goto clean_cpsw_iores_ret;
 	}
-	priv->regs = regs;
-	priv->host_port = data->host_port_num;
-	priv->host_port_regs = regs + data->host_port_reg_ofs;
-	priv->cpts.reg = regs + data->cpts_reg_ofs;
+	priv->regs = ss_regs;
+	priv->version = __raw_readl(&priv->regs->id_ver);
+	priv->host_port = HOST_PORT_NUM;
 
 	priv->cpsw_wr_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (!priv->cpsw_wr_res) {
@@ -1292,32 +1235,59 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 		ret = -ENXIO;
 		goto clean_iomap_ret;
 	}
-	regs = ioremap(priv->cpsw_wr_res->start,
+	wr_regs = ioremap(priv->cpsw_wr_res->start,
 				resource_size(priv->cpsw_wr_res));
-	if (!regs) {
+	if (!wr_regs) {
 		dev_err(priv->dev, "unable to map i/o region\n");
 		goto clean_cpsw_wr_iores_ret;
 	}
-	priv->wr_regs = regs;
-
-	for_each_slave(priv, cpsw_slave_init, priv);
+	priv->wr_regs = wr_regs;
 
 	memset(&dma_params, 0, sizeof(dma_params));
+	memset(&ale_params, 0, sizeof(ale_params));
+
+	switch (priv->version) {
+	case CPSW_VERSION_1:
+		priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
+		priv->cpts.reg       = ss_regs + CPSW1_CPTS_OFFSET;
+		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
+		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
+		ale_params.ale_regs  = ss_regs + CPSW1_ALE_OFFSET;
+		slave_offset         = CPSW1_SLAVE_OFFSET;
+		slave_size           = CPSW1_SLAVE_SIZE;
+		sliver_offset        = CPSW1_SLIVER_OFFSET;
+		dma_params.desc_mem_phys = 0;
+		break;
+	case CPSW_VERSION_2:
+		priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
+		priv->cpts.reg       = ss_regs + CPSW2_CPTS_OFFSET;
+		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
+		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
+		ale_params.ale_regs  = ss_regs + CPSW2_ALE_OFFSET;
+		slave_offset         = CPSW2_SLAVE_OFFSET;
+		slave_size           = CPSW2_SLAVE_SIZE;
+		sliver_offset        = CPSW2_SLIVER_OFFSET;
+		dma_params.desc_mem_phys =
+			(u32 __force) priv->cpsw_res->start + CPSW2_BD_OFFSET;
+		break;
+	default:
+		dev_err(priv->dev, "unknown version 0x%08x\n", priv->version);
+		ret = -ENODEV;
+		goto clean_cpsw_wr_iores_ret;
+	}
+	for (i = 0; i < priv->data.slaves; i++) {
+		struct cpsw_slave *slave = &priv->slaves[i];
+		cpsw_slave_init(slave, priv, slave_offset, sliver_offset);
+		slave_offset  += slave_size;
+		sliver_offset += SLIVER_SIZE;
+	}
+
 	dma_params.dev		= &pdev->dev;
-	dma_params.dmaregs	= cpsw_dma_regs((u32)priv->regs,
-						data->cpdma_reg_ofs);
-	dma_params.rxthresh	= cpsw_dma_rxthresh((u32)priv->regs,
-						    data->cpdma_reg_ofs);
-	dma_params.rxfree	= cpsw_dma_rxfree((u32)priv->regs,
-						  data->cpdma_reg_ofs);
-	dma_params.txhdp	= cpsw_dma_txhdp((u32)priv->regs,
-						 data->cpdma_sram_ofs);
-	dma_params.rxhdp	= cpsw_dma_rxhdp((u32)priv->regs,
-						 data->cpdma_sram_ofs);
-	dma_params.txcp		= cpsw_dma_txcp((u32)priv->regs,
-						data->cpdma_sram_ofs);
-	dma_params.rxcp		= cpsw_dma_rxcp((u32)priv->regs,
-						data->cpdma_sram_ofs);
+	dma_params.rxthresh	= dma_params.dmaregs + CPDMA_RXTHRESH;
+	dma_params.rxfree	= dma_params.dmaregs + CPDMA_RXFREE;
+	dma_params.rxhdp	= dma_params.txhdp + CPDMA_RXHDP;
+	dma_params.txcp		= dma_params.txhdp + CPDMA_TXCP;
+	dma_params.rxcp		= dma_params.txhdp + CPDMA_RXCP;
 
 	dma_params.num_chan		= data->channels;
 	dma_params.has_soft_reset	= true;
@@ -1325,10 +1295,7 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 	dma_params.desc_mem_size	= data->bd_ram_size;
 	dma_params.desc_align		= 16;
 	dma_params.has_ext_regs		= true;
-	dma_params.desc_mem_phys        = data->no_bd_ram ? 0 :
-			(u32 __force)priv->cpsw_res->start + data->bd_ram_ofs;
-	dma_params.desc_hw_addr         = data->hw_ram_addr ?
-			data->hw_ram_addr : dma_params.desc_mem_phys ;
+	dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
 
 	priv->dma = cpdma_ctlr_create(&dma_params);
 	if (!priv->dma) {
@@ -1348,10 +1315,7 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 		goto clean_dma_ret;
 	}
 
-	memset(&ale_params, 0, sizeof(ale_params));
 	ale_params.dev			= &ndev->dev;
-	ale_params.ale_regs		= (void *)((u32)priv->regs) +
-						((u32)data->ale_reg_ofs);
 	ale_params.ale_ageout		= ale_ageout;
 	ale_params.ale_entries		= data->ale_entries;
 	ale_params.ale_ports		= data->slaves;
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index b5c16c3df458..24368a2e8b87 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -18,9 +18,7 @@
 #include <linux/if_ether.h>
 
 struct cpsw_slave_data {
-	u32		slave_reg_ofs;
-	u32		sliver_reg_ofs;
-	const char	*phy_id;
+	char		phy_id[MII_BUS_ID_SIZE];
 	int		phy_if;
 	u8		mac_addr[ETH_ALEN];
 };
@@ -28,31 +26,14 @@ struct cpsw_slave_data {
 struct cpsw_platform_data {
 	u32	ss_reg_ofs;	/* Subsystem control register offset */
 	u32	channels;	/* number of cpdma channels (symmetric) */
-	u32	cpdma_reg_ofs;	/* cpdma register offset */
-	u32	cpdma_sram_ofs;	/* cpdma sram offset */
-
 	u32	slaves;		/* number of slave cpgmac ports */
 	struct cpsw_slave_data	*slave_data;
 	u32	cpts_active_slave; /* time stamping slave */
 	u32	cpts_clock_mult;  /* convert input clock ticks to nanoseconds */
 	u32	cpts_clock_shift; /* convert input clock ticks to nanoseconds */
-
-	u32	ale_reg_ofs;	/* address lookup engine reg offset */
 	u32	ale_entries;	/* ale table size */
-
-	u32	host_port_reg_ofs; /* cpsw cpdma host port registers */
-	u32     host_port_num; /* The port number for the host port */
-
-	u32	hw_stats_reg_ofs;  /* cpsw hardware statistics counters */
-	u32	cpts_reg_ofs;      /* cpts registers */
-
-	u32	bd_ram_ofs;   /* embedded buffer descriptor RAM offset*/
 	u32	bd_ram_size;  /*buffer descriptor ram size */
-	u32	hw_ram_addr; /*if the HW address for BD RAM is different */
-	bool	no_bd_ram; /* no embedded BD ram*/
-
 	u32	rx_descs;	/* Number of Rx Descriptios */
-
 	u32	mac_control;	/* Mac control register */
 };
 
-- 
cgit v1.2.3-55-g7522


From 64e481603ab46bcd1466fdaffca50f25bf123f83 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Thu, 18 Oct 2012 19:36:09 +0530
Subject: mfd: tps6586x: move regulator dt parsing to regulator driver

Moving regulator node parsing to regulator driver in place
of parsing it on mfd driver.
The motivation for this change are:
- MFD core driver should not depends on regulator and able
  to instantiate device without regulator.
- The API for matching regulators are in regulator core and
  it is good that regulator driver only calls this API.
- Regulator specific support should be in regulator driver only
  to ease any enhancement/modification for regulators.
- The regulator driver is now registered as mfd sub device and
  all regulator registration is done from single probe call.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/Kconfig                    |   1 -
 drivers/mfd/tps6586x.c                 |  76 ++------------
 drivers/regulator/tps6586x-regulator.c | 181 ++++++++++++++++++++++++++++-----
 include/linux/mfd/tps6586x.h           |   3 +
 4 files changed, 166 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index acab3ef8a310..05acef8c7641 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -201,7 +201,6 @@ config MFD_TPS6586X
 	depends on I2C=y && GENERIC_HARDIRQS
 	select MFD_CORE
 	select REGMAP_I2C
-	depends on REGULATOR
 	help
 	  If you say yes here you get support for the TPS6586X series of
 	  Power Management chips.
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 467464368773..9d67bd92edf8 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -24,8 +24,6 @@
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/regmap.h>
-#include <linux/regulator/of_regulator.h>
-#include <linux/regulator/machine.h>
 
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps6586x.h>
@@ -98,6 +96,9 @@ static struct mfd_cell tps6586x_cell[] = {
 	{
 		.name = "tps6586x-gpio",
 	},
+	{
+		.name = "tps6586x-pmic",
+	},
 	{
 		.name = "tps6586x-rtc",
 	},
@@ -350,80 +351,19 @@ failed:
 }
 
 #ifdef CONFIG_OF
-static struct of_regulator_match tps6586x_matches[] = {
-	{ .name = "sys",     .driver_data = (void *)TPS6586X_ID_SYS     },
-	{ .name = "sm0",     .driver_data = (void *)TPS6586X_ID_SM_0    },
-	{ .name = "sm1",     .driver_data = (void *)TPS6586X_ID_SM_1    },
-	{ .name = "sm2",     .driver_data = (void *)TPS6586X_ID_SM_2    },
-	{ .name = "ldo0",    .driver_data = (void *)TPS6586X_ID_LDO_0   },
-	{ .name = "ldo1",    .driver_data = (void *)TPS6586X_ID_LDO_1   },
-	{ .name = "ldo2",    .driver_data = (void *)TPS6586X_ID_LDO_2   },
-	{ .name = "ldo3",    .driver_data = (void *)TPS6586X_ID_LDO_3   },
-	{ .name = "ldo4",    .driver_data = (void *)TPS6586X_ID_LDO_4   },
-	{ .name = "ldo5",    .driver_data = (void *)TPS6586X_ID_LDO_5   },
-	{ .name = "ldo6",    .driver_data = (void *)TPS6586X_ID_LDO_6   },
-	{ .name = "ldo7",    .driver_data = (void *)TPS6586X_ID_LDO_7   },
-	{ .name = "ldo8",    .driver_data = (void *)TPS6586X_ID_LDO_8   },
-	{ .name = "ldo9",    .driver_data = (void *)TPS6586X_ID_LDO_9   },
-	{ .name = "ldo_rtc", .driver_data = (void *)TPS6586X_ID_LDO_RTC },
-};
-
 static struct tps6586x_platform_data *tps6586x_parse_dt(struct i2c_client *client)
 {
-	const unsigned int num = ARRAY_SIZE(tps6586x_matches);
 	struct device_node *np = client->dev.of_node;
 	struct tps6586x_platform_data *pdata;
-	struct tps6586x_subdev_info *devs;
-	struct device_node *regs;
-	const char *sys_rail_name = NULL;
-	unsigned int count;
-	unsigned int i, j;
-	int err;
-
-	regs = of_find_node_by_name(np, "regulators");
-	if (!regs)
-		return NULL;
-
-	err = of_regulator_match(&client->dev, regs, tps6586x_matches, num);
-	if (err < 0) {
-		of_node_put(regs);
-		return NULL;
-	}
-
-	of_node_put(regs);
-	count = err;
-
-	devs = devm_kzalloc(&client->dev, count * sizeof(*devs), GFP_KERNEL);
-	if (!devs)
-		return NULL;
-
-	for (i = 0, j = 0; i < num && j < count; i++) {
-		struct regulator_init_data *reg_idata;
-
-		if (!tps6586x_matches[i].init_data)
-			continue;
-
-		reg_idata  = tps6586x_matches[i].init_data;
-		devs[j].name = "tps6586x-regulator";
-		devs[j].platform_data = tps6586x_matches[i].init_data;
-		devs[j].id = (int)tps6586x_matches[i].driver_data;
-		if (devs[j].id == TPS6586X_ID_SYS)
-			sys_rail_name = reg_idata->constraints.name;
-
-		if ((devs[j].id == TPS6586X_ID_LDO_5) ||
-			(devs[j].id == TPS6586X_ID_LDO_RTC))
-			reg_idata->supply_regulator = sys_rail_name;
-
-		devs[j].of_node = tps6586x_matches[i].of_node;
-		j++;
-	}
 
 	pdata = devm_kzalloc(&client->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
+	if (!pdata) {
+		dev_err(&client->dev, "Memory allocation failed\n");
 		return NULL;
+	}
 
-	pdata->num_subdevs = count;
-	pdata->subdevs = devs;
+	pdata->num_subdevs = 0;
+	pdata->subdevs = NULL;
 	pdata->gpio_base = -1;
 	pdata->irq_base = -1;
 	pdata->pm_off = of_property_read_bool(np, "ti,system-power-controller");
diff --git a/drivers/regulator/tps6586x-regulator.c b/drivers/regulator/tps6586x-regulator.c
index ce1e7cb8d513..913c903bc3ee 100644
--- a/drivers/regulator/tps6586x-regulator.c
+++ b/drivers/regulator/tps6586x-regulator.c
@@ -17,10 +17,12 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
 #include <linux/mfd/tps6586x.h>
 
 /* supply control and voltage setting  */
@@ -255,10 +257,10 @@ static inline int tps6586x_regulator_preinit(struct device *parent,
 				 1 << ri->enable_bit[1]);
 }
 
-static int tps6586x_regulator_set_slew_rate(struct platform_device *pdev)
+static int tps6586x_regulator_set_slew_rate(struct platform_device *pdev,
+			int id, struct regulator_init_data *p)
 {
 	struct device *parent = pdev->dev.parent;
-	struct regulator_init_data *p = pdev->dev.platform_data;
 	struct tps6586x_settings *setting = p->driver_data;
 	uint8_t reg;
 
@@ -269,7 +271,7 @@ static int tps6586x_regulator_set_slew_rate(struct platform_device *pdev)
 		return 0;
 
 	/* only SM0 and SM1 can have the slew rate settings */
-	switch (pdev->id) {
+	switch (id) {
 	case TPS6586X_ID_SM_0:
 		reg = TPS6586X_SM0SL;
 		break;
@@ -298,54 +300,181 @@ static inline struct tps6586x_regulator *find_regulator_info(int id)
 	return NULL;
 }
 
+#ifdef CONFIG_OF
+static struct of_regulator_match tps6586x_matches[] = {
+	{ .name = "sys",     .driver_data = (void *)TPS6586X_ID_SYS     },
+	{ .name = "sm0",     .driver_data = (void *)TPS6586X_ID_SM_0    },
+	{ .name = "sm1",     .driver_data = (void *)TPS6586X_ID_SM_1    },
+	{ .name = "sm2",     .driver_data = (void *)TPS6586X_ID_SM_2    },
+	{ .name = "ldo0",    .driver_data = (void *)TPS6586X_ID_LDO_0   },
+	{ .name = "ldo1",    .driver_data = (void *)TPS6586X_ID_LDO_1   },
+	{ .name = "ldo2",    .driver_data = (void *)TPS6586X_ID_LDO_2   },
+	{ .name = "ldo3",    .driver_data = (void *)TPS6586X_ID_LDO_3   },
+	{ .name = "ldo4",    .driver_data = (void *)TPS6586X_ID_LDO_4   },
+	{ .name = "ldo5",    .driver_data = (void *)TPS6586X_ID_LDO_5   },
+	{ .name = "ldo6",    .driver_data = (void *)TPS6586X_ID_LDO_6   },
+	{ .name = "ldo7",    .driver_data = (void *)TPS6586X_ID_LDO_7   },
+	{ .name = "ldo8",    .driver_data = (void *)TPS6586X_ID_LDO_8   },
+	{ .name = "ldo9",    .driver_data = (void *)TPS6586X_ID_LDO_9   },
+	{ .name = "ldo_rtc", .driver_data = (void *)TPS6586X_ID_LDO_RTC },
+};
+
+static struct tps6586x_platform_data *tps6586x_parse_regulator_dt(
+		struct platform_device *pdev,
+		struct of_regulator_match **tps6586x_reg_matches)
+{
+	const unsigned int num = ARRAY_SIZE(tps6586x_matches);
+	struct device_node *np = pdev->dev.parent->of_node;
+	struct device_node *regs;
+	const char *sys_rail = NULL;
+	unsigned int i;
+	struct tps6586x_platform_data *pdata;
+	int err;
+
+	regs = of_find_node_by_name(np, "regulators");
+	if (!regs) {
+		dev_err(&pdev->dev, "regulator node not found\n");
+		return NULL;
+	}
+
+	err = of_regulator_match(&pdev->dev, regs, tps6586x_matches, num);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Regulator match failed, e %d\n", err);
+		of_node_put(regs);
+		return NULL;
+	}
+
+	of_node_put(regs);
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		dev_err(&pdev->dev, "Memory alloction failed\n");
+		return NULL;
+	}
+
+	for (i = 0; i < num; i++) {
+		int id;
+		if (!tps6586x_matches[i].init_data)
+			continue;
+
+		pdata->reg_init_data[i] = tps6586x_matches[i].init_data;
+		id = (int)tps6586x_matches[i].driver_data;
+		if (id == TPS6586X_ID_SYS)
+			sys_rail = pdata->reg_init_data[i]->constraints.name;
+
+		if ((id == TPS6586X_ID_LDO_5) || (id == TPS6586X_ID_LDO_RTC))
+			pdata->reg_init_data[i]->supply_regulator = sys_rail;
+	}
+	*tps6586x_reg_matches = tps6586x_matches;
+	return pdata;
+}
+#else
+static struct tps6586x_platform_data *tps6586x_parse_regulator_dt(
+		struct platform_device *pdev,
+		struct of_regulator_match **tps6586x_reg_matches)
+{
+	*tps6586x_reg_matches = NULL;
+	return NULL;
+}
+#endif
+
 static int __devinit tps6586x_regulator_probe(struct platform_device *pdev)
 {
 	struct tps6586x_regulator *ri = NULL;
 	struct regulator_config config = { };
-	struct regulator_dev *rdev;
-	int id = pdev->id;
+	struct regulator_dev **rdev;
+	struct regulator_init_data *reg_data;
+	struct tps6586x_platform_data *pdata;
+	struct of_regulator_match *tps6586x_reg_matches = NULL;
+	int id;
 	int err;
 
 	dev_dbg(&pdev->dev, "Probing regulator %d\n", id);
 
-	ri = find_regulator_info(id);
-	if (ri == NULL) {
-		dev_err(&pdev->dev, "invalid regulator ID specified\n");
-		return -EINVAL;
-	}
+	pdata = dev_get_platdata(pdev->dev.parent);
+	if ((!pdata) && (pdev->dev.parent->of_node))
+		pdata = tps6586x_parse_regulator_dt(pdev,
+					&tps6586x_reg_matches);
 
-	err = tps6586x_regulator_preinit(pdev->dev.parent, ri);
-	if (err)
-		return err;
+	if (!pdata) {
+		dev_err(&pdev->dev, "Platform data not available, exiting\n");
+		return -ENODEV;
+	}
 
-	config.dev = pdev->dev.parent;
-	config.of_node = pdev->dev.of_node;
-	config.init_data = pdev->dev.platform_data;
-	config.driver_data = ri;
+	rdev = devm_kzalloc(&pdev->dev, TPS6586X_ID_MAX_REGULATOR *
+				sizeof(*rdev), GFP_KERNEL);
+	if (!rdev) {
+		dev_err(&pdev->dev, "Mmemory alloc failed\n");
+		return -ENOMEM;
+	}
 
-	rdev = regulator_register(&ri->desc, &config);
-	if (IS_ERR(rdev)) {
-		dev_err(&pdev->dev, "failed to register regulator %s\n",
-				ri->desc.name);
-		return PTR_ERR(rdev);
+	for (id = 0; id < TPS6586X_ID_MAX_REGULATOR; ++id) {
+		reg_data = pdata->reg_init_data[id];
+
+		ri = find_regulator_info(id);
+		if (!ri) {
+			dev_err(&pdev->dev, "invalid regulator ID specified\n");
+			err = -EINVAL;
+			goto fail;
+		}
+
+		err = tps6586x_regulator_preinit(pdev->dev.parent, ri);
+		if (err) {
+			dev_err(&pdev->dev,
+				"regulator %d preinit failed, e %d\n", id, err);
+			goto fail;
+		}
+
+		config.dev = pdev->dev.parent;
+		config.init_data = reg_data;
+		config.driver_data = ri;
+
+		if (tps6586x_reg_matches)
+			config.of_node = tps6586x_reg_matches[id].of_node;
+
+		rdev[id] = regulator_register(&ri->desc, &config);
+		if (IS_ERR(rdev[id])) {
+			dev_err(&pdev->dev, "failed to register regulator %s\n",
+					ri->desc.name);
+			err = PTR_ERR(rdev[id]);
+			goto fail;
+		}
+
+		if (reg_data) {
+			err = tps6586x_regulator_set_slew_rate(pdev, id,
+					reg_data);
+			if (err < 0) {
+				dev_err(&pdev->dev,
+					"Slew rate config failed, e %d\n", err);
+				regulator_unregister(rdev[id]);
+				goto fail;
+			}
+		}
 	}
 
 	platform_set_drvdata(pdev, rdev);
+	return 0;
 
-	return tps6586x_regulator_set_slew_rate(pdev);
+fail:
+	while (--id >= 0)
+		regulator_unregister(rdev[id]);
+	return err;
 }
 
 static int __devexit tps6586x_regulator_remove(struct platform_device *pdev)
 {
-	struct regulator_dev *rdev = platform_get_drvdata(pdev);
+	struct regulator_dev **rdev = platform_get_drvdata(pdev);
+	int id = TPS6586X_ID_MAX_REGULATOR;
+
+	while (--id >= 0)
+		regulator_unregister(rdev[id]);
 
-	regulator_unregister(rdev);
 	return 0;
 }
 
 static struct platform_driver tps6586x_regulator_driver = {
 	.driver	= {
-		.name	= "tps6586x-regulator",
+		.name	= "tps6586x-pmic",
 		.owner	= THIS_MODULE,
 	},
 	.probe		= tps6586x_regulator_probe,
diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h
index 2dd123194958..f8da0e152567 100644
--- a/include/linux/mfd/tps6586x.h
+++ b/include/linux/mfd/tps6586x.h
@@ -29,6 +29,7 @@ enum {
 	TPS6586X_ID_LDO_8,
 	TPS6586X_ID_LDO_9,
 	TPS6586X_ID_LDO_RTC,
+	TPS6586X_ID_MAX_REGULATOR,
 };
 
 enum {
@@ -79,6 +80,8 @@ struct tps6586x_platform_data {
 	int gpio_base;
 	int irq_base;
 	bool pm_off;
+
+	struct regulator_init_data *reg_init_data[TPS6586X_ID_MAX_REGULATOR];
 };
 
 /*
-- 
cgit v1.2.3-55-g7522


From f6130be652d0b4fbf710d83a816298c007e59ed1 Mon Sep 17 00:00:00 2001
From: Ashish Jangam
Date: Thu, 1 Nov 2012 13:57:56 +0530
Subject: regulator: DA9055 regulator driver

This is the Regulator patch for the DA9055 PMIC and has got dependency on
the DA9055 MFD core.

This patch support all of the DA9055 regulators. The output voltages are
fully programmable through I2C interface only. The platform data with regulation
constraints is passed down from the board to the regulator.

This patch is functionaly tested on SMDK6410 board. DA9055 Evaluation board
was connected to the SMDK6410 board.

Signed-off-by: David Dajun Chen <dchen@diasemi.com>
Signed-off-by: Ashish Jangam <ashish.jangam@kpitcummins.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/Kconfig            |  10 +
 drivers/regulator/Makefile           |   1 +
 drivers/regulator/da9055-regulator.c | 663 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/da9055/pdata.h     |  27 +-
 4 files changed, 698 insertions(+), 3 deletions(-)
 create mode 100644 drivers/regulator/da9055-regulator.c

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 67d47b59a66d..a162573d4944 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -109,6 +109,16 @@ config REGULATOR_DA9052
 	  This driver supports the voltage regulators of DA9052-BC and
 	  DA9053-AA/Bx PMIC.
 
+config REGULATOR_DA9055
+	tristate "Dialog Semiconductor DA9055 regulators"
+	depends on MFD_DA9055
+	help
+	  Say y here to support the BUCKs and LDOs regulators found on
+	  Dialog Semiconductor DA9055 PMIC.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called da9055-regulator.
+
 config REGULATOR_FAN53555
 	tristate "Fairchild FAN53555 Regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index e431eed8a878..4b754e958aed 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_REGULATOR_ANATOP) += anatop-regulator.o
 obj-$(CONFIG_REGULATOR_ARIZONA) += arizona-micsupp.o arizona-ldo1.o
 obj-$(CONFIG_REGULATOR_DA903X)	+= da903x.o
 obj-$(CONFIG_REGULATOR_DA9052)	+= da9052-regulator.o
+obj-$(CONFIG_REGULATOR_DA9055)	+= da9055-regulator.o
 obj-$(CONFIG_REGULATOR_DBX500_PRCMU) += dbx500-prcmu.o
 obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o
 obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o
diff --git a/drivers/regulator/da9055-regulator.c b/drivers/regulator/da9055-regulator.c
new file mode 100644
index 000000000000..8994178b371c
--- /dev/null
+++ b/drivers/regulator/da9055-regulator.c
@@ -0,0 +1,663 @@
+/*
+* Regulator driver for DA9055 PMIC
+*
+* Copyright(c) 2012 Dialog Semiconductor Ltd.
+*
+* Author: David Dajun Chen <dchen@diasemi.com>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+
+#include <linux/mfd/da9055/core.h>
+#include <linux/mfd/da9055/reg.h>
+#include <linux/mfd/da9055/pdata.h>
+
+#define DA9055_MIN_UA		0
+#define DA9055_MAX_UA		3
+
+#define DA9055_LDO_MODE_SYNC	0
+#define DA9055_LDO_MODE_SLEEP	1
+
+#define DA9055_BUCK_MODE_SLEEP	1
+#define DA9055_BUCK_MODE_SYNC	2
+#define DA9055_BUCK_MODE_AUTO	3
+
+/* DA9055 REGULATOR IDs */
+#define DA9055_ID_BUCK1	0
+#define DA9055_ID_BUCK2	1
+#define DA9055_ID_LDO1		2
+#define DA9055_ID_LDO2		3
+#define DA9055_ID_LDO3		4
+#define DA9055_ID_LDO4		5
+#define DA9055_ID_LDO5		6
+#define DA9055_ID_LDO6		7
+
+/* DA9055 BUCK current limit */
+static const int da9055_current_limits[] = { 500000, 600000, 700000, 800000 };
+
+struct da9055_conf_reg {
+	int reg;
+	int sel_mask;
+	int en_mask;
+};
+
+struct da9055_volt_reg {
+	int reg_a;
+	int reg_b;
+	int sl_shift;
+	int v_offset;
+	int v_mask;
+	int v_shift;
+};
+
+struct da9055_mode_reg {
+	int reg;
+	int mask;
+	int shift;
+};
+
+struct da9055_regulator_info {
+	struct regulator_desc reg_desc;
+	struct da9055_conf_reg conf;
+	struct da9055_volt_reg volt;
+	struct da9055_mode_reg mode;
+};
+
+struct da9055_regulator {
+	struct da9055 *da9055;
+	struct da9055_regulator_info *info;
+	struct regulator_dev *rdev;
+	enum gpio_select reg_rselect;
+};
+
+static unsigned int da9055_buck_get_mode(struct regulator_dev *rdev)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int ret, mode = 0;
+
+	ret = da9055_reg_read(regulator->da9055, info->mode.reg);
+	if (ret < 0)
+		return ret;
+
+	switch ((ret & info->mode.mask) >> info->mode.shift) {
+	case DA9055_BUCK_MODE_SYNC:
+		mode = REGULATOR_MODE_FAST;
+		break;
+	case DA9055_BUCK_MODE_AUTO:
+		mode = REGULATOR_MODE_NORMAL;
+		break;
+	case DA9055_BUCK_MODE_SLEEP:
+		mode = REGULATOR_MODE_STANDBY;
+		break;
+	}
+
+	return mode;
+}
+
+static int da9055_buck_set_mode(struct regulator_dev *rdev,
+					unsigned int mode)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int val = 0;
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		val = DA9055_BUCK_MODE_SYNC << info->mode.shift;
+		break;
+	case REGULATOR_MODE_NORMAL:
+		val = DA9055_BUCK_MODE_AUTO << info->mode.shift;
+		break;
+	case REGULATOR_MODE_STANDBY:
+		val = DA9055_BUCK_MODE_SLEEP << info->mode.shift;
+		break;
+	}
+
+	return da9055_reg_update(regulator->da9055, info->mode.reg,
+				 info->mode.mask, val);
+}
+
+static unsigned int da9055_ldo_get_mode(struct regulator_dev *rdev)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int ret;
+
+	ret = da9055_reg_read(regulator->da9055, info->volt.reg_b);
+	if (ret < 0)
+		return ret;
+
+	if (ret >> info->volt.sl_shift)
+		return REGULATOR_MODE_STANDBY;
+	else
+		return REGULATOR_MODE_NORMAL;
+}
+
+static int da9055_ldo_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	struct da9055_volt_reg volt = info->volt;
+	int val = 0;
+
+	switch (mode) {
+	case REGULATOR_MODE_NORMAL:
+	case REGULATOR_MODE_FAST:
+		val = DA9055_LDO_MODE_SYNC;
+		break;
+	case REGULATOR_MODE_STANDBY:
+		val = DA9055_LDO_MODE_SLEEP;
+		break;
+	}
+
+	return da9055_reg_update(regulator->da9055, volt.reg_b,
+				 1 << volt.sl_shift,
+				 val << volt.sl_shift);
+}
+
+static int da9055_buck_get_current_limit(struct regulator_dev *rdev)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int ret;
+
+	ret = da9055_reg_read(regulator->da9055, DA9055_REG_BUCK_LIM);
+	if (ret < 0)
+		return ret;
+
+	ret &= info->mode.mask;
+	return da9055_current_limits[ret >> info->mode.shift];
+}
+
+static int da9055_buck_set_current_limit(struct regulator_dev *rdev, int min_uA,
+					 int max_uA)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int i, val = 0;
+
+	if (min_uA > da9055_current_limits[DA9055_MAX_UA] ||
+	    max_uA < da9055_current_limits[DA9055_MIN_UA])
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(da9055_current_limits); i++) {
+		if (min_uA <= da9055_current_limits[i]) {
+			val = i;
+			break;
+		}
+	}
+
+	return da9055_reg_update(regulator->da9055, DA9055_REG_BUCK_LIM,
+				info->mode.mask, val << info->mode.shift);
+}
+
+static int da9055_regulator_get_voltage_sel(struct regulator_dev *rdev)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	struct da9055_volt_reg volt = info->volt;
+	int ret, sel;
+
+	/*
+	 * There are two voltage register set A & B for voltage ramping but
+	 * either one of then can be active therefore we first determine
+	 * the active register set.
+	 */
+	ret = da9055_reg_read(regulator->da9055, info->conf.reg);
+	if (ret < 0)
+		return ret;
+
+	ret &= info->conf.sel_mask;
+
+	/* Get the voltage for the active register set A/B */
+	if (ret == DA9055_REGUALTOR_SET_A)
+		ret = da9055_reg_read(regulator->da9055, volt.reg_a);
+	else
+		ret = da9055_reg_read(regulator->da9055, volt.reg_b);
+
+	if (ret < 0)
+		return ret;
+
+	sel = (ret & volt.v_mask);
+	if (sel <= volt.v_offset)
+		return 0;
+	else
+		return sel;
+}
+
+static int da9055_regulator_set_voltage_bits(struct regulator_dev *rdev,
+					     unsigned int reg,
+					     unsigned int selector)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+
+	/* Takes care of voltage range that does not start with 0 offset. */
+	selector += info->volt.v_offset;
+
+	/* Set the voltage */
+	return da9055_reg_update(regulator->da9055, reg, info->volt.v_mask,
+				 selector);
+
+}
+
+static int da9055_regulator_set_voltage_sel(struct regulator_dev *rdev,
+					    unsigned int selector)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int ret;
+
+	/*
+	 * Regulator register set A/B is not selected through GPIO therefore
+	 * we use default register set A for voltage ramping.
+	 */
+	if (regulator->reg_rselect == NO_GPIO) {
+		/* Select register set A */
+		ret = da9055_reg_update(regulator->da9055, info->conf.reg,
+					info->conf.sel_mask, DA9055_SEL_REG_A);
+		if (ret < 0)
+			return ret;
+
+		/* Set the voltage */
+		return da9055_regulator_set_voltage_bits(rdev, info->volt.reg_a,
+							 selector);
+	}
+
+	/*
+	 * Here regulator register set A/B is selected through GPIO.
+	 * Therefore we first determine the selected register set A/B and
+	 * then set the desired voltage for that register set A/B.
+	 */
+	ret = da9055_reg_read(regulator->da9055, info->conf.reg);
+	if (ret < 0)
+		return ret;
+
+	ret &= info->conf.sel_mask;
+
+	/* Set the voltage */
+	if (ret == DA9055_REGUALTOR_SET_A)
+		return da9055_regulator_set_voltage_bits(rdev, info->volt.reg_a,
+							 selector);
+	else
+		return da9055_regulator_set_voltage_bits(rdev, info->volt.reg_b,
+						 selector);
+}
+
+static int da9055_regulator_set_suspend_voltage(struct regulator_dev *rdev,
+						int uV)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+	int ret;
+
+	/* Select register set B for suspend voltage ramping. */
+	if (regulator->reg_rselect == NO_GPIO) {
+		ret = da9055_reg_update(regulator->da9055, info->conf.reg,
+					info->conf.sel_mask, DA9055_SEL_REG_B);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = regulator_map_voltage_linear(rdev, uV, uV);
+	if (ret < 0)
+		return ret;
+
+	return da9055_regulator_set_voltage_bits(rdev, info->volt.reg_b, ret);
+}
+
+static int da9055_suspend_enable(struct regulator_dev *rdev)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+
+	/* Select register set B for voltage ramping. */
+	if (regulator->reg_rselect == NO_GPIO)
+		return da9055_reg_update(regulator->da9055, info->conf.reg,
+					info->conf.sel_mask, DA9055_SEL_REG_B);
+	else
+		return 0;
+}
+
+static int da9055_suspend_disable(struct regulator_dev *rdev)
+{
+	struct da9055_regulator *regulator = rdev_get_drvdata(rdev);
+	struct da9055_regulator_info *info = regulator->info;
+
+	/* Diselect register set B. */
+	if (regulator->reg_rselect == NO_GPIO)
+		return da9055_reg_update(regulator->da9055, info->conf.reg,
+					info->conf.sel_mask, DA9055_SEL_REG_A);
+	else
+		return 0;
+}
+
+static struct regulator_ops da9055_buck_ops = {
+	.get_mode = da9055_buck_get_mode,
+	.set_mode = da9055_buck_set_mode,
+
+	.get_current_limit = da9055_buck_get_current_limit,
+	.set_current_limit = da9055_buck_set_current_limit,
+
+	.get_voltage_sel = da9055_regulator_get_voltage_sel,
+	.set_voltage_sel = da9055_regulator_set_voltage_sel,
+	.list_voltage = regulator_list_voltage_linear,
+	.map_voltage = regulator_map_voltage_linear,
+	.is_enabled = regulator_is_enabled_regmap,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+
+	.set_suspend_voltage = da9055_regulator_set_suspend_voltage,
+	.set_suspend_enable = da9055_suspend_enable,
+	.set_suspend_disable = da9055_suspend_disable,
+	.set_suspend_mode = da9055_buck_set_mode,
+};
+
+static struct regulator_ops da9055_ldo_ops = {
+	.get_mode = da9055_ldo_get_mode,
+	.set_mode = da9055_ldo_set_mode,
+
+	.get_voltage_sel = da9055_regulator_get_voltage_sel,
+	.set_voltage_sel = da9055_regulator_set_voltage_sel,
+	.list_voltage = regulator_list_voltage_linear,
+	.map_voltage = regulator_map_voltage_linear,
+	.is_enabled = regulator_is_enabled_regmap,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+
+	.set_suspend_voltage = da9055_regulator_set_suspend_voltage,
+	.set_suspend_enable = da9055_suspend_enable,
+	.set_suspend_disable = da9055_suspend_disable,
+	.set_suspend_mode = da9055_ldo_set_mode,
+
+};
+
+#define DA9055_LDO(_id, step, min, max, vbits, voffset) \
+{\
+	.reg_desc = {\
+		.name = #_id,\
+		.ops = &da9055_ldo_ops,\
+		.type = REGULATOR_VOLTAGE,\
+		.id = DA9055_ID_##_id,\
+		.n_voltages = (max - min) / step + 1, \
+		.enable_reg = DA9055_REG_BCORE_CONT + DA9055_ID_##_id, \
+		.enable_mask = 1, \
+		.min_uV = (min) * 1000,\
+		.uV_step = (step) * 1000,\
+		.owner = THIS_MODULE,\
+	},\
+	.conf = {\
+		.reg = DA9055_REG_BCORE_CONT + DA9055_ID_##_id, \
+		.sel_mask = (1 << 4),\
+		.en_mask = 1,\
+	},\
+	.volt = {\
+		.reg_a = DA9055_REG_VBCORE_A + DA9055_ID_##_id, \
+		.reg_b = DA9055_REG_VBCORE_B + DA9055_ID_##_id, \
+		.sl_shift = 7,\
+		.v_offset = (voffset),\
+		.v_mask = (1 << (vbits)) - 1,\
+		.v_shift = (vbits),\
+	},\
+}
+
+#define DA9055_BUCK(_id, step, min, max, vbits, voffset, mbits, sbits) \
+{\
+	.reg_desc = {\
+		.name = #_id,\
+		.ops = &da9055_buck_ops,\
+		.type = REGULATOR_VOLTAGE,\
+		.id = DA9055_ID_##_id,\
+		.n_voltages = (max - min) / step + 1, \
+		.enable_reg = DA9055_REG_BCORE_CONT + DA9055_ID_##_id, \
+		.enable_mask = 1,\
+		.min_uV = (min) * 1000,\
+		.uV_step = (step) * 1000,\
+		.owner = THIS_MODULE,\
+	},\
+	.conf = {\
+		.reg = DA9055_REG_BCORE_CONT + DA9055_ID_##_id, \
+		.sel_mask = (1 << 4),\
+		.en_mask = 1,\
+	},\
+	.volt = {\
+		.reg_a = DA9055_REG_VBCORE_A + DA9055_ID_##_id, \
+		.reg_b = DA9055_REG_VBCORE_B + DA9055_ID_##_id, \
+		.sl_shift = 7,\
+		.v_offset = (voffset),\
+		.v_mask = (1 << (vbits)) - 1,\
+		.v_shift = (vbits),\
+	},\
+	.mode = {\
+		.reg = DA9055_REG_BCORE_MODE,\
+		.mask = (mbits),\
+		.shift = (sbits),\
+	},\
+}
+
+static struct da9055_regulator_info da9055_regulator_info[] = {
+	DA9055_BUCK(BUCK1, 25, 725, 2075, 6, 9, 0xc, 2),
+	DA9055_BUCK(BUCK2, 25, 925, 2500, 6, 0, 3, 0),
+	DA9055_LDO(LDO1, 50, 900, 3300, 6, 2),
+	DA9055_LDO(LDO2, 50, 900, 3300, 6, 3),
+	DA9055_LDO(LDO3, 50, 900, 3300, 6, 2),
+	DA9055_LDO(LDO4, 50, 900, 3300, 6, 2),
+	DA9055_LDO(LDO5, 50, 900, 2750, 6, 2),
+	DA9055_LDO(LDO6, 20, 900, 3300, 7, 0),
+};
+
+/*
+ * Configures regulator to be controlled either through GPIO 1 or 2.
+ * GPIO can control regulator state and/or select the regulator register
+ * set A/B for voltage ramping.
+ */
+static __devinit int da9055_gpio_init(struct da9055_regulator *regulator,
+				      struct regulator_config *config,
+				      struct da9055_pdata *pdata, int id)
+{
+	struct da9055_regulator_info *info = regulator->info;
+	int ret = 0;
+
+	if (pdata->gpio_ren && pdata->gpio_ren[id]) {
+		char name[18];
+		int gpio_mux = pdata->gpio_ren[id];
+
+		config->ena_gpio = pdata->ena_gpio[id];
+		config->ena_gpio_flags = GPIOF_OUT_INIT_HIGH;
+		config->ena_gpio_invert = 1;
+
+		/*
+		 * GPI pin is muxed with regulator to control the
+		 * regulator state.
+		 */
+		sprintf(name, "DA9055 GPI %d", gpio_mux);
+		ret = devm_gpio_request_one(config->dev, gpio_mux, GPIOF_DIR_IN,
+					    name);
+		if (ret < 0)
+			goto err;
+
+		/*
+		 * Let the regulator know that its state is controlled
+		 * through GPI.
+		 */
+		ret = da9055_reg_update(regulator->da9055, info->conf.reg,
+					DA9055_E_GPI_MASK,
+					pdata->reg_ren[id]
+					<< DA9055_E_GPI_SHIFT);
+		if (ret < 0)
+			goto err;
+	}
+
+	if (pdata->gpio_rsel && pdata->gpio_ren[id]) {
+		char name[18];
+		int gpio_mux = pdata->gpio_rsel[id];
+
+		regulator->reg_rselect = pdata->reg_rsel[id];
+
+		/*
+		 * GPI pin is muxed with regulator to select the
+		 * regulator register set A/B for voltage ramping.
+		 */
+		sprintf(name, "DA9055 GPI %d", gpio_mux);
+		ret = devm_gpio_request_one(config->dev, gpio_mux, GPIOF_DIR_IN,
+					    name);
+		if (ret < 0)
+			goto err;
+
+		/*
+		 * Let the regulator know that its register set A/B
+		 * will be selected through GPI for voltage ramping.
+		 */
+		ret = da9055_reg_update(regulator->da9055, info->conf.reg,
+					DA9055_V_GPI_MASK,
+					pdata->reg_rsel[id]
+					<< DA9055_V_GPI_SHIFT);
+	}
+
+err:
+	return ret;
+}
+
+static irqreturn_t da9055_ldo5_6_oc_irq(int irq, void *data)
+{
+	struct da9055_regulator *regulator = data;
+
+	regulator_notifier_call_chain(regulator->rdev,
+				      REGULATOR_EVENT_OVER_CURRENT, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static inline struct da9055_regulator_info *find_regulator_info(int id)
+{
+	struct da9055_regulator_info *info;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(da9055_regulator_info); i++) {
+		info = &da9055_regulator_info[i];
+		if (info->reg_desc.id == id)
+			return info;
+	}
+
+	return NULL;
+}
+
+static int __devinit da9055_regulator_probe(struct platform_device *pdev)
+{
+	struct regulator_config config = { };
+	struct da9055_regulator *regulator;
+	struct da9055 *da9055 = dev_get_drvdata(pdev->dev.parent);
+	struct da9055_pdata *pdata = da9055->dev->platform_data;
+	int ret, irq;
+
+	if (pdata == NULL || pdata->regulators[pdev->id] == NULL)
+		return -ENODEV;
+
+	regulator = devm_kzalloc(&pdev->dev, sizeof(struct da9055_regulator),
+				 GFP_KERNEL);
+	if (!regulator)
+		return -ENOMEM;
+
+	regulator->info = find_regulator_info(pdev->id);
+	if (regulator->info == NULL) {
+		dev_err(&pdev->dev, "invalid regulator ID specified\n");
+		return -EINVAL;
+	}
+
+	regulator->da9055 = da9055;
+	config.dev = &pdev->dev;
+	config.driver_data = regulator;
+	config.regmap = da9055->regmap;
+
+	if (pdata && pdata->regulators)
+		config.init_data = pdata->regulators[pdev->id];
+
+	ret = da9055_gpio_init(regulator, &config, pdata, pdev->id);
+	if (ret < 0)
+		return ret;
+
+	regulator->rdev = regulator_register(&regulator->info->reg_desc,
+					     &config);
+	if (IS_ERR(regulator->rdev)) {
+		dev_err(&pdev->dev, "Failed to register regulator %s\n",
+			regulator->info->reg_desc.name);
+		ret = PTR_ERR(regulator->rdev);
+		return ret;
+	}
+
+	/* Only LDO 5 and 6 has got the over current interrupt */
+	if (pdev->id == DA9055_ID_LDO5 || pdev->id ==  DA9055_ID_LDO6) {
+		irq = platform_get_irq_byname(pdev, "REGULATOR");
+		irq = regmap_irq_get_virq(da9055->irq_data, irq);
+		ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+						da9055_ldo5_6_oc_irq,
+						IRQF_TRIGGER_HIGH |
+						IRQF_ONESHOT |
+						IRQF_PROBE_SHARED,
+						pdev->name, regulator);
+		if (ret != 0) {
+			if (ret != -EBUSY) {
+				dev_err(&pdev->dev,
+				"Failed to request Regulator IRQ %d: %d\n",
+				irq, ret);
+				goto err_regulator;
+			}
+		}
+	}
+
+	platform_set_drvdata(pdev, regulator);
+
+	return 0;
+
+err_regulator:
+	regulator_unregister(regulator->rdev);
+	return ret;
+}
+
+static int __devexit da9055_regulator_remove(struct platform_device *pdev)
+{
+	struct da9055_regulator *regulator = platform_get_drvdata(pdev);
+
+	regulator_unregister(regulator->rdev);
+
+	return 0;
+}
+
+static struct platform_driver da9055_regulator_driver = {
+	.probe = da9055_regulator_probe,
+	.remove = __devexit_p(da9055_regulator_remove),
+	.driver = {
+		.name = "da9055-regulator",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init da9055_regulator_init(void)
+{
+	return platform_driver_register(&da9055_regulator_driver);
+}
+subsys_initcall(da9055_regulator_init);
+
+static void __exit da9055_regulator_exit(void)
+{
+	platform_driver_unregister(&da9055_regulator_driver);
+}
+module_exit(da9055_regulator_exit);
+
+MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
+MODULE_DESCRIPTION("Power Regulator driver for Dialog DA9055 PMIC");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:da9055-regulator");
diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h
index 147293b4471d..f87a6c172a91 100644
--- a/include/linux/mfd/da9055/pdata.h
+++ b/include/linux/mfd/da9055/pdata.h
@@ -25,8 +25,29 @@ struct da9055_pdata {
 	int gpio_base;
 
 	struct regulator_init_data *regulators[DA9055_MAX_REGULATORS];
-	bool reset_enable;		/* Enable RTC in RESET Mode */
-	enum gpio_select *gpio_rsel;	/* Select regulator set thru GPIO 1/2 */
-	enum gpio_select *gpio_ren;	/* Enable regulator thru GPIO 1/2 */
+	/* Enable RTC in RESET Mode */
+	bool reset_enable;
+	/*
+	 * GPI muxed pin to control
+	 * regulator state A/B, 0 if not available.
+	 */
+	int *gpio_ren;
+	/*
+	 * GPI muxed pin to control
+	 * regulator set, 0 if not available.
+	 */
+	int *gpio_rsel;
+	/*
+	 * Regulator mode control bits value (GPI offset) that
+	 * that controls the regulator state, 0 if not available.
+	 */
+	enum gpio_select *reg_ren;
+	/*
+	 * Regulator mode control bits value (GPI offset) that
+	 * controls the regulator set A/B, 0 if  not available.
+	 */
+	enum gpio_select *reg_rsel;
+	/* GPIOs to enable regulator, 0 if not available */
+	int *ena_gpio;
 };
 #endif /* __DA9055_PDATA_H */
-- 
cgit v1.2.3-55-g7522


From 37cf6e6fc34e2fca4e7c565697e7cd5c317bc316 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires
Date: Wed, 14 Nov 2012 16:59:13 +0100
Subject: HID: export hidinput_calc_abs_res

Exporting the function allows us to calculate the resolution in third
party drivers like hid-multitouch.
This patch also complete the function with additional valid axes.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c      | 9 ++++++++-
 drivers/hid/hid-multitouch.c | 1 +
 include/linux/hid.h          | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index d917c0d53685..d1ec571bdb3b 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -208,7 +208,7 @@ static int hidinput_setkeycode(struct input_dev *dev,
  * Only exponent 1 length units are processed. Centimeters and inches are
  * converted to millimeters. Degrees are converted to radians.
  */
-static __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
+__s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
 {
 	__s32 unit_exponent = field->unit_exponent;
 	__s32 logical_extents = field->logical_maximum -
@@ -229,6 +229,12 @@ static __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
 	case ABS_X:
 	case ABS_Y:
 	case ABS_Z:
+	case ABS_MT_POSITION_X:
+	case ABS_MT_POSITION_Y:
+	case ABS_MT_TOOL_X:
+	case ABS_MT_TOOL_Y:
+	case ABS_MT_TOUCH_MAJOR:
+	case ABS_MT_TOUCH_MINOR:
 		if (field->unit == 0x11) {		/* If centimeters */
 			/* Convert to millimeters */
 			unit_exponent += 1;
@@ -283,6 +289,7 @@ static __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
 	/* Calculate resolution */
 	return logical_extents / physical_extents;
 }
+EXPORT_SYMBOL_GPL(hidinput_calc_abs_res);
 
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 static enum power_supply_property hidinput_battery_props[] = {
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 7867d69f0efe..3687f797b731 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -297,6 +297,7 @@ static void set_abs(struct input_dev *input, unsigned int code,
 	int fmax = field->logical_maximum;
 	int fuzz = snratio ? (fmax - fmin) / snratio : 0;
 	input_set_abs_params(input, code, fmin, fmax, fuzz, 0);
+	input_abs_set_res(input, code, hidinput_calc_abs_res(field, code));
 }
 
 static void mt_store_field(struct hid_usage *usage, struct mt_device *td,
diff --git a/include/linux/hid.h b/include/linux/hid.h
index c076041a069e..c6bef8f54a82 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -706,6 +706,7 @@ int hid_input_report(struct hid_device *, int type, u8 *, int, int);
 int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field);
 struct hid_field *hidinput_get_led_field(struct hid_device *hid);
 unsigned int hidinput_count_leds(struct hid_device *hid);
+__s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code);
 void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
 struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id);
-- 
cgit v1.2.3-55-g7522


From 774638386826621c984ab6994439f474709cac5e Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires
Date: Wed, 14 Nov 2012 16:59:15 +0100
Subject: HID: fix unit exponent parsing

HID spec details special values for the HID field unit exponent.
Basically, the range [0x8..0xf] correspond to [-8..-1], so this is
a standard two's complement on a half-byte.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c  | 16 +++++++++++++++-
 drivers/hid/hid-input.c | 13 +++++++++----
 include/linux/hid.h     |  1 +
 3 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f4109fd657ff..f5004e2ca0c5 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -315,6 +315,7 @@ static s32 item_sdata(struct hid_item *item)
 
 static int hid_parser_global(struct hid_parser *parser, struct hid_item *item)
 {
+	__u32 raw_value;
 	switch (item->tag) {
 	case HID_GLOBAL_ITEM_TAG_PUSH:
 
@@ -365,7 +366,14 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item)
 		return 0;
 
 	case HID_GLOBAL_ITEM_TAG_UNIT_EXPONENT:
-		parser->global.unit_exponent = item_sdata(item);
+		/* Units exponent negative numbers are given through a
+		 * two's complement.
+		 * See "6.2.2.7 Global Items" for more information. */
+		raw_value = item_udata(item);
+		if (!(raw_value & 0xfffffff0))
+			parser->global.unit_exponent = hid_snto32(raw_value, 4);
+		else
+			parser->global.unit_exponent = raw_value;
 		return 0;
 
 	case HID_GLOBAL_ITEM_TAG_UNIT:
@@ -865,6 +873,12 @@ static s32 snto32(__u32 value, unsigned n)
 	return value & (1 << (n - 1)) ? value | (-1 << n) : value;
 }
 
+s32 hid_snto32(__u32 value, unsigned n)
+{
+	return snto32(value, n);
+}
+EXPORT_SYMBOL_GPL(hid_snto32);
+
 /*
  * Convert a signed 32-bit integer to a signed n-bit integer.
  */
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 1b0adc3f7803..007a9433bfa7 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -192,7 +192,6 @@ static int hidinput_setkeycode(struct input_dev *dev,
 	return -EINVAL;
 }
 
-
 /**
  * hidinput_calc_abs_res - calculate an absolute axis resolution
  * @field: the HID report field to calculate resolution for
@@ -235,17 +234,23 @@ __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
 	case ABS_MT_TOOL_Y:
 	case ABS_MT_TOUCH_MAJOR:
 	case ABS_MT_TOUCH_MINOR:
-		if (field->unit == 0x11) {		/* If centimeters */
+		if (field->unit & 0xffffff00)		/* Not a length */
+			return 0;
+		unit_exponent += hid_snto32(field->unit >> 4, 4) - 1;
+		switch (field->unit & 0xf) {
+		case 0x1:				/* If centimeters */
 			/* Convert to millimeters */
 			unit_exponent += 1;
-		} else if (field->unit == 0x13) {	/* If inches */
+			break;
+		case 0x3:				/* If inches */
 			/* Convert to millimeters */
 			prev = physical_extents;
 			physical_extents *= 254;
 			if (physical_extents < prev)
 				return 0;
 			unit_exponent -= 1;
-		} else {
+			break;
+		default:
 			return 0;
 		}
 		break;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index c6bef8f54a82..4161bf2c0b5b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -717,6 +717,7 @@ int hid_connect(struct hid_device *hid, unsigned int connect_mask);
 void hid_disconnect(struct hid_device *hid);
 const struct hid_device_id *hid_match_id(struct hid_device *hdev,
 					 const struct hid_device_id *id);
+s32 hid_snto32(__u32 value, unsigned n);
 
 /**
  * hid_map_usage - map usage input bits
-- 
cgit v1.2.3-55-g7522


From f262d1fa2c651a5e2f92b6aee8779597631cd5d4 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires
Date: Wed, 14 Nov 2012 16:59:16 +0100
Subject: HID: add usage_index in struct hid_usage.

Currently, there is no way to know the index of the current field
in the .input_mapping and .event callbacks  when this field is inside
an array of HID fields.
This patch adds this index to the struct hid_usage so that this
information is available to input_mapping and event callbacks.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 4 ++++
 include/linux/hid.h    | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f5004e2ca0c5..b5974cd45968 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -92,6 +92,7 @@ EXPORT_SYMBOL_GPL(hid_register_report);
 static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values)
 {
 	struct hid_field *field;
+	int i;
 
 	if (report->maxfield == HID_MAX_FIELDS) {
 		hid_err(report->device, "too many fields in report\n");
@@ -110,6 +111,9 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned
 	field->value = (s32 *)(field->usage + usages);
 	field->report = report;
 
+	for (i = 0; i < usages; i++)
+		field->usage[i].usage_index = i;
+
 	return field;
 }
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 4161bf2c0b5b..d2c42dd222c1 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -342,6 +342,7 @@ struct hid_collection {
 struct hid_usage {
 	unsigned  hid;			/* hid usage code */
 	unsigned  collection_index;	/* index into collection array */
+	unsigned  usage_index;		/* index into usage array */
 	/* hidinput data */
 	__u16     code;			/* input driver code */
 	__u8      type;			/* input driver type */
-- 
cgit v1.2.3-55-g7522


From 29807d1e24b7cd696442d2f600057230f084b3c7 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires
Date: Wed, 14 Nov 2012 16:59:22 +0100
Subject: Input: mt: add input_mt_is_used

This patch extracts the test (slot->frame == mt->frame) so that it can
be used in third party drivers.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/input/input-mt.c | 2 +-
 include/linux/input/mt.h | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index c0ec7d42c3be..475b9d424eed 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -247,7 +247,7 @@ void input_mt_sync_frame(struct input_dev *dev)
 
 	if (mt->flags & INPUT_MT_DROP_UNUSED) {
 		for (s = mt->slots; s != mt->slots + mt->num_slots; s++) {
-			if (s->frame == mt->frame)
+			if (input_mt_is_used(mt, s))
 				continue;
 			input_mt_slot(dev, s - mt->slots);
 			input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1);
diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h
index cc5cca774bab..2e86bd0bfba1 100644
--- a/include/linux/input/mt.h
+++ b/include/linux/input/mt.h
@@ -69,6 +69,12 @@ static inline bool input_mt_is_active(const struct input_mt_slot *slot)
 	return input_mt_get_value(slot, ABS_MT_TRACKING_ID) >= 0;
 }
 
+static inline bool input_mt_is_used(const struct input_mt *mt,
+				    const struct input_mt_slot *slot)
+{
+	return slot->frame == mt->frame;
+}
+
 int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots,
 			unsigned int flags);
 void input_mt_destroy_slots(struct input_dev *dev);
-- 
cgit v1.2.3-55-g7522


From 5133375bb46a0d6c3fba07097caed7aa5e629ccd Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Thu, 15 Nov 2012 13:15:37 +0100
Subject: ACPI / PM: Fix build problem when CONFIG_ACPI or CONFIG_PM is not set

Commit e5cc8ef (ACPI / PM: Provide ACPI PM callback routines for
subsystems) introduced a build problem occuring if CONFIG_ACPI is
unset or CONFIG_PM is unset and errno.h is not included before
acpi.h, because in that case ENODEV used in acpi.h is undefined.

Fix the issue by making acpi.h include errno.h.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 0676b6ac57fa..5fdd87271518 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -25,6 +25,7 @@
 #ifndef _LINUX_ACPI_H
 #define _LINUX_ACPI_H
 
+#include <linux/errno.h>
 #include <linux/ioport.h>	/* for struct resource */
 
 #ifdef	CONFIG_ACPI
-- 
cgit v1.2.3-55-g7522


From 621eb19ce1ec216e03ad354cb0c4061736b2a436 Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Wed, 14 Nov 2012 10:48:05 -0500
Subject: svcrpc: Revert "sunrpc/cache.h: replace simple_strtoul"

Commit bbf43dc888833ac0539e437dbaeb28bfd4fbab9f "sunrpc/cache.h: replace
simple_strtoul" introduced new range-checking which could cause get_int
to fail on unsigned integers too large to be represented as an int.

We could parse them as unsigned instead--but it turns out svcgssd is
actually passing down "-1" in some cases.  Which is perhaps stupid, but
there's nothing we can do about it now.

So just revert back to the previous "sloppy" behavior that accepts
either representation.

Cc: stable@vger.kernel.org
Reported-by: Sven Geggus <lists@fuchsschwanzdomain.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f792794f6634..5dc9ee4d616e 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize);
 static inline int get_int(char **bpp, int *anint)
 {
 	char buf[50];
+	char *ep;
+	int rv;
 	int len = qword_get(bpp, buf, sizeof(buf));
 
 	if (len < 0)
@@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint)
 	if (len == 0)
 		return -ENOENT;
 
-	if (kstrtoint(buf, 0, anint))
+	rv = simple_strtol(buf, &ep, 0);
+	if (*ep)
 		return -EINVAL;
 
+	*anint = rv;
 	return 0;
 }
 
-- 
cgit v1.2.3-55-g7522


From aa1acb0451bb27add173d9641d0b74c58889e693 Mon Sep 17 00:00:00 2001
From: hongbo.zhang
Date: Thu, 15 Nov 2012 18:56:42 +0800
Subject: Thermal: Add ST-Ericsson DB8500 thermal driver.

This driver is based on the thermal management framework in thermal_sys.c. A
thermal zone device is created with the trip points to which cooling devices
can be bound, the current cooling device is cpufreq, e.g. CPU frequency is
clipped down to cool the CPU, and other cooling devices can be added and bound
to the trip points dynamically.  The platform specific PRCMU interrupts are
used to active thermal update when trip points are reached.

Signed-off-by: hongbo.zhang <hongbo.zhang@linaro.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Francesco Lavra <francescolavra.fl@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 .../devicetree/bindings/thermal/db8500-thermal.txt |  44 ++
 drivers/thermal/Kconfig                            |  20 +
 drivers/thermal/Makefile                           |   2 +
 drivers/thermal/db8500_cpufreq_cooling.c           | 108 +++++
 drivers/thermal/db8500_thermal.c                   | 531 +++++++++++++++++++++
 include/linux/platform_data/db8500_thermal.h       |  38 ++
 6 files changed, 743 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/thermal/db8500-thermal.txt
 create mode 100644 drivers/thermal/db8500_cpufreq_cooling.c
 create mode 100644 drivers/thermal/db8500_thermal.c
 create mode 100644 include/linux/platform_data/db8500_thermal.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/thermal/db8500-thermal.txt b/Documentation/devicetree/bindings/thermal/db8500-thermal.txt
new file mode 100644
index 000000000000..2e1c06fad81f
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/db8500-thermal.txt
@@ -0,0 +1,44 @@
+* ST-Ericsson DB8500 Thermal
+
+** Thermal node properties:
+
+- compatible : "stericsson,db8500-thermal";
+- reg : address range of the thermal sensor registers;
+- interrupts : interrupts generated from PRCMU;
+- interrupt-names : "IRQ_HOTMON_LOW" and "IRQ_HOTMON_HIGH";
+- num-trips : number of total trip points, this is required, set it 0 if none,
+  if greater than 0, the following properties must be defined;
+- tripN-temp : temperature of trip point N, should be in ascending order;
+- tripN-type : type of trip point N, should be one of "active" "passive" "hot"
+  "critical";
+- tripN-cdev-num : number of the cooling devices which can be bound to trip
+  point N, this is required if trip point N is defined, set it 0 if none,
+  otherwise the following cooling device names must be defined;
+- tripN-cdev-nameM : name of the No. M cooling device of trip point N;
+
+Usually the num-trips and tripN-*** are separated in board related dts files.
+
+Example:
+thermal@801573c0 {
+	compatible = "stericsson,db8500-thermal";
+	reg = <0x801573c0 0x40>;
+	interrupts = <21 0x4>, <22 0x4>;
+	interrupt-names = "IRQ_HOTMON_LOW", "IRQ_HOTMON_HIGH";
+
+	num-trips = <3>;
+
+	trip0-temp = <75000>;
+	trip0-type = "active";
+	trip0-cdev-num = <1>;
+	trip0-cdev-name0 = "thermal-cpufreq-0";
+
+	trip1-temp = <80000>;
+	trip1-type = "active";
+	trip1-cdev-num = <2>;
+	trip1-cdev-name0 = "thermal-cpufreq-0";
+	trip1-cdev-name1 = "thermal-fan";
+
+	trip2-temp = <85000>;
+	trip2-type = "critical";
+	trip2-cdev-num = <0>;
+}
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 99b6587ab8b3..d96da075c9f6 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -101,5 +101,25 @@ config EXYNOS_THERMAL
 	  If you say yes here you get support for TMU (Thermal Managment
 	  Unit) on SAMSUNG EXYNOS series of SoC.
 
+config DB8500_THERMAL
+	bool "DB8500 thermal management"
+	depends on ARCH_U8500
+	default y
+	help
+	  Adds DB8500 thermal management implementation according to the thermal
+	  management framework. A thermal zone with several trip points will be
+	  created. Cooling devices can be bound to the trip points to cool this
+	  thermal zone if trip points reached.
+
+config DB8500_CPUFREQ_COOLING
+	tristate "DB8500 cpufreq cooling"
+	depends on ARCH_U8500
+	depends on CPU_THERMAL
+	default y
+	help
+	  Adds DB8500 cpufreq cooling devices, and these cooling devices can be
+	  bound to thermal zone trip points. When a trip point reached, the
+	  bound cpufreq cooling device turns active to set CPU frequency low to
+	  cool down the CPU.
 
 endif
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 0b6b048f236d..d8da683245fc 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -16,3 +16,5 @@ obj-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
 obj-$(CONFIG_RCAR_THERMAL)	+= rcar_thermal.o
 obj-$(CONFIG_EXYNOS_THERMAL)	+= exynos_thermal.o
+obj-$(CONFIG_DB8500_THERMAL)	+= db8500_thermal.o
+obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
new file mode 100644
index 000000000000..4cf8e72af90a
--- /dev/null
+++ b/drivers/thermal/db8500_cpufreq_cooling.c
@@ -0,0 +1,108 @@
+/*
+ * db8500_cpufreq_cooling.c - DB8500 cpufreq works as cooling device.
+ *
+ * Copyright (C) 2012 ST-Ericsson
+ * Copyright (C) 2012 Linaro Ltd.
+ *
+ * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/cpu_cooling.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
+{
+	struct thermal_cooling_device *cdev;
+	struct cpumask mask_val;
+
+	/* make sure cpufreq driver has been initialized */
+	if (!cpufreq_frequency_get_table(0))
+		return -EPROBE_DEFER;
+
+	cpumask_set_cpu(0, &mask_val);
+	cdev = cpufreq_cooling_register(&mask_val);
+
+	if (IS_ERR_OR_NULL(cdev)) {
+		dev_err(&pdev->dev, "Failed to register cooling device\n");
+		return PTR_ERR(cdev);
+	}
+
+	platform_set_drvdata(pdev, cdev);
+
+	dev_info(&pdev->dev, "Cooling device registered: %s\n",	cdev->type);
+
+	return 0;
+}
+
+static int db8500_cpufreq_cooling_remove(struct platform_device *pdev)
+{
+	struct thermal_cooling_device *cdev = platform_get_drvdata(pdev);
+
+	cpufreq_cooling_unregister(cdev);
+
+	return 0;
+}
+
+static int db8500_cpufreq_cooling_suspend(struct platform_device *pdev,
+		pm_message_t state)
+{
+	return -ENOSYS;
+}
+
+static int db8500_cpufreq_cooling_resume(struct platform_device *pdev)
+{
+	return -ENOSYS;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id db8500_cpufreq_cooling_match[] = {
+	{ .compatible = "stericsson,db8500-cpufreq-cooling" },
+	{},
+};
+#else
+#define db8500_cpufreq_cooling_match NULL
+#endif
+
+static struct platform_driver db8500_cpufreq_cooling_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "db8500-cpufreq-cooling",
+		.of_match_table = db8500_cpufreq_cooling_match,
+	},
+	.probe = db8500_cpufreq_cooling_probe,
+	.suspend = db8500_cpufreq_cooling_suspend,
+	.resume = db8500_cpufreq_cooling_resume,
+	.remove = db8500_cpufreq_cooling_remove,
+};
+
+static int __init db8500_cpufreq_cooling_init(void)
+{
+	return platform_driver_register(&db8500_cpufreq_cooling_driver);
+}
+
+static void __exit db8500_cpufreq_cooling_exit(void)
+{
+	platform_driver_unregister(&db8500_cpufreq_cooling_driver);
+}
+
+/* Should be later than db8500_cpufreq_register */
+late_initcall(db8500_cpufreq_cooling_init);
+module_exit(db8500_cpufreq_cooling_exit);
+
+MODULE_AUTHOR("Hongbo Zhang <hongbo.zhang@stericsson.com>");
+MODULE_DESCRIPTION("DB8500 cpufreq cooling driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
new file mode 100644
index 000000000000..ec71ade3e317
--- /dev/null
+++ b/drivers/thermal/db8500_thermal.c
@@ -0,0 +1,531 @@
+/*
+ * db8500_thermal.c - DB8500 Thermal Management Implementation
+ *
+ * Copyright (C) 2012 ST-Ericsson
+ * Copyright (C) 2012 Linaro Ltd.
+ *
+ * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/cpu_cooling.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/dbx500-prcmu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_data/db8500_thermal.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#define PRCMU_DEFAULT_MEASURE_TIME	0xFFF
+#define PRCMU_DEFAULT_LOW_TEMP		0
+
+struct db8500_thermal_zone {
+	struct thermal_zone_device *therm_dev;
+	struct mutex th_lock;
+	struct work_struct therm_work;
+	struct db8500_thsens_platform_data *trip_tab;
+	enum thermal_device_mode mode;
+	enum thermal_trend trend;
+	unsigned long cur_temp_pseudo;
+	unsigned int cur_index;
+};
+
+/* Local function to check if thermal zone matches cooling devices */
+static int db8500_thermal_match_cdev(struct thermal_cooling_device *cdev,
+		struct db8500_trip_point *trip_point)
+{
+	int i;
+
+	if (!strlen(cdev->type))
+		return -EINVAL;
+
+	for (i = 0; i < COOLING_DEV_MAX; i++) {
+		if (!strcmp(trip_point->cdev_name[i], cdev->type))
+			return 0;
+	}
+
+	return -ENODEV;
+}
+
+/* Callback to bind cooling device to thermal zone */
+static int db8500_cdev_bind(struct thermal_zone_device *thermal,
+		struct thermal_cooling_device *cdev)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+	unsigned long max_state, upper, lower;
+	int i, ret = -EINVAL;
+
+	cdev->ops->get_max_state(cdev, &max_state);
+
+	for (i = 0; i < ptrips->num_trips; i++) {
+		if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i]))
+			continue;
+
+		upper = lower = i > max_state ? max_state : i;
+
+		ret = thermal_zone_bind_cooling_device(thermal, i, cdev,
+			upper, lower);
+
+		dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type,
+			i, ret, ret ? "fail" : "succeed");
+	}
+
+	return ret;
+}
+
+/* Callback to unbind cooling device from thermal zone */
+static int db8500_cdev_unbind(struct thermal_zone_device *thermal,
+		struct thermal_cooling_device *cdev)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+	int i, ret = -EINVAL;
+
+	for (i = 0; i < ptrips->num_trips; i++) {
+		if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i]))
+			continue;
+
+		ret = thermal_zone_unbind_cooling_device(thermal, i, cdev);
+
+		dev_info(&cdev->device, "%s unbind from %d: %s\n", cdev->type,
+			i, ret ? "fail" : "succeed");
+	}
+
+	return ret;
+}
+
+/* Callback to get current temperature */
+static int db8500_sys_get_temp(struct thermal_zone_device *thermal,
+		unsigned long *temp)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+
+	/*
+	 * TODO: There is no PRCMU interface to get temperature data currently,
+	 * so a pseudo temperature is returned , it works for thermal framework
+	 * and this will be fixed when the PRCMU interface is available.
+	 */
+	*temp = pzone->cur_temp_pseudo;
+
+	return 0;
+}
+
+/* Callback to get temperature changing trend */
+static int db8500_sys_get_trend(struct thermal_zone_device *thermal,
+		int trip, enum thermal_trend *trend)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+
+	*trend = pzone->trend;
+
+	return 0;
+}
+
+/* Callback to get thermal zone mode */
+static int db8500_sys_get_mode(struct thermal_zone_device *thermal,
+		enum thermal_device_mode *mode)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+
+	mutex_lock(&pzone->th_lock);
+	*mode = pzone->mode;
+	mutex_unlock(&pzone->th_lock);
+
+	return 0;
+}
+
+/* Callback to set thermal zone mode */
+static int db8500_sys_set_mode(struct thermal_zone_device *thermal,
+		enum thermal_device_mode mode)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+
+	mutex_lock(&pzone->th_lock);
+
+	pzone->mode = mode;
+	if (mode == THERMAL_DEVICE_ENABLED)
+		schedule_work(&pzone->therm_work);
+
+	mutex_unlock(&pzone->th_lock);
+
+	return 0;
+}
+
+/* Callback to get trip point type */
+static int db8500_sys_get_trip_type(struct thermal_zone_device *thermal,
+		int trip, enum thermal_trip_type *type)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+
+	if (trip >= ptrips->num_trips)
+		return -EINVAL;
+
+	*type = ptrips->trip_points[trip].type;
+
+	return 0;
+}
+
+/* Callback to get trip point temperature */
+static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal,
+		int trip, unsigned long *temp)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+
+	if (trip >= ptrips->num_trips)
+		return -EINVAL;
+
+	*temp = ptrips->trip_points[trip].temp;
+
+	return 0;
+}
+
+/* Callback to get critical trip point temperature */
+static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal,
+		unsigned long *temp)
+{
+	struct db8500_thermal_zone *pzone = thermal->devdata;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+	int i;
+
+	for (i = ptrips->num_trips - 1; i > 0; i--) {
+		if (ptrips->trip_points[i].type == THERMAL_TRIP_CRITICAL) {
+			*temp = ptrips->trip_points[i].temp;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static struct thermal_zone_device_ops thdev_ops = {
+	.bind = db8500_cdev_bind,
+	.unbind = db8500_cdev_unbind,
+	.get_temp = db8500_sys_get_temp,
+	.get_trend = db8500_sys_get_trend,
+	.get_mode = db8500_sys_get_mode,
+	.set_mode = db8500_sys_set_mode,
+	.get_trip_type = db8500_sys_get_trip_type,
+	.get_trip_temp = db8500_sys_get_trip_temp,
+	.get_crit_temp = db8500_sys_get_crit_temp,
+};
+
+static void db8500_thermal_update_config(struct db8500_thermal_zone *pzone,
+		unsigned int idx, enum thermal_trend trend,
+		unsigned long next_low, unsigned long next_high)
+{
+	prcmu_stop_temp_sense();
+
+	pzone->cur_index = idx;
+	pzone->cur_temp_pseudo = (next_low + next_high)/2;
+	pzone->trend = trend;
+
+	prcmu_config_hotmon((u8)(next_low/1000), (u8)(next_high/1000));
+	prcmu_start_temp_sense(PRCMU_DEFAULT_MEASURE_TIME);
+}
+
+static irqreturn_t prcmu_low_irq_handler(int irq, void *irq_data)
+{
+	struct db8500_thermal_zone *pzone = irq_data;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+	unsigned int idx = pzone->cur_index;
+	unsigned long next_low, next_high;
+
+	if (unlikely(idx == 0))
+		/* Meaningless for thermal management, ignoring it */
+		return IRQ_HANDLED;
+
+	if (idx == 1) {
+		next_high = ptrips->trip_points[0].temp;
+		next_low = PRCMU_DEFAULT_LOW_TEMP;
+	} else {
+		next_high = ptrips->trip_points[idx-1].temp;
+		next_low = ptrips->trip_points[idx-2].temp;
+	}
+	idx -= 1;
+
+	db8500_thermal_update_config(pzone, idx, THERMAL_TREND_DROPPING,
+		next_low, next_high);
+
+	dev_dbg(&pzone->therm_dev->device,
+		"PRCMU set max %ld, min %ld\n", next_high, next_low);
+
+	schedule_work(&pzone->therm_work);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t prcmu_high_irq_handler(int irq, void *irq_data)
+{
+	struct db8500_thermal_zone *pzone = irq_data;
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+	unsigned int idx = pzone->cur_index;
+	unsigned long next_low, next_high;
+
+	if (idx < ptrips->num_trips - 1) {
+		next_high = ptrips->trip_points[idx+1].temp;
+		next_low = ptrips->trip_points[idx].temp;
+		idx += 1;
+
+		db8500_thermal_update_config(pzone, idx, THERMAL_TREND_RAISING,
+			next_low, next_high);
+
+		dev_dbg(&pzone->therm_dev->device,
+		"PRCMU set max %ld, min %ld\n", next_high, next_low);
+	} else if (idx == ptrips->num_trips - 1)
+		pzone->cur_temp_pseudo = ptrips->trip_points[idx].temp + 1;
+
+	schedule_work(&pzone->therm_work);
+
+	return IRQ_HANDLED;
+}
+
+static void db8500_thermal_work(struct work_struct *work)
+{
+	enum thermal_device_mode cur_mode;
+	struct db8500_thermal_zone *pzone;
+
+	pzone = container_of(work, struct db8500_thermal_zone, therm_work);
+
+	mutex_lock(&pzone->th_lock);
+	cur_mode = pzone->mode;
+	mutex_unlock(&pzone->th_lock);
+
+	if (cur_mode == THERMAL_DEVICE_DISABLED)
+		return;
+
+	thermal_zone_device_update(pzone->therm_dev);
+	dev_dbg(&pzone->therm_dev->device, "thermal work finished.\n");
+}
+
+#ifdef CONFIG_OF
+static struct db8500_thsens_platform_data*
+		db8500_thermal_parse_dt(struct platform_device *pdev)
+{
+	struct db8500_thsens_platform_data *ptrips;
+	struct device_node *np = pdev->dev.of_node;
+	char prop_name[32];
+	const char *tmp_str;
+	u32 tmp_data;
+	int i, j;
+
+	ptrips = devm_kzalloc(&pdev->dev, sizeof(*ptrips), GFP_KERNEL);
+	if (!ptrips)
+		return NULL;
+
+	if (of_property_read_u32(np, "num-trips", &tmp_data))
+		goto err_parse_dt;
+
+	if (tmp_data > THERMAL_MAX_TRIPS)
+		goto err_parse_dt;
+
+	ptrips->num_trips = tmp_data;
+
+	for (i = 0; i < ptrips->num_trips; i++) {
+		sprintf(prop_name, "trip%d-temp", i);
+		if (of_property_read_u32(np, prop_name, &tmp_data))
+			goto err_parse_dt;
+
+		ptrips->trip_points[i].temp = tmp_data;
+
+		sprintf(prop_name, "trip%d-type", i);
+		if (of_property_read_string(np, prop_name, &tmp_str))
+			goto err_parse_dt;
+
+		if (!strcmp(tmp_str, "active"))
+			ptrips->trip_points[i].type = THERMAL_TRIP_ACTIVE;
+		else if (!strcmp(tmp_str, "passive"))
+			ptrips->trip_points[i].type = THERMAL_TRIP_PASSIVE;
+		else if (!strcmp(tmp_str, "hot"))
+			ptrips->trip_points[i].type = THERMAL_TRIP_HOT;
+		else if (!strcmp(tmp_str, "critical"))
+			ptrips->trip_points[i].type = THERMAL_TRIP_CRITICAL;
+		else
+			goto err_parse_dt;
+
+		sprintf(prop_name, "trip%d-cdev-num", i);
+		if (of_property_read_u32(np, prop_name, &tmp_data))
+			goto err_parse_dt;
+
+		if (tmp_data > COOLING_DEV_MAX)
+			goto err_parse_dt;
+
+		for (j = 0; j < tmp_data; j++) {
+			sprintf(prop_name, "trip%d-cdev-name%d", i, j);
+			if (of_property_read_string(np, prop_name, &tmp_str))
+				goto err_parse_dt;
+
+			if (strlen(tmp_str) >= THERMAL_NAME_LENGTH)
+				goto err_parse_dt;
+
+			strcpy(ptrips->trip_points[i].cdev_name[j], tmp_str);
+		}
+	}
+	return ptrips;
+
+err_parse_dt:
+	dev_err(&pdev->dev, "Parsing device tree data error.\n");
+	return NULL;
+}
+#else
+static inline struct db8500_thsens_platform_data*
+		db8500_thermal_parse_dt(struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif
+
+static int db8500_thermal_probe(struct platform_device *pdev)
+{
+	struct db8500_thermal_zone *pzone = NULL;
+	struct db8500_thsens_platform_data *ptrips = NULL;
+	struct device_node *np = pdev->dev.of_node;
+	int low_irq, high_irq, ret = 0;
+	unsigned long dft_low, dft_high;
+
+	if (np)
+		ptrips = db8500_thermal_parse_dt(pdev);
+	else
+		ptrips = dev_get_platdata(&pdev->dev);
+
+	if (!ptrips)
+		return -EINVAL;
+
+	pzone = devm_kzalloc(&pdev->dev, sizeof(*pzone), GFP_KERNEL);
+	if (!pzone)
+		return -ENOMEM;
+
+	mutex_init(&pzone->th_lock);
+	mutex_lock(&pzone->th_lock);
+
+	pzone->mode = THERMAL_DEVICE_DISABLED;
+	pzone->trip_tab = ptrips;
+
+	INIT_WORK(&pzone->therm_work, db8500_thermal_work);
+
+	low_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_LOW");
+	if (low_irq < 0) {
+		dev_err(&pdev->dev, "Get IRQ_HOTMON_LOW failed.\n");
+		return low_irq;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, low_irq, NULL,
+		prcmu_low_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT,
+		"dbx500_temp_low", pzone);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to allocate temp low irq.\n");
+		return ret;
+	}
+
+	high_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_HIGH");
+	if (high_irq < 0) {
+		dev_err(&pdev->dev, "Get IRQ_HOTMON_HIGH failed.\n");
+		return high_irq;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, high_irq, NULL,
+		prcmu_high_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT,
+		"dbx500_temp_high", pzone);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to allocate temp high irq.\n");
+		return ret;
+	}
+
+	pzone->therm_dev = thermal_zone_device_register("db8500_thermal_zone",
+		ptrips->num_trips, 0, pzone, &thdev_ops, NULL, 0, 0);
+
+	if (IS_ERR_OR_NULL(pzone->therm_dev)) {
+		dev_err(&pdev->dev, "Register thermal zone device failed.\n");
+		return PTR_ERR(pzone->therm_dev);
+	}
+	dev_info(&pdev->dev, "Thermal zone device registered.\n");
+
+	dft_low = PRCMU_DEFAULT_LOW_TEMP;
+	dft_high = ptrips->trip_points[0].temp;
+
+	db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE,
+		dft_low, dft_high);
+
+	platform_set_drvdata(pdev, pzone);
+	pzone->mode = THERMAL_DEVICE_ENABLED;
+	mutex_unlock(&pzone->th_lock);
+
+	return 0;
+}
+
+static int db8500_thermal_remove(struct platform_device *pdev)
+{
+	struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev);
+
+	thermal_zone_device_unregister(pzone->therm_dev);
+	cancel_work_sync(&pzone->therm_work);
+	mutex_destroy(&pzone->th_lock);
+
+	return 0;
+}
+
+static int db8500_thermal_suspend(struct platform_device *pdev,
+		pm_message_t state)
+{
+	struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev);
+
+	flush_work(&pzone->therm_work);
+	prcmu_stop_temp_sense();
+
+	return 0;
+}
+
+static int db8500_thermal_resume(struct platform_device *pdev)
+{
+	struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev);
+	struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
+	unsigned long dft_low, dft_high;
+
+	dft_low = PRCMU_DEFAULT_LOW_TEMP;
+	dft_high = ptrips->trip_points[0].temp;
+
+	db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE,
+		dft_low, dft_high);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id db8500_thermal_match[] = {
+	{ .compatible = "stericsson,db8500-thermal" },
+	{},
+};
+#else
+#define db8500_thermal_match NULL
+#endif
+
+static struct platform_driver db8500_thermal_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "db8500-thermal",
+		.of_match_table = db8500_thermal_match,
+	},
+	.probe = db8500_thermal_probe,
+	.suspend = db8500_thermal_suspend,
+	.resume = db8500_thermal_resume,
+	.remove = db8500_thermal_remove,
+};
+
+module_platform_driver(db8500_thermal_driver);
+
+MODULE_AUTHOR("Hongbo Zhang <hongbo.zhang@stericsson.com>");
+MODULE_DESCRIPTION("DB8500 thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/db8500_thermal.h b/include/linux/platform_data/db8500_thermal.h
new file mode 100644
index 000000000000..3bf60902e902
--- /dev/null
+++ b/include/linux/platform_data/db8500_thermal.h
@@ -0,0 +1,38 @@
+/*
+ * db8500_thermal.h - DB8500 Thermal Management Implementation
+ *
+ * Copyright (C) 2012 ST-Ericsson
+ * Copyright (C) 2012 Linaro Ltd.
+ *
+ * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DB8500_THERMAL_H_
+#define _DB8500_THERMAL_H_
+
+#include <linux/thermal.h>
+
+#define COOLING_DEV_MAX 8
+
+struct db8500_trip_point {
+	unsigned long temp;
+	enum thermal_trip_type type;
+	char cdev_name[COOLING_DEV_MAX][THERMAL_NAME_LENGTH];
+};
+
+struct db8500_thsens_platform_data {
+	struct db8500_trip_point trip_points[THERMAL_MAX_TRIPS];
+	int num_trips;
+};
+
+#endif /* _DB8500_THERMAL_H_ */
-- 
cgit v1.2.3-55-g7522


From fc05d5a30dc19dd4c6d161e551719a8c597c7890 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Thu, 4 Oct 2012 09:28:49 +0200
Subject: mtd: delete nomadik_nand driver

The nomadik_nand driver is really just a subset of the FSMC
NAND driver, and there are no users anymore so let's delete
it.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/Kconfig                       |   6 -
 drivers/mtd/nand/Makefile                      |   1 -
 drivers/mtd/nand/nomadik_nand.c                | 235 -------------------------
 include/linux/platform_data/mtd-nomadik-nand.h |  16 --
 4 files changed, 258 deletions(-)
 delete mode 100644 drivers/mtd/nand/nomadik_nand.c
 delete mode 100644 include/linux/platform_data/mtd-nomadik-nand.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index ee803d611e4e..a803d9ba55bd 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -526,12 +526,6 @@ config MTD_NAND_MXC
 	  This enables the driver for the NAND flash controller on the
 	  MXC processors.
 
-config MTD_NAND_NOMADIK
-	tristate "ST Nomadik 8815 NAND support"
-	depends on ARCH_NOMADIK
-	help
-	  Driver for the NAND flash controller on the Nomadik, with ECC.
-
 config MTD_NAND_SH_FLCTL
 	tristate "Support for NAND on Renesas SuperH FLCTL"
 	depends on SUPERH || ARCH_SHMOBILE
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 38358c90771e..44fca0553365 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -49,7 +49,6 @@ obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 obj-$(CONFIG_MTD_NAND_SOCRATES)		+= socrates_nand.o
 obj-$(CONFIG_MTD_NAND_TXX9NDFMC)	+= txx9ndfmc.o
 obj-$(CONFIG_MTD_NAND_NUC900)		+= nuc900_nand.o
-obj-$(CONFIG_MTD_NAND_NOMADIK)		+= nomadik_nand.o
 obj-$(CONFIG_MTD_NAND_MPC5121_NFC)	+= mpc5121_nfc.o
 obj-$(CONFIG_MTD_NAND_RICOH)		+= r852.o
 obj-$(CONFIG_MTD_NAND_JZ4740)		+= jz4740_nand.o
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
deleted file mode 100644
index 9ee0c4edfacf..000000000000
--- a/drivers/mtd/nand/nomadik_nand.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- *  drivers/mtd/nand/nomadik_nand.c
- *
- *  Overview:
- *  	Driver for on-board NAND flash on Nomadik Platforms
- *
- * Copyright © 2007 STMicroelectronics Pvt. Ltd.
- * Author: Sachin Verma <sachin.verma@st.com>
- *
- * Copyright © 2009 Alessandro Rubini
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/nand_ecc.h>
-#include <linux/platform_device.h>
-#include <linux/mtd/partitions.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/platform_data/mtd-nomadik-nand.h>
-#include <mach/fsmc.h>
-
-#include <mtd/mtd-abi.h>
-
-struct nomadik_nand_host {
-	struct mtd_info		mtd;
-	struct nand_chip	nand;
-	void __iomem *data_va;
-	void __iomem *cmd_va;
-	void __iomem *addr_va;
-	struct nand_bbt_descr *bbt_desc;
-};
-
-static struct nand_ecclayout nomadik_ecc_layout = {
-	.eccbytes = 3 * 4,
-	.eccpos = { /* each subpage has 16 bytes: pos 2,3,4 hosts ECC */
-		0x02, 0x03, 0x04,
-		0x12, 0x13, 0x14,
-		0x22, 0x23, 0x24,
-		0x32, 0x33, 0x34},
-	/* let's keep bytes 5,6,7 for us, just in case we change ECC algo */
-	.oobfree = { {0x08, 0x08}, {0x18, 0x08}, {0x28, 0x08}, {0x38, 0x08} },
-};
-
-static void nomadik_ecc_control(struct mtd_info *mtd, int mode)
-{
-	/* No need to enable hw ecc, it's on by default */
-}
-
-static void nomadik_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
-{
-	struct nand_chip *nand = mtd->priv;
-	struct nomadik_nand_host *host = nand->priv;
-
-	if (cmd == NAND_CMD_NONE)
-		return;
-
-	if (ctrl & NAND_CLE)
-		writeb(cmd, host->cmd_va);
-	else
-		writeb(cmd, host->addr_va);
-}
-
-static int nomadik_nand_probe(struct platform_device *pdev)
-{
-	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
-	struct nomadik_nand_host *host;
-	struct mtd_info *mtd;
-	struct nand_chip *nand;
-	struct resource *res;
-	int ret = 0;
-
-	/* Allocate memory for the device structure (and zero it) */
-	host = kzalloc(sizeof(struct nomadik_nand_host), GFP_KERNEL);
-	if (!host) {
-		dev_err(&pdev->dev, "Failed to allocate device structure.\n");
-		return -ENOMEM;
-	}
-
-	/* Call the client's init function, if any */
-	if (pdata->init)
-		ret = pdata->init();
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Init function failed\n");
-		goto err;
-	}
-
-	/* ioremap three regions */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->addr_va = ioremap(res->start, resource_size(res));
-
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->data_va = ioremap(res->start, resource_size(res));
-
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->cmd_va = ioremap(res->start, resource_size(res));
-
-	if (!host->addr_va || !host->data_va || !host->cmd_va) {
-		ret = -ENOMEM;
-		goto err_unmap;
-	}
-
-	/* Link all private pointers */
-	mtd = &host->mtd;
-	nand = &host->nand;
-	mtd->priv = nand;
-	nand->priv = host;
-
-	host->mtd.owner = THIS_MODULE;
-	nand->IO_ADDR_R = host->data_va;
-	nand->IO_ADDR_W = host->data_va;
-	nand->cmd_ctrl = nomadik_cmd_ctrl;
-
-	/*
-	 * This stanza declares ECC_HW but uses soft routines. It's because
-	 * HW claims to make the calculation but not the correction. However,
-	 * I haven't managed to get the desired data out of it until now.
-	 */
-	nand->ecc.mode = NAND_ECC_SOFT;
-	nand->ecc.layout = &nomadik_ecc_layout;
-	nand->ecc.hwctl = nomadik_ecc_control;
-	nand->ecc.size = 512;
-	nand->ecc.bytes = 3;
-
-	nand->options = pdata->options;
-
-	/*
-	 * Scan to find existence of the device
-	 */
-	if (nand_scan(&host->mtd, 1)) {
-		ret = -ENXIO;
-		goto err_unmap;
-	}
-
-	mtd_device_register(&host->mtd, pdata->parts, pdata->nparts);
-
-	platform_set_drvdata(pdev, host);
-	return 0;
-
- err_unmap:
-	if (host->cmd_va)
-		iounmap(host->cmd_va);
-	if (host->data_va)
-		iounmap(host->data_va);
-	if (host->addr_va)
-		iounmap(host->addr_va);
- err:
-	kfree(host);
-	return ret;
-}
-
-/*
- * Clean up routine
- */
-static int nomadik_nand_remove(struct platform_device *pdev)
-{
-	struct nomadik_nand_host *host = platform_get_drvdata(pdev);
-	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
-
-	if (pdata->exit)
-		pdata->exit();
-
-	if (host) {
-		nand_release(&host->mtd);
-		iounmap(host->cmd_va);
-		iounmap(host->data_va);
-		iounmap(host->addr_va);
-		kfree(host);
-	}
-	return 0;
-}
-
-static int nomadik_nand_suspend(struct device *dev)
-{
-	struct nomadik_nand_host *host = dev_get_drvdata(dev);
-	int ret = 0;
-	if (host)
-		ret = mtd_suspend(&host->mtd);
-	return ret;
-}
-
-static int nomadik_nand_resume(struct device *dev)
-{
-	struct nomadik_nand_host *host = dev_get_drvdata(dev);
-	if (host)
-		mtd_resume(&host->mtd);
-	return 0;
-}
-
-static const struct dev_pm_ops nomadik_nand_pm_ops = {
-	.suspend = nomadik_nand_suspend,
-	.resume = nomadik_nand_resume,
-};
-
-static struct platform_driver nomadik_nand_driver = {
-	.probe = nomadik_nand_probe,
-	.remove = nomadik_nand_remove,
-	.driver = {
-		.owner = THIS_MODULE,
-		.name = "nomadik_nand",
-		.pm = &nomadik_nand_pm_ops,
-	},
-};
-
-module_platform_driver(nomadik_nand_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("ST Microelectronics (sachin.verma@st.com)");
-MODULE_DESCRIPTION("NAND driver for Nomadik Platform");
diff --git a/include/linux/platform_data/mtd-nomadik-nand.h b/include/linux/platform_data/mtd-nomadik-nand.h
deleted file mode 100644
index c3c8254c22a5..000000000000
--- a/include/linux/platform_data/mtd-nomadik-nand.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_ARCH_NAND_H
-#define __ASM_ARCH_NAND_H
-
-struct nomadik_nand_platform_data {
-	struct mtd_partition *parts;
-	int nparts;
-	int options;
-	int (*init) (void);
-	int (*exit) (void);
-};
-
-#define NAND_IO_DATA	0x40000000
-#define NAND_IO_CMD	0x40800000
-#define NAND_IO_ADDR	0x41000000
-
-#endif				/* __ASM_ARCH_NAND_H */
-- 
cgit v1.2.3-55-g7522


From 6d7b42a447f92eb3e7e410bbf62042693eb040f7 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD
Date: Thu, 4 Oct 2012 15:14:16 +0200
Subject: mtd: fsmc_nand: pass the ale and cmd resource via resource

Do not use the platform_data to pass resource and be smart in the drivers.
Just pass it via resource

Switch to devm_request_and_ioremap at the sametime

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-By: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 .../devicetree/bindings/mtd/fsmc-nand.txt          | 12 +++---
 arch/arm/boot/dts/spear13xx.dtsi                   | 10 ++---
 arch/arm/boot/dts/spear300.dtsi                    |  8 ++--
 arch/arm/boot/dts/spear310.dtsi                    |  8 ++--
 arch/arm/boot/dts/spear320.dtsi                    |  8 ++--
 arch/arm/boot/dts/spear600.dtsi                    |  8 ++--
 arch/arm/mach-u300/core.c                          | 14 ++++++-
 drivers/mtd/nand/fsmc_nand.c                       | 44 ++++++----------------
 include/linux/mtd/fsmc.h                           |  3 --
 9 files changed, 49 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
index e2c663b354d2..e3ea32e7de3e 100644
--- a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
@@ -3,9 +3,7 @@
 Required properties:
 - compatible : "st,spear600-fsmc-nand"
 - reg : Address range of the mtd chip
-- reg-names: Should contain the reg names "fsmc_regs" and "nand_data"
-- st,ale-off : Chip specific offset to ALE
-- st,cle-off : Chip specific offset to CLE
+- reg-names: Should contain the reg names "fsmc_regs", "nand_data", "nand_addr" and "nand_cmd"
 
 Optional properties:
 - bank-width : Width (in bytes) of the device.  If not present, the width
@@ -19,10 +17,10 @@ Example:
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0xd1800000 0x1000	/* FSMC Register */
-		       0xd2000000 0x4000>;	/* NAND Base */
-		reg-names = "fsmc_regs", "nand_data";
-		st,ale-off = <0x20000>;
-		st,cle-off = <0x10000>;
+		       0xd2000000 0x0010	/* NAND Base DATA */
+		       0xd2020000 0x0010	/* NAND Base ADDR */
+		       0xd2010000 0x0010>;	/* NAND Base CMD */
+		reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 
 		bank-width = <1>;
 		nand-skip-bbtscan;
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index f7b84aced654..14a6d15c2a81 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -104,15 +104,15 @@
 			compatible = "st,spear600-fsmc-nand";
 			#address-cells = <1>;
 			#size-cells = <1>;
-			reg = <0xb0000000 0x1000	/* FSMC Register */
-			       0xb0800000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
+			reg = <0xb0000000 0x1000	/* FSMC Register*/
+			       0xb0800000 0x0010	/* NAND Base DATA */
+			       0xb0820000 0x0010	/* NAND Base ADDR */
+			       0xb0810000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			interrupts = <0 20 0x4
 				      0 21 0x4
 				      0 22 0x4
 				      0 23 0x4>;
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index ed3627c116cc..bc436387d7f9 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -38,10 +38,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x94000000 0x1000	/* FSMC Register */
-			       0x80000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0x80000000 0x0010	/* NAND Base DATA */
+			       0x80020000 0x0010	/* NAND Base ADDR */
+			       0x80010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index 62fc4fb3e5f9..7840e529aba2 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -32,10 +32,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x44000000 0x1000	/* FSMC Register */
-			       0x40000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x10000>;
-			st,cle-off = <0x20000>;
+			       0x40000000 0x0010	/* NAND Base DATA */
+			       0x40020000 0x0010	/* NAND Base ADDR */
+			       0x40010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index 1f49d69595a0..5ad820641ac0 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -38,10 +38,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x4c000000 0x1000	/* FSMC Register */
-			       0x50000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0x50000000 0x0010	/* NAND Base DATA */
+			       0x50020000 0x0010	/* NAND Base ADDR */
+			       0x50010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index a3c36e47d7ef..4ecc66f5ac88 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -67,10 +67,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0xd1800000 0x1000	/* FSMC Register */
-			       0xd2000000 0x4000>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0xd2000000 0x0010	/* NAND Base DATA */
+			       0xd2020000 0x0010	/* NAND Base ADDR */
+			       0xd2010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index b8efac4daed8..f642a1552346 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -251,6 +251,18 @@ static struct resource rtc_resources[] = {
  * but these are not yet used by the driver.
  */
 static struct resource fsmc_resources[] = {
+	{
+		.name  = "nand_addr",
+		.start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE,
+		.end   = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "nand_cmd",
+		.start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE,
+		.end   = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	},
 	{
 		.name  = "nand_data",
 		.start = U300_NAND_CS0_PHYS_BASE,
@@ -1496,8 +1508,6 @@ static struct fsmc_nand_platform_data nand_platform_data = {
 	.nr_partitions = ARRAY_SIZE(u300_partitions),
 	.options = NAND_SKIP_BBTSCAN,
 	.width = FSMC_NAND_BW8,
-	.ale_off = PLAT_NAND_ALE,
-	.cle_off = PLAT_NAND_CLE,
 };
 
 static struct platform_device nand_device = {
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 38d26240d8b1..cb8645087151 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -876,8 +876,6 @@ static int __devinit fsmc_nand_probe_config_dt(struct platform_device *pdev,
 			return -EINVAL;
 		}
 	}
-	of_property_read_u32(np, "st,ale-off", &pdata->ale_off);
-	of_property_read_u32(np, "st,cle-off", &pdata->cle_off);
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
 		pdata->options = NAND_SKIP_BBTSCAN;
 
@@ -935,41 +933,28 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (!res)
 		return -EINVAL;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-				pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory data resourse\n");
-		return -ENOENT;
-	}
-
-	host->data_pa = (dma_addr_t)res->start;
-	host->data_va = devm_ioremap(&pdev->dev, res->start,
-			resource_size(res));
+	host->data_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->data_va) {
 		dev_err(&pdev->dev, "data ioremap failed\n");
 		return -ENOMEM;
 	}
+	host->data_pa = (dma_addr_t)res->start;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start + pdata->ale_off,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory ale resourse\n");
-		return -ENOENT;
-	}
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
+	if (!res)
+		return -EINVAL;
 
-	host->addr_va = devm_ioremap(&pdev->dev, res->start + pdata->ale_off,
-			resource_size(res));
+	host->addr_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->addr_va) {
 		dev_err(&pdev->dev, "ale ioremap failed\n");
 		return -ENOMEM;
 	}
 
-	if (!devm_request_mem_region(&pdev->dev, res->start + pdata->cle_off,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory cle resourse\n");
-		return -ENOENT;
-	}
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
+	if (!res)
+		return -EINVAL;
 
-	host->cmd_va = devm_ioremap(&pdev->dev, res->start + pdata->cle_off,
-			resource_size(res));
+	host->cmd_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->cmd_va) {
 		dev_err(&pdev->dev, "ale ioremap failed\n");
 		return -ENOMEM;
@@ -979,14 +964,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (!res)
 		return -EINVAL;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-			pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory regs resourse\n");
-		return -ENOENT;
-	}
-
-	host->regs_va = devm_ioremap(&pdev->dev, res->start,
-			resource_size(res));
+	host->regs_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->regs_va) {
 		dev_err(&pdev->dev, "regs ioremap failed\n");
 		return -ENOMEM;
diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index b20029221fb1..d6ed61ef451d 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -155,9 +155,6 @@ struct fsmc_nand_platform_data {
 	unsigned int		width;
 	unsigned int		bank;
 
-	/* CLE, ALE offsets */
-	unsigned int		cle_off;
-	unsigned int		ale_off;
 	enum access_mode	mode;
 
 	void			(*select_bank)(uint32_t bank, uint32_t busw);
-- 
cgit v1.2.3-55-g7522


From 2f25ae97fe4b424d88d765797c46456c7c0f1bae Mon Sep 17 00:00:00 2001
From: Vipin Kumar
Date: Tue, 9 Oct 2012 16:14:53 +0530
Subject: mtd: nand: Increase the ecc placement locations to 640

Few devices like H27UBG8T2CTR have a writesize/oobsize of 8KB/640B.
This means that the maximum oobsize has gone up to 640 bytes and consequently
the maximum ecc placement locations have also gone up to 640.

Signed-off-by: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/mtd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 81d61e704599..f9ac2897b86b 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -98,7 +98,7 @@ struct mtd_oob_ops {
 };
 
 #define MTD_MAX_OOBFREE_ENTRIES_LARGE	32
-#define MTD_MAX_ECCPOS_ENTRIES_LARGE	448
+#define MTD_MAX_ECCPOS_ENTRIES_LARGE	640
 /*
  * Internal ECC layout control structure. For historical reasons, there is a
  * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained
-- 
cgit v1.2.3-55-g7522


From 5de0b52ea8f8f5149502867acff2efb5efaf1fc2 Mon Sep 17 00:00:00 2001
From: Huang Shijie
Date: Sat, 13 Oct 2012 13:03:29 -0400
Subject: mtd: gpmi: remove unneccessary header

The whole gpmi-nand driver has turned to pure devicetree supported.
So the linux/mtd/gpmi-nand.h is not neccessary now. Just remove it,
and move some macros to the gpmi-nand driver itself.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-lib.c  |  1 -
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c |  7 +++-
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h |  1 -
 include/linux/mtd/gpmi-nand.h          | 68 ----------------------------------
 4 files changed, 6 insertions(+), 71 deletions(-)
 delete mode 100644 include/linux/mtd/gpmi-nand.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 3502accd4bc3..1585c5b1c8bf 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -18,7 +18,6 @@
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
-#include <linux/mtd/gpmi-nand.h>
 #include <linux/delay.h>
 #include <linux/clk.h>
 
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index e2c56fc4574b..d37619882fa6 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/mtd/gpmi-nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/of.h>
@@ -33,6 +32,12 @@
 #include <linux/of_mtd.h>
 #include "gpmi-nand.h"
 
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
+
 /* add our owner bbt descriptor */
 static uint8_t scan_ff_pattern[] = { 0xff };
 static struct nand_bbt_descr gpmi_bbt_descr = {
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 7ac25c1e58f9..3d93a5e39090 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -130,7 +130,6 @@ struct gpmi_nand_data {
 	/* System Interface */
 	struct device		*dev;
 	struct platform_device	*pdev;
-	struct gpmi_nand_platform_data	*pdata;
 
 	/* Resources */
 	struct resources	resources;
diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
deleted file mode 100644
index ed3c4e09f3d1..000000000000
--- a/include/linux/mtd/gpmi-nand.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef __MACH_MXS_GPMI_NAND_H__
-#define __MACH_MXS_GPMI_NAND_H__
-
-/* The size of the resources is fixed. */
-#define GPMI_NAND_RES_SIZE	6
-
-/* Resource names for the GPMI NAND driver. */
-#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
-#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
-#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
-#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
-#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
-#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
-
-/**
- * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
- *
- * This structure communicates platform-specific information to the GPMI NAND
- * driver that can't be expressed as resources.
- *
- * @platform_init:           A pointer to a function the driver will call to
- *                           initialize the platform (e.g., set up the pin mux).
- * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_chip_count:          The maximum number of chips for which the driver
- *                           should configure the hardware. This value most
- *                           likely reflects the number of pins that are
- *                           connected to a NAND Flash device. If this is
- *                           greater than the SoC hardware can support, the
- *                           driver will print a message and fail to initialize.
- * @partitions:              An optional pointer to an array of partition
- *                           descriptions.
- * @partition_count:         The number of elements in the partitions array.
- */
-struct gpmi_nand_platform_data {
-	/* SoC hardware information. */
-	int		(*platform_init)(void);
-
-	/* NAND Flash information. */
-	unsigned int	min_prop_delay_in_ns;
-	unsigned int	max_prop_delay_in_ns;
-	unsigned int	max_chip_count;
-
-	/* Medium information. */
-	struct		mtd_partition *partitions;
-	unsigned	partition_count;
-};
-#endif
-- 
cgit v1.2.3-55-g7522


From e8a9d8f31c592eea89f1b0d3fd425e7a96944e88 Mon Sep 17 00:00:00 2001
From: Bastian Hecht
Date: Fri, 19 Oct 2012 12:15:34 +0200
Subject: mtd: sh_flctl: Minor cleanups

Some small fixes to avoid sparse and smatch complain. Other cosmetic fixes
as well.

- Change of the type of the member index in struct sh_flctl from signed
to unsigned. We use index by addressing array members, so unsigned is more
concise here. Adapt functions relying on sh_flctl::index.
- Remove a blurring cast in write_fiforeg().
- Apply consistent naming scheme when refering to the data buffer.
- Shorten some unnecessarily verbose functions.
- Remove spaces at start of lines.

Signed-off-by: Bastian Hecht <hechtb@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/sh_flctl.c  | 37 ++++++++++++++++---------------------
 include/linux/mtd/sh_flctl.h |  2 +-
 2 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 4fbfe96e37a1..78d18c0f132f 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -225,7 +225,7 @@ static enum flctl_ecc_res_t wait_recfifo_ready
 
 		for (i = 0; i < 3; i++) {
 			uint8_t org;
-			int index;
+			unsigned int index;
 
 			data = readl(ecc_reg[i]);
 
@@ -305,28 +305,29 @@ static enum flctl_ecc_res_t read_ecfiforeg
 	return res;
 }
 
-static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+static void write_fiforeg(struct sh_flctl *flctl, int rlen,
+						unsigned int offset)
 {
 	int i, len_4align;
-	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
-	void *fifo_addr = (void *)FLDTFIFO(flctl);
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
 	for (i = 0; i < len_4align; i++) {
 		wait_wfifo_ready(flctl);
-		writel(cpu_to_be32(data[i]), fifo_addr);
+		writel(cpu_to_be32(buf[i]), FLDTFIFO(flctl));
 	}
 }
 
-static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
+						unsigned int offset)
 {
 	int i, len_4align;
-	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
 	for (i = 0; i < len_4align; i++) {
 		wait_wecfifo_ready(flctl);
-		writel(cpu_to_be32(data[i]), FLECFIFO(flctl));
+		writel(cpu_to_be32(buf[i]), FLECFIFO(flctl));
 	}
 }
 
@@ -748,41 +749,35 @@ static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
 static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 
-	memcpy(&flctl->done_buff[index], buf, len);
+	memcpy(&flctl->done_buff[flctl->index], buf, len);
 	flctl->index += len;
 }
 
 static uint8_t flctl_read_byte(struct mtd_info *mtd)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 	uint8_t data;
 
-	data = flctl->done_buff[index];
+	data = flctl->done_buff[flctl->index];
 	flctl->index++;
 	return data;
 }
 
 static uint16_t flctl_read_word(struct mtd_info *mtd)
 {
-       struct sh_flctl *flctl = mtd_to_flctl(mtd);
-       int index = flctl->index;
-       uint16_t data;
-       uint16_t *buf = (uint16_t *)&flctl->done_buff[index];
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint16_t *buf = (uint16_t *)&flctl->done_buff[flctl->index];
 
-       data = *buf;
-       flctl->index += 2;
-       return data;
+	flctl->index += 2;
+	return *buf;
 }
 
 static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 
-	memcpy(buf, &flctl->done_buff[index], len);
+	memcpy(buf, &flctl->done_buff[flctl->index], len);
 	flctl->index += len;
 }
 
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 01e4b15b280e..481557688d05 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -147,7 +147,7 @@ struct sh_flctl {
 
 	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
 	int	read_bytes;
-	int	index;
+	unsigned int index;
 	int	seqin_column;		/* column in SEQIN cmd */
 	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
 	uint32_t seqin_read_cmd;		/* read cmd in SEQIN cmd */
-- 
cgit v1.2.3-55-g7522


From 83738d87e3a0a4096e1419a65b8228130d183df6 Mon Sep 17 00:00:00 2001
From: Bastian Hecht
Date: Fri, 19 Oct 2012 12:15:35 +0200
Subject: mtd: sh_flctl: Add DMA capabilty

The code probes if DMA channels can get allocated and tears them down at
removal/failure if needed.
If available it uses them to transfer the data part (not ECC). On
failure we fall back to PIO mode.

Based on Guennadi Liakhovetski's code from the sh_mmcif driver.

Signed-off-by: Bastian Hecht <hechtb@gmail.com>
Reviewed-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/sh_flctl.c  | 173 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/mtd/sh_flctl.h |  12 +++
 2 files changed, 183 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 78d18c0f132f..6dc0369aa44b 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -23,11 +23,15 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -106,6 +110,84 @@ static void wait_completion(struct sh_flctl *flctl)
 	writeb(0x0, FLTRCR(flctl));
 }
 
+static void flctl_dma_complete(void *param)
+{
+	struct sh_flctl *flctl = param;
+
+	complete(&flctl->dma_complete);
+}
+
+static void flctl_release_dma(struct sh_flctl *flctl)
+{
+	if (flctl->chan_fifo0_rx) {
+		dma_release_channel(flctl->chan_fifo0_rx);
+		flctl->chan_fifo0_rx = NULL;
+	}
+	if (flctl->chan_fifo0_tx) {
+		dma_release_channel(flctl->chan_fifo0_tx);
+		flctl->chan_fifo0_tx = NULL;
+	}
+}
+
+static void flctl_setup_dma(struct sh_flctl *flctl)
+{
+	dma_cap_mask_t mask;
+	struct dma_slave_config cfg;
+	struct platform_device *pdev = flctl->pdev;
+	struct sh_flctl_platform_data *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (!pdata)
+		return;
+
+	if (pdata->slave_id_fifo0_tx <= 0 || pdata->slave_id_fifo0_rx <= 0)
+		return;
+
+	/* We can only either use DMA for both Tx and Rx or not use it at all */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	flctl->chan_fifo0_tx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_fifo0_tx);
+	dev_dbg(&pdev->dev, "%s: TX: got channel %p\n", __func__,
+		flctl->chan_fifo0_tx);
+
+	if (!flctl->chan_fifo0_tx)
+		return;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.slave_id = pdata->slave_id_fifo0_tx;
+	cfg.direction = DMA_MEM_TO_DEV;
+	cfg.dst_addr = (dma_addr_t)FLDTFIFO(flctl);
+	cfg.src_addr = 0;
+	ret = dmaengine_slave_config(flctl->chan_fifo0_tx, &cfg);
+	if (ret < 0)
+		goto err;
+
+	flctl->chan_fifo0_rx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_fifo0_rx);
+	dev_dbg(&pdev->dev, "%s: RX: got channel %p\n", __func__,
+		flctl->chan_fifo0_rx);
+
+	if (!flctl->chan_fifo0_rx)
+		goto err;
+
+	cfg.slave_id = pdata->slave_id_fifo0_rx;
+	cfg.direction = DMA_DEV_TO_MEM;
+	cfg.dst_addr = 0;
+	cfg.src_addr = (dma_addr_t)FLDTFIFO(flctl);
+	ret = dmaengine_slave_config(flctl->chan_fifo0_rx, &cfg);
+	if (ret < 0)
+		goto err;
+
+	init_completion(&flctl->dma_complete);
+
+	return;
+
+err:
+	flctl_release_dma(flctl);
+}
+
 static void set_addr(struct mtd_info *mtd, int column, int page_addr)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
@@ -261,6 +343,70 @@ static void wait_wecfifo_ready(struct sh_flctl *flctl)
 	timeout_error(flctl, __func__);
 }
 
+static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
+					int len, enum dma_data_direction dir)
+{
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan;
+	enum dma_transfer_direction tr_dir;
+	dma_addr_t dma_addr;
+	dma_cookie_t cookie = -EINVAL;
+	uint32_t reg;
+	int ret;
+
+	if (dir == DMA_FROM_DEVICE) {
+		chan = flctl->chan_fifo0_rx;
+		tr_dir = DMA_DEV_TO_MEM;
+	} else {
+		chan = flctl->chan_fifo0_tx;
+		tr_dir = DMA_MEM_TO_DEV;
+	}
+
+	dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
+
+	if (dma_addr)
+		desc = dmaengine_prep_slave_single(chan, dma_addr, len,
+			tr_dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	if (desc) {
+		reg = readl(FLINTDMACR(flctl));
+		reg |= DREQ0EN;
+		writel(reg, FLINTDMACR(flctl));
+
+		desc->callback = flctl_dma_complete;
+		desc->callback_param = flctl;
+		cookie = dmaengine_submit(desc);
+
+		dma_async_issue_pending(chan);
+	} else {
+		/* DMA failed, fall back to PIO */
+		flctl_release_dma(flctl);
+		dev_warn(&flctl->pdev->dev,
+			 "DMA failed, falling back to PIO\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret =
+	wait_for_completion_timeout(&flctl->dma_complete,
+				msecs_to_jiffies(3000));
+
+	if (ret <= 0) {
+		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+		dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n");
+	}
+
+out:
+	reg = readl(FLINTDMACR(flctl));
+	reg &= ~DREQ0EN;
+	writel(reg, FLINTDMACR(flctl));
+
+	dma_unmap_single(chan->device->dev, dma_addr, len, dir);
+
+	/* ret > 0 is success */
+	return ret;
+}
+
 static void read_datareg(struct sh_flctl *flctl, int offset)
 {
 	unsigned long data;
@@ -279,11 +425,20 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
 
 	len_4align = (rlen + 3) / 4;
 
+	/* initiate DMA transfer */
+	if (flctl->chan_fifo0_rx && rlen >= 32 &&
+		flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0)
+			goto convert;	/* DMA success */
+
+	/* do polling transfer */
 	for (i = 0; i < len_4align; i++) {
 		wait_rfifo_ready(flctl);
 		buf[i] = readl(FLDTFIFO(flctl));
-		buf[i] = be32_to_cpu(buf[i]);
 	}
+
+convert:
+	for (i = 0; i < len_4align; i++)
+		buf[i] = be32_to_cpu(buf[i]);
 }
 
 static enum flctl_ecc_res_t read_ecfiforeg
@@ -325,9 +480,19 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
 	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
+
+	for (i = 0; i < len_4align; i++)
+		buf[i] = cpu_to_be32(buf[i]);
+
+	/* initiate DMA transfer */
+	if (flctl->chan_fifo0_tx && rlen >= 32 &&
+		flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0)
+			return;	/* DMA success */
+
+	/* do polling transfer */
 	for (i = 0; i < len_4align; i++) {
 		wait_wecfifo_ready(flctl);
-		writel(cpu_to_be32(buf[i]), FLECFIFO(flctl));
+		writel(buf[i], FLECFIFO(flctl));
 	}
 }
 
@@ -925,6 +1090,8 @@ static int __devinit flctl_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_resume(&pdev->dev);
 
+	flctl_setup_dma(flctl);
+
 	ret = nand_scan_ident(flctl_mtd, 1, NULL);
 	if (ret)
 		goto err_chip;
@@ -942,6 +1109,7 @@ static int __devinit flctl_probe(struct platform_device *pdev)
 	return 0;
 
 err_chip:
+	flctl_release_dma(flctl);
 	pm_runtime_disable(&pdev->dev);
 	free_irq(irq, flctl);
 err_flste:
@@ -955,6 +1123,7 @@ static int __devexit flctl_remove(struct platform_device *pdev)
 {
 	struct sh_flctl *flctl = platform_get_drvdata(pdev);
 
+	flctl_release_dma(flctl);
 	nand_release(&flctl->mtd);
 	pm_runtime_disable(&pdev->dev);
 	free_irq(platform_get_irq(pdev, 0), flctl);
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 481557688d05..1c28f8879b1c 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -20,6 +20,7 @@
 #ifndef __SH_FLCTL_H__
 #define __SH_FLCTL_H__
 
+#include <linux/completion.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
@@ -107,6 +108,7 @@
 #define ESTERINTE	(0x1 << 24)	/* ECC error interrupt enable */
 #define AC1CLR		(0x1 << 19)	/* ECC FIFO clear */
 #define AC0CLR		(0x1 << 18)	/* Data FIFO clear */
+#define DREQ0EN		(0x1 << 16)	/* FLDTFIFODMA Request Enable */
 #define ECERB		(0x1 << 9)	/* ECC error */
 #define STERB		(0x1 << 8)	/* Status error */
 #define STERINTE	(0x1 << 4)	/* Status error enable */
@@ -138,6 +140,8 @@ enum flctl_ecc_res_t {
 	FL_TIMEOUT
 };
 
+struct dma_chan;
+
 struct sh_flctl {
 	struct mtd_info		mtd;
 	struct nand_chip	chip;
@@ -161,6 +165,11 @@ struct sh_flctl {
 	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
 	unsigned holden:1;	/* Hardware has FLHOLDCR and HOLDEN is set */
 	unsigned qos_request:1;	/* QoS request to prevent deep power shutdown */
+
+	/* DMA related objects */
+	struct dma_chan		*chan_fifo0_rx;
+	struct dma_chan		*chan_fifo0_tx;
+	struct completion	dma_complete;
 };
 
 struct sh_flctl_platform_data {
@@ -170,6 +179,9 @@ struct sh_flctl_platform_data {
 
 	unsigned has_hwecc:1;
 	unsigned use_holden:1;
+
+	unsigned int            slave_id_fifo0_tx;
+	unsigned int            slave_id_fifo0_rx;
 };
 
 static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
-- 
cgit v1.2.3-55-g7522


From 9ef525a9141b14d23613faad303cf48a20814f1b Mon Sep 17 00:00:00 2001
From: Robert P. J. Day
Date: Thu, 25 Oct 2012 09:43:10 -0400
Subject: mtd: Fix kernel-doc content to avoid warning.

Add missing colons to fix kernel-doc generation warnings.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/nand.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 24e915957e4f..9d8a6048aacd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -471,8 +471,8 @@ struct nand_buffers {
  *			non 0 if ONFI supported.
  * @onfi_params:	[INTERN] holds the ONFI page parameter when ONFI is
  *			supported, 0 otherwise.
- * @onfi_set_features	[REPLACEABLE] set the features for ONFI nand
- * @onfi_get_features	[REPLACEABLE] get the features for ONFI nand
+ * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
+ * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
  * @ecclayout:		[REPLACEABLE] the default ECC placement scheme
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
-- 
cgit v1.2.3-55-g7522


From 3e9ce49e0ef95e22790a74720f0068696b2477c9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Mon, 29 Oct 2012 22:47:26 +0530
Subject: mtd: map: Fix compilation warning

This patch is an attempt to fix following compilation warning.

In file included from drivers/mtd/chips/cfi_cmdset_0001.c:35:0:
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wmaybe-uninitialized]

I could have used uninitialized_var() too, but didn't used it as the final else
part of map_word_load() is missing. So there is a chance that it might be passed
uninitialized. Better initialize to zero.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 3595a0236b0f..56c7936e0c65 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -328,7 +328,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word
 
 static inline map_word map_word_load(struct map_info *map, const void *ptr)
 {
-	map_word r;
+	map_word r = {{0} };
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = *(unsigned char *)ptr;
-- 
cgit v1.2.3-55-g7522


From 62532da9d5f47a7ced3b965aa73ffd5b1afbeb79 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich
Date: Thu, 15 Nov 2012 08:49:10 +0000
Subject: net: Add generic packet offload infrastructure.

Create a new data structure to contain the GRO/GSO callbacks and add
a new registration mechanism.

Singed-off-by: Vlad Yasevich <vyasevic@redhat.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 14 +++++++++
 net/core/dev.c            | 80 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7bf867c97043..d45a58db4ba3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1521,6 +1521,17 @@ struct packet_type {
 	struct list_head	list;
 };
 
+struct packet_offload {
+	__be16			type;	/* This is really htons(ether_type). */
+	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
+						netdev_features_t features);
+	int			(*gso_send_check)(struct sk_buff *skb);
+	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
+					       struct sk_buff *skb);
+	int			(*gro_complete)(struct sk_buff *skb);
+	struct list_head	list;
+};
+
 #include <linux/notifier.h>
 
 /* netdevice notifier chain. Please remember to update the rtnetlink
@@ -1615,6 +1626,9 @@ extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short
 extern void		dev_add_pack(struct packet_type *pt);
 extern void		dev_remove_pack(struct packet_type *pt);
 extern void		__dev_remove_pack(struct packet_type *pt);
+extern void		dev_add_offload(struct packet_offload *po);
+extern void		dev_remove_offload(struct packet_offload *po);
+extern void		__dev_remove_offload(struct packet_offload *po);
 
 extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
 						      unsigned short mask);
diff --git a/net/core/dev.c b/net/core/dev.c
index 83232a1be1e7..6884f8783bdd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *	dev_add_offload - register offload handlers
+ *	@po: protocol offload declaration
+ *
+ *	Add protocol offload handlers to the networking stack. The passed
+ *	&proto_offload is linked into kernel lists and may not be freed until
+ *	it has been removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+
+	spin_lock(&offload_lock);
+	list_add_rcu(&po->list, head);
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *	__dev_remove_offload	 - remove offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a protocol offload handler that was previously added to the
+ *	kernel offload handlers by dev_add_offload(). The passed &offload_type
+ *	is removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *      The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+	struct packet_offload *po1;
+
+	spin_lock(&ptype_lock);
+
+	list_for_each_entry(po1, head, list) {
+		if (po == po1) {
+			list_del_rcu(&po->list);
+			goto out;
+		}
+	}
+
+	pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+	spin_unlock(&ptype_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *	dev_remove_offload	 - remove packet offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a packet offload handler that was previously added to the kernel
+ *	offload handlers by dev_add_offload(). The passed &offload_type is
+ *	removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+	__dev_remove_offload(po);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
 /******************************************************************************
 
 		      Device Boot-time Settings Routines
@@ -6661,6 +6739,8 @@ static int __init net_dev_init(void)
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
+	INIT_LIST_HEAD(&offload_base);
+
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
-- 
cgit v1.2.3-55-g7522


From 22061d8014455b01eb018bd6c35a1b3040ccc230 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich
Date: Thu, 15 Nov 2012 08:49:11 +0000
Subject: net: Switch to using the new packet offload infrustructure

Convert to using the new GSO/GRO registration mechanism and new
packet offload structure.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 ------
 net/core/dev.c            | 19 +++++++++----------
 net/ipv4/af_inet.c        |  5 +++++
 net/ipv6/af_inet6.c       |  6 ++++++
 4 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d45a58db4ba3..61bc8483031f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1509,12 +1509,6 @@ struct packet_type {
 					 struct net_device *,
 					 struct packet_type *,
 					 struct net_device *);
-	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
-						netdev_features_t features);
-	int			(*gso_send_check)(struct sk_buff *skb);
-	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
-					       struct sk_buff *skb);
-	int			(*gro_complete)(struct sk_buff *skb);
 	bool			(*id_match)(struct packet_type *ptype,
 					    struct sock *sk);
 	void			*af_packet_priv;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6884f8783bdd..cf843a256cc6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2072,7 +2072,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
 	int err;
@@ -2101,9 +2101,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype,
-			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+	list_for_each_entry_rcu(ptype, &offload_base, list) {
+		if (ptype->type == type && ptype->gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
 				segs = ERR_PTR(err);
@@ -3522,9 +3521,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int err = -ENOENT;
 
 	if (NAPI_GRO_CB(skb)->count == 1) {
@@ -3534,7 +3533,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+		if (ptype->type != type || !ptype->gro_complete)
 			continue;
 
 		err = ptype->gro_complete(skb);
@@ -3584,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush);
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int same_flow;
 	int mac_len;
 	enum gro_result ret;
@@ -3599,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+		if (ptype->type != type || !ptype->gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 766c59658563..4c99c5fdba3f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1662,6 +1662,10 @@ static int ipv4_proc_init(void);
 static struct packet_type ip_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
 	.func = ip_rcv,
+};
+
+static struct packet_offload ip_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_IP),
 	.gso_send_check = inet_gso_send_check,
 	.gso_segment = inet_gso_segment,
 	.gro_receive = inet_gro_receive,
@@ -1781,6 +1785,7 @@ static int __init inet_init(void)
 
 	ipfrag_init();
 
+	dev_add_offload(&ip_packet_offload);
 	dev_add_pack(&ip_packet_type);
 
 	rc = 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a974247a9ae4..6e245177608c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -938,6 +938,10 @@ out_unlock:
 static struct packet_type ipv6_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.func = ipv6_rcv,
+};
+
+static struct packet_offload ipv6_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_IPV6),
 	.gso_send_check = ipv6_gso_send_check,
 	.gso_segment = ipv6_gso_segment,
 	.gro_receive = ipv6_gro_receive,
@@ -946,6 +950,7 @@ static struct packet_type ipv6_packet_type __read_mostly = {
 
 static int __init ipv6_packet_init(void)
 {
+	dev_add_offload(&ipv6_packet_offload);
 	dev_add_pack(&ipv6_packet_type);
 	return 0;
 }
@@ -953,6 +958,7 @@ static int __init ipv6_packet_init(void)
 static void ipv6_packet_cleanup(void)
 {
 	dev_remove_pack(&ipv6_packet_type);
+	dev_remove_offload(&ipv6_packet_offload);
 }
 
 static int __net_init ipv6_init_mibs(struct net *net)
-- 
cgit v1.2.3-55-g7522


From f191a1d17f227032c159e5499809f545402b6dc6 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich
Date: Thu, 15 Nov 2012 08:49:23 +0000
Subject: net: Remove code duplication between offload structures

Move the offload callbacks into its own structure.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 +++++++---
 include/net/protocol.h    | 10 +++-------
 net/core/dev.c            | 14 +++++++-------
 net/ipv4/af_inet.c        | 42 ++++++++++++++++++++++++------------------
 net/ipv6/ip6_offload.c    | 28 +++++++++++++++-------------
 net/ipv6/tcpv6_offload.c  | 10 ++++++----
 net/ipv6/udp_offload.c    |  6 ++++--
 7 files changed, 66 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 61bc8483031f..e46c830c88d8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1515,15 +1515,19 @@ struct packet_type {
 	struct list_head	list;
 };
 
-struct packet_offload {
-	__be16			type;	/* This is really htons(ether_type). */
+struct offload_callbacks {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						netdev_features_t features);
 	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
-	struct list_head	list;
+};
+
+struct packet_offload {
+	__be16			 type;	/* This is really htons(ether_type). */
+	struct offload_callbacks callbacks;
+	struct list_head	 list;
 };
 
 #include <linux/notifier.h>
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 2c90794c139d..047c0476c0a0 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -29,6 +29,7 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
+#include <linux/netdevice.h>
 
 /* This is one larger than the largest protocol value that can be
  * found in an ipv4 or ipv6 header.  Since in both cases the protocol
@@ -63,13 +64,8 @@ struct inet6_protocol {
 #endif
 
 struct net_offload {
-	int			(*gso_send_check)(struct sk_buff *skb);
-	struct sk_buff	       *(*gso_segment)(struct sk_buff *skb,
-					       netdev_features_t features);
-	struct sk_buff	      **(*gro_receive)(struct sk_buff **head,
-					       struct sk_buff *skb);
-	int			(*gro_complete)(struct sk_buff *skb);
-	unsigned int		flags;	/* Flags used by IPv6 for now */
+	struct offload_callbacks callbacks;
+	unsigned int		 flags;	/* Flags used by IPv6 for now */
 };
 /* This should be set for any extension header which is compatible with GSO. */
 #define INET6_PROTO_GSO_EXTHDR	0x1
diff --git a/net/core/dev.c b/net/core/dev.c
index cf843a256cc6..cf105e886cca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2102,16 +2102,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
-		if (ptype->type == type && ptype->gso_segment) {
+		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-				err = ptype->gso_send_check(skb);
+				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
 				__skb_push(skb, (skb->data -
 						 skb_network_header(skb)));
 			}
-			segs = ptype->gso_segment(skb, features);
+			segs = ptype->callbacks.gso_segment(skb, features);
 			break;
 		}
 	}
@@ -3533,10 +3533,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || !ptype->gro_complete)
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->gro_complete(skb);
+		err = ptype->callbacks.gro_complete(skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -3598,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || !ptype->gro_receive)
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3608,7 +3608,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 
-		pp = ptype->gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
 	}
 	rcu_read_unlock();
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9f2e7fd8bea8..d5e5a054123c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1276,8 +1276,8 @@ static int inet_gso_send_check(struct sk_buff *skb)
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_offloads[proto]);
-	if (likely(ops && ops->gso_send_check))
-		err = ops->gso_send_check(skb);
+	if (likely(ops && ops->callbacks.gso_send_check))
+		err = ops->callbacks.gso_send_check(skb);
 	rcu_read_unlock();
 
 out:
@@ -1326,8 +1326,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_offloads[proto]);
-	if (likely(ops && ops->gso_segment))
-		segs = ops->gso_segment(skb, features);
+	if (likely(ops && ops->callbacks.gso_segment))
+		segs = ops->callbacks.gso_segment(skb, features);
 	rcu_read_unlock();
 
 	if (!segs || IS_ERR(segs))
@@ -1379,7 +1379,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_offloads[proto]);
-	if (!ops || !ops->gro_receive)
+	if (!ops || !ops->callbacks.gro_receive)
 		goto out_unlock;
 
 	if (*(u8 *)iph != 0x45)
@@ -1420,7 +1420,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	skb_gro_pull(skb, sizeof(*iph));
 	skb_set_transport_header(skb, skb_gro_offset(skb));
 
-	pp = ops->gro_receive(head, skb);
+	pp = ops->callbacks.gro_receive(head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -1444,10 +1444,10 @@ static int inet_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_offloads[proto]);
-	if (WARN_ON(!ops || !ops->gro_complete))
+	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
 
-	err = ops->gro_complete(skb);
+	err = ops->callbacks.gro_complete(skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -1563,10 +1563,12 @@ static const struct net_protocol tcp_protocol = {
 };
 
 static const struct net_offload tcp_offload = {
-	.gso_send_check	=	tcp_v4_gso_send_check,
-	.gso_segment	=	tcp_tso_segment,
-	.gro_receive	=	tcp4_gro_receive,
-	.gro_complete	=	tcp4_gro_complete,
+	.callbacks = {
+		.gso_send_check	=	tcp_v4_gso_send_check,
+		.gso_segment	=	tcp_tso_segment,
+		.gro_receive	=	tcp4_gro_receive,
+		.gro_complete	=	tcp4_gro_complete,
+	},
 };
 
 static const struct net_protocol udp_protocol = {
@@ -1577,8 +1579,10 @@ static const struct net_protocol udp_protocol = {
 };
 
 static const struct net_offload udp_offload = {
-	.gso_send_check = udp4_ufo_send_check,
-	.gso_segment = udp4_ufo_fragment,
+	.callbacks = {
+		.gso_send_check = udp4_ufo_send_check,
+		.gso_segment = udp4_ufo_fragment,
+	},
 };
 
 static const struct net_protocol icmp_protocol = {
@@ -1667,10 +1671,12 @@ static int ipv4_proc_init(void);
 
 static struct packet_offload ip_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
-	.gso_send_check = inet_gso_send_check,
-	.gso_segment = inet_gso_segment,
-	.gro_receive = inet_gro_receive,
-	.gro_complete = inet_gro_complete,
+	.callbacks = {
+		.gso_send_check = inet_gso_send_check,
+		.gso_segment = inet_gso_segment,
+		.gro_receive = inet_gro_receive,
+		.gro_complete = inet_gro_complete,
+	},
 };
 
 static int __init ipv4_offload_init(void)
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 63d79d9005bd..f26f0da7f095 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -70,9 +70,9 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
 	ops = rcu_dereference(inet6_offloads[
 		ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
 
-	if (likely(ops && ops->gso_send_check)) {
+	if (likely(ops && ops->callbacks.gso_send_check)) {
 		skb_reset_transport_header(skb);
-		err = ops->gso_send_check(skb);
+		err = ops->callbacks.gso_send_check(skb);
 	}
 	rcu_read_unlock();
 
@@ -113,9 +113,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
 	rcu_read_lock();
 	ops = rcu_dereference(inet6_offloads[proto]);
-	if (likely(ops && ops->gso_segment)) {
+	if (likely(ops && ops->callbacks.gso_segment)) {
 		skb_reset_transport_header(skb);
-		segs = ops->gso_segment(skb, features);
+		segs = ops->callbacks.gso_segment(skb, features);
 	}
 	rcu_read_unlock();
 
@@ -173,7 +173,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	rcu_read_lock();
 	proto = iph->nexthdr;
 	ops = rcu_dereference(inet6_offloads[proto]);
-	if (!ops || !ops->gro_receive) {
+	if (!ops || !ops->callbacks.gro_receive) {
 		__pskb_pull(skb, skb_gro_offset(skb));
 		proto = ipv6_gso_pull_exthdrs(skb, proto);
 		skb_gro_pull(skb, -skb_transport_offset(skb));
@@ -181,7 +181,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 		__skb_push(skb, skb_gro_offset(skb));
 
 		ops = rcu_dereference(inet6_offloads[proto]);
-		if (!ops || !ops->gro_receive)
+		if (!ops || !ops->callbacks.gro_receive)
 			goto out_unlock;
 
 		iph = ipv6_hdr(skb);
@@ -220,7 +220,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	csum = skb->csum;
 	skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
 
-	pp = ops->gro_receive(head, skb);
+	pp = ops->callbacks.gro_receive(head, skb);
 
 	skb->csum = csum;
 
@@ -244,10 +244,10 @@ static int ipv6_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
-	if (WARN_ON(!ops || !ops->gro_complete))
+	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
 
-	err = ops->gro_complete(skb);
+	err = ops->callbacks.gro_complete(skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -257,10 +257,12 @@ out_unlock:
 
 static struct packet_offload ipv6_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
-	.gso_send_check = ipv6_gso_send_check,
-	.gso_segment = ipv6_gso_segment,
-	.gro_receive = ipv6_gro_receive,
-	.gro_complete = ipv6_gro_complete,
+	.callbacks = {
+		.gso_send_check = ipv6_gso_send_check,
+		.gso_segment = ipv6_gso_segment,
+		.gro_receive = ipv6_gro_receive,
+		.gro_complete = ipv6_gro_complete,
+	},
 };
 
 static int __init ipv6_offload_init(void)
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 3a27fe685c8e..2ec6bf6a0aa0 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -81,10 +81,12 @@ static int tcp6_gro_complete(struct sk_buff *skb)
 }
 
 static const struct net_offload tcpv6_offload = {
-	.gso_send_check	=	tcp_v6_gso_send_check,
-	.gso_segment	=	tcp_tso_segment,
-	.gro_receive	=	tcp6_gro_receive,
-	.gro_complete	=	tcp6_gro_complete,
+	.callbacks = {
+		.gso_send_check	=	tcp_v6_gso_send_check,
+		.gso_segment	=	tcp_tso_segment,
+		.gro_receive	=	tcp6_gro_receive,
+		.gro_complete	=	tcp6_gro_complete,
+	},
 };
 
 int __init tcpv6_offload_init(void)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 979e4ab63a8b..8e01c44a987c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -107,8 +107,10 @@ out:
 	return segs;
 }
 static const struct net_offload udpv6_offload = {
-	.gso_send_check =	udp6_ufo_send_check,
-	.gso_segment	=	udp6_ufo_fragment,
+	.callbacks = {
+		.gso_send_check =	udp6_ufo_send_check,
+		.gso_segment	=	udp6_ufo_fragment,
+	},
 };
 
 int __init udp_offload_init(void)
-- 
cgit v1.2.3-55-g7522


From 997071bcb34005f42e0fe5bc7930e895b070f251 Mon Sep 17 00:00:00 2001
From: K. Y. Srinivasan
Date: Thu, 15 Nov 2012 14:34:42 -0800
Subject: mm: export a function to get vm committed memory

It will be useful to be able to access global memory commitment from
device drivers.  On the Hyper-V platform, the host has a policy engine to
balance the available physical memory amongst all competing virtual
machines hosted on a given node.  This policy engine is driven by a number
of metrics including the memory commitment reported by the guests.  The
balloon driver for Linux on Hyper-V will use this function to retrieve
guest memory commitment.  This function is also used in Xen self
ballooning code.

[akpm@linux-foundation.org: coding-style tweak]
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/xen-selfballoon.c |  2 +-
 include/linux/mman.h          |  2 ++
 mm/mmap.c                     | 14 ++++++++++++++
 mm/nommu.c                    | 15 +++++++++++++++
 4 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 7d041cb6da26..2552d3e0a70f 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -222,7 +222,7 @@ static void selfballoon_process(struct work_struct *work)
 	if (xen_selfballooning_enabled) {
 		cur_pages = totalram_pages;
 		tgt_pages = cur_pages; /* default is no change */
-		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
+		goal_pages = vm_memory_committed() +
 				totalreserve_pages +
 				MB2PAGES(selfballoon_reserved_mb);
 #ifdef CONFIG_FRONTSWAP
diff --git a/include/linux/mman.h b/include/linux/mman.h
index d09dde1e57fb..9aa863da287f 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -11,6 +11,8 @@ extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern struct percpu_counter vm_committed_as;
 
+unsigned long vm_memory_committed(void);
+
 static inline void vm_acct_memory(long pages)
 {
 	percpu_counter_add(&vm_committed_as, pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2d942353d681..b064822be82e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,6 +88,20 @@ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
  */
 struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
 
+/*
+ * The global memory commitment made in the system can be a metric
+ * that can be used to drive ballooning decisions when Linux is hosted
+ * as a guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted.
+ * Several metrics drive this policy engine including the guest reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+	return percpu_counter_read_positive(&vm_committed_as);
+}
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/nommu.c b/mm/nommu.c
index 45131b41bcdb..79c3cac87afa 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -66,6 +66,21 @@ int heap_stack_gap = 0;
 
 atomic_long_t mmap_pages_allocated;
 
+/*
+ * The global memory commitment made in the system can be a metric
+ * that can be used to drive ballooning decisions when Linux is hosted
+ * as a guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted.
+ * Several metrics drive this policy engine including the guest reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+	return percpu_counter_read_positive(&vm_committed_as);
+}
+
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
 EXPORT_SYMBOL(mem_map);
 EXPORT_SYMBOL(num_physpages);
 
-- 
cgit v1.2.3-55-g7522


From de274bfe0fc81def6ddb8a17020a9a4b56477cc4 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 15 Nov 2012 09:49:54 +0100
Subject: TTY: introduce tty_port_destroy

After commit "TTY: move tty buffers to tty_port", the tty buffers are
not freed in some drivers. This is because tty_port_destructor is not
called whenever a tty_port is freed. This was an assumption I counted
with but was unfortunately untrue.

Those using refcounting are safe now, but for those which do not we
introduce a function to be called right before the tty_port is freed
by the drivers.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_port.c | 16 +++++++++++++++-
 include/linux/tty.h    |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index fdc42c2d565f..b7ff59d3db88 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -122,12 +122,26 @@ void tty_port_free_xmit_buf(struct tty_port *port)
 }
 EXPORT_SYMBOL(tty_port_free_xmit_buf);
 
+/**
+ * tty_port_destroy -- destroy inited port
+ * @port: tty port to be doestroyed
+ *
+ * When a port was initialized using tty_port_init, one has to destroy the
+ * port by this function. Either indirectly by using tty_port refcounting
+ * (tty_port_put) or directly if refcounting is not used.
+ */
+void tty_port_destroy(struct tty_port *port)
+{
+	tty_buffer_free_all(port);
+}
+EXPORT_SYMBOL(tty_port_destroy);
+
 static void tty_port_destructor(struct kref *kref)
 {
 	struct tty_port *port = container_of(kref, struct tty_port, kref);
 	if (port->xmit_buf)
 		free_page((unsigned long)port->xmit_buf);
-	tty_buffer_free_all(port);
+	tty_port_destroy(port);
 	if (port->ops && port->ops->destruct)
 		port->ops->destruct(port);
 	else
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d7ff88fb8967..8db1b569c37a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -455,6 +455,7 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port,
 		const struct attribute_group **attr_grp);
 extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
+extern void tty_port_destroy(struct tty_port *port);
 extern void tty_port_put(struct tty_port *port);
 
 static inline struct tty_port *tty_port_get(struct tty_port *port)
-- 
cgit v1.2.3-55-g7522


From 67d16a4686c9b94c8f52a66afe7521909aeb75b4 Mon Sep 17 00:00:00 2001
From: Wei WANG
Date: Fri, 9 Nov 2012 20:53:33 +0800
Subject: drivers/mfd: Add realtek pcie card reader driver

Realtek PCI-E card reader driver adapts requests from upper-level
sdmmc/memstick layer to the real physical card reader.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/Kconfig             |    9 +
 drivers/mfd/Makefile            |    3 +
 drivers/mfd/rtl8411.c           |  251 ++++++++
 drivers/mfd/rts5209.c           |  223 +++++++
 drivers/mfd/rts5229.c           |  205 +++++++
 drivers/mfd/rtsx_pcr.c          | 1251 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/rtsx_pcr.h          |   32 +
 include/linux/mfd/rtsx_common.h |   48 ++
 include/linux/mfd/rtsx_pci.h    |  794 +++++++++++++++++++++++++
 9 files changed, 2816 insertions(+)
 create mode 100644 drivers/mfd/rtl8411.c
 create mode 100644 drivers/mfd/rts5209.c
 create mode 100644 drivers/mfd/rts5229.c
 create mode 100644 drivers/mfd/rtsx_pcr.c
 create mode 100644 drivers/mfd/rtsx_pcr.h
 create mode 100644 include/linux/mfd/rtsx_common.h
 create mode 100644 include/linux/mfd/rtsx_pci.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index acab3ef8a310..867af072d482 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -63,6 +63,15 @@ config MFD_SM501_GPIO
 	 lines on the SM501. The platform data is used to supply the
 	 base number for the first GPIO line to register.
 
+config MFD_RTSX_PCI
+	tristate "Support for Realtek PCI-E card reader"
+	depends on PCI
+	help
+	  This supports for Realtek PCI-Express card reader including rts5209,
+	  rts5229, rtl8411, etc. Realtek card reader supports access to many
+	  types of memory cards, such as Memory Stick, Memory Stick Pro,
+	  Secure Digital and MultiMediaCard.
+
 config MFD_ASIC3
 	bool "Support for Compaq ASIC3"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d8ccb630ddb0..b53db06d1b46 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -9,6 +9,9 @@ obj-$(CONFIG_MFD_88PM805)	+= 88pm805.o 88pm80x.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
 
+rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o
+obj-$(CONFIG_MFD_RTSX_PCI)	+= rtsx_pci.o
+
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
new file mode 100644
index 000000000000..89f046ca9e41
--- /dev/null
+++ b/drivers/mfd/rtl8411.c
@@ -0,0 +1,251 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, SYS_VER, &val);
+	return val & 0x0F;
+}
+
+static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CD_PAD_CTL,
+			CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE);
+}
+
+static int rtl8411_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rtl8411_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_ON);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON);
+}
+
+static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_OFF);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+}
+
+static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
+{
+	unsigned int card_exist;
+
+	card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+	card_exist &= CARD_EXIST;
+	if (!card_exist) {
+		/* Enable card CD */
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, CD_ENABLE);
+		/* Enable card interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00);
+		return 0;
+	}
+
+	if (hweight32(card_exist) > 1) {
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+		msleep(100);
+
+		card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+		if (card_exist & MS_EXIST)
+			card_exist = MS_EXIST;
+		else if (card_exist & SD_EXIST)
+			card_exist = SD_EXIST;
+		else
+			card_exist = 0;
+
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_OFF);
+
+		dev_dbg(&(pcr->pci->dev),
+				"After CD deglitch, card_exist = 0x%x\n",
+				card_exist);
+	}
+
+	if (card_exist & MS_EXIST) {
+		/* Disable SD interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, MS_CD_EN_ONLY);
+	} else if (card_exist & SD_EXIST) {
+		/* Disable MS interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, SD_CD_EN_ONLY);
+	}
+
+	return card_exist;
+}
+
+static const struct pcr_ops rtl8411_pcr_ops = {
+	.extra_init_hw = rtl8411_extra_init_hw,
+	.optimize_phy = NULL,
+	.turn_on_led = rtl8411_turn_on_led,
+	.turn_off_led = rtl8411_turn_off_led,
+	.enable_auto_blink = rtl8411_enable_auto_blink,
+	.disable_auto_blink = rtl8411_disable_auto_blink,
+	.card_power_on = rtl8411_card_power_on,
+	.card_power_off = rtl8411_card_power_off,
+	.cd_deglitch = rtl8411_cd_deglitch,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+void rtl8411_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rtl8411_pcr_ops;
+
+	pcr->ic_version = rtl8411_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rtl8411_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rtl8411_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rtl8411_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rtl8411_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
new file mode 100644
index 000000000000..283a4f148084
--- /dev/null
+++ b/drivers/mfd/rts5209.c
@@ -0,0 +1,223 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	val = rtsx_pci_readb(pcr, 0x1C);
+	return val & 0x0F;
+}
+
+static void rts5209_init_vendor_cfg(struct rtsx_pcr *pcr)
+{
+	u32 val;
+
+	rtsx_pci_read_config_dword(pcr, 0x724, &val);
+	dev_dbg(&(pcr->pci->dev), "Cfg 0x724: 0x%x\n", val);
+
+	if (!(val & 0x80)) {
+		if (val & 0x08)
+			pcr->ms_pmos = false;
+		else
+			pcr->ms_pmos = true;
+	}
+}
+
+static int rts5209_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Turn off LED */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03);
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5209_optimize_phy(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966);
+}
+
+static int rts5209_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rts5209_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+	u8 pwr_mask, partial_pwr_on, pwr_on;
+
+	pwr_mask = SD_POWER_MASK;
+	partial_pwr_on = SD_PARTIAL_POWER_ON;
+	pwr_on = SD_POWER_ON;
+
+	if (pcr->ms_pmos && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		partial_pwr_on = MS_PARTIAL_POWER_ON;
+		pwr_on = MS_POWER_ON;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask, partial_pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x04);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	u8 pwr_mask, pwr_off;
+
+	pwr_mask = SD_POWER_MASK;
+	pwr_off = SD_POWER_OFF;
+
+	if (pcr->ms_pmos && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		pwr_off = MS_POWER_OFF;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5209_pcr_ops = {
+	.extra_init_hw = rts5209_extra_init_hw,
+	.optimize_phy = rts5209_optimize_phy,
+	.turn_on_led = rts5209_turn_on_led,
+	.turn_off_led = rts5209_turn_off_led,
+	.enable_auto_blink = rts5209_enable_auto_blink,
+	.disable_auto_blink = rts5209_disable_auto_blink,
+	.card_power_on = rts5209_card_power_on,
+	.card_power_off = rts5209_card_power_off,
+	.cd_deglitch = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5209_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 |
+		EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5209_pcr_ops;
+
+	rts5209_init_vendor_cfg(pcr);
+
+	pcr->ic_version = rts5209_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
new file mode 100644
index 000000000000..b9dbab266fda
--- /dev/null
+++ b/drivers/mfd/rts5229.c
@@ -0,0 +1,205 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static int rts5229_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5229_optimize_phy(struct rtsx_pcr *pcr)
+{
+	/* Optimize RX sensitivity */
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42);
+}
+
+static int rts5229_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5229_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK | PMOS_STRG_MASK,
+			SD_POWER_OFF | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5229_pcr_ops = {
+	.extra_init_hw = rts5229_extra_init_hw,
+	.optimize_phy = rts5229_optimize_phy,
+	.turn_on_led = rts5229_turn_on_led,
+	.turn_off_led = rts5229_turn_off_led,
+	.enable_auto_blink = rts5229_enable_auto_blink,
+	.disable_auto_blink = rts5229_disable_auto_blink,
+	.card_power_on = rts5229_card_power_on,
+	.card_power_off = rts5229_card_power_off,
+	.cd_deglitch = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5229_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5229_pcr_ops;
+
+	pcr->ic_version = rts5229_get_ic_version(pcr);
+	if (pcr->ic_version == IC_VER_C) {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2;
+	} else {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1;
+	}
+	pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
new file mode 100644
index 000000000000..56d4377c62c2
--- /dev/null
+++ b/drivers/mfd/rtsx_pcr.c
@@ -0,0 +1,1251 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rtsx_pci.h>
+#include <asm/unaligned.h>
+
+#include "rtsx_pcr.h"
+
+static bool msi_en = true;
+module_param(msi_en, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(msi_en, "Enable MSI");
+
+static DEFINE_IDR(rtsx_pci_idr);
+static DEFINE_SPINLOCK(rtsx_pci_lock);
+
+static struct mfd_cell rtsx_pcr_cells[] = {
+	[RTSX_SD_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_SDMMC,
+	},
+	[RTSX_MS_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_MS,
+	},
+};
+
+static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = {
+	{ PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, rtsx_pci_ids);
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr)
+{
+	/* If pci device removed, don't queue idle work any more */
+	if (pcr->remove_pci)
+		return;
+
+	if (pcr->state != PDEV_STAT_RUN) {
+		pcr->state = PDEV_STAT_RUN;
+		if (pcr->ops->enable_auto_blink)
+			pcr->ops->enable_auto_blink(pcr);
+	}
+
+	mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_start_run);
+
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data)
+{
+	int i;
+	u32 val = HAIMR_WRITE_START;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0) {
+			if (data != (u8)val)
+				return -EIO;
+			return 0;
+		}
+	}
+
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_register);
+
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data)
+{
+	u32 val = HAIMR_READ_START;
+	int i;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0)
+			break;
+	}
+
+	if (i >= MAX_RW_REG_CNT)
+		return -ETIMEDOUT;
+
+	if (data)
+		*data = (u8)(val & 0xFF);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_register);
+
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val)
+{
+	int err, i, finished = 0;
+	u8 tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8));
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register);
+
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val)
+{
+	int err, i, finished = 0;
+	u16 data;
+	u8 *ptr, tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0);
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	ptr = rtsx_pci_get_cmd_data(pcr);
+	data = ((u16)ptr[1] << 8) | ptr[0];
+
+	if (val)
+		*val = data;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register);
+
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA);
+
+	rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80);
+	rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd);
+
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data)
+{
+	unsigned long flags;
+	u32 val = 0;
+	u32 *ptr = (u32 *)(pcr->host_cmds_ptr);
+
+	val |= (u32)(cmd_type & 0x03) << 30;
+	val |= (u32)(reg_addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	ptr += pcr->ci;
+	if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) {
+		put_unaligned_le32(val, ptr);
+		ptr++;
+		pcr->ci++;
+	}
+	spin_unlock_irqrestore(&pcr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd);
+
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr)
+{
+	u32 val = 1 << 31;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait);
+
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout)
+{
+	struct completion trans_done;
+	u32 val = 1 << 31;
+	long timeleft;
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	/* set up data structures for the wakeup system */
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	/* Wait for TRANS_OK_INT */
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n",
+				__func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto finish_send_cmd;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_RESULT_OK)
+		err = 0;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+finish_send_cmd:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd);
+
+static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
+		dma_addr_t addr, unsigned int len, int end)
+{
+	u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
+	u64 val;
+	u8 option = SG_VALID | SG_TRANS_DATA;
+
+	dev_dbg(&(pcr->pci->dev), "DMA addr: 0x%x, Len: 0x%x\n",
+			(unsigned int)addr, len);
+
+	if (end)
+		option |= SG_END;
+	val = ((u64)addr << 32) | ((u64)len << 12) | option;
+
+	put_unaligned_le64(val, ptr);
+	ptr++;
+	pcr->sgi++;
+}
+
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout)
+{
+	struct completion trans_done;
+	u8 dir;
+	int err = 0, i, count;
+	long timeleft;
+	unsigned long flags;
+	struct scatterlist *sg;
+	enum dma_data_direction dma_dir;
+	u32 val;
+	dma_addr_t addr;
+	unsigned int len;
+
+	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+
+	/* don't transfer data during abort processing */
+	if (pcr->remove_pci)
+		return -EINVAL;
+
+	if ((sglist == NULL) || (num_sg <= 0))
+		return -EINVAL;
+
+	if (read) {
+		dir = DEVICE_TO_HOST;
+		dma_dir = DMA_FROM_DEVICE;
+	} else {
+		dir = HOST_TO_DEVICE;
+		dma_dir = DMA_TO_DEVICE;
+	}
+
+	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
+	if (count < 1) {
+		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
+		return -EINVAL;
+	}
+	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
+
+	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
+	pcr->sgi = 0;
+	for_each_sg(sglist, sg, count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1);
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+	rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n",
+				__func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto out;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+out:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+
+		memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256);
+		ptr += 256;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf);
+
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf);
+
+static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+
+	while (*tbl & 0xFFFF0000) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+				(u16)(*tbl >> 16), 0xFF, (u8)(*tbl));
+		tbl++;
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_enable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_enable_tbl;
+	else
+		return -EINVAL;
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable);
+
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_disable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_disable_tbl;
+	else
+		return -EINVAL;
+
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable);
+
+static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr)
+{
+	pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN;
+
+	if (pcr->num_slots > 1)
+		pcr->bier |= MS_INT_EN;
+
+	/* Enable Bus Interrupt */
+	rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier);
+
+	dev_dbg(&(pcr->pci->dev), "RTSX_BIER: 0x%08x\n", pcr->bier);
+}
+
+static inline u8 double_ssc_depth(u8 depth)
+{
+	return ((depth > 1) ? (depth - 1) : depth);
+}
+
+static u8 revise_ssc_depth(u8 ssc_depth, u8 div)
+{
+	if (div > CLK_DIV_1) {
+		if (ssc_depth > (div - 1))
+			ssc_depth -= (div - 1);
+		else
+			ssc_depth = SSC_DEPTH_4M;
+	}
+
+	return ssc_depth;
+}
+
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk)
+{
+	int err, clk;
+	u8 N, min_N, max_N, clk_divider;
+	u8 mcu_cnt, div, max_div;
+	u8 depth[] = {
+		[RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M,
+		[RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M,
+		[RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M,
+		[RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K,
+		[RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K,
+	};
+
+	if (initial_mode) {
+		/* We use 250k(around) here, in initial stage */
+		clk_divider = SD_CLK_DIVIDE_128;
+		card_clock = 30000000;
+	} else {
+		clk_divider = SD_CLK_DIVIDE_0;
+	}
+	err = rtsx_pci_write_register(pcr, SD_CFG1,
+			SD_CLK_DIVIDE_MASK, clk_divider);
+	if (err < 0)
+		return err;
+
+	card_clock /= 1000000;
+	dev_dbg(&(pcr->pci->dev), "Switch card clock to %dMHz\n", card_clock);
+
+	min_N = 80;
+	max_N = 208;
+	max_div = CLK_DIV_8;
+
+	clk = card_clock;
+	if (!initial_mode && double_clk)
+		clk = card_clock * 2;
+	dev_dbg(&(pcr->pci->dev),
+			"Internal SSC clock: %dMHz (cur_clock = %d)\n",
+			clk, pcr->cur_clock);
+
+	if (clk == pcr->cur_clock)
+		return 0;
+
+	N = (u8)(clk - 2);
+	if ((clk <= 2) || (N > max_N))
+		return -EINVAL;
+
+	mcu_cnt = (u8)(125/clk + 3);
+	if (mcu_cnt > 15)
+		mcu_cnt = 15;
+
+	/* Make sure that the SSC clock div_n is equal or greater than min_N */
+	div = CLK_DIV_1;
+	while ((N < min_N) && (div < max_div)) {
+		N = (N + 2) * 2 - 2;
+		div++;
+	}
+	dev_dbg(&(pcr->pci->dev), "N = %d, div = %d\n", N, div);
+
+	ssc_depth = depth[ssc_depth];
+	if (double_clk)
+		ssc_depth = double_ssc_depth(ssc_depth);
+
+	ssc_depth = revise_ssc_depth(ssc_depth, div);
+	dev_dbg(&(pcr->pci->dev), "ssc_depth = %d\n", ssc_depth);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL,
+			CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV,
+			0xFF, (div << 4) | mcu_cnt);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2,
+			SSC_DEPTH_MASK, ssc_depth);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, N);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB);
+	if (vpclk) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 2000);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC clock stable */
+	udelay(10);
+	err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+	if (err < 0)
+		return err;
+
+	pcr->cur_clock = clk;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock);
+
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_on)
+		return pcr->ops->card_power_on(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on);
+
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_off)
+		return pcr->ops->card_power_off(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
+
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr)
+{
+	unsigned int val;
+
+	val = rtsx_pci_readl(pcr, RTSX_BIPR);
+	if (pcr->ops->cd_deglitch)
+		val = pcr->ops->cd_deglitch(pcr);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_exist);
+
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr)
+{
+	struct completion finish;
+
+	pcr->finish_me = &finish;
+	init_completion(&finish);
+
+	if (pcr->done)
+		complete(pcr->done);
+
+	if (!pcr->remove_pci)
+		rtsx_pci_stop_cmd(pcr);
+
+	wait_for_completion_interruptible_timeout(&finish,
+			msecs_to_jiffies(2));
+	pcr->finish_me = NULL;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer);
+
+static void rtsx_pci_card_detect(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct rtsx_pcr *pcr;
+	unsigned long flags;
+	unsigned int card_detect = 0;
+	u32 irq_status;
+
+	dwork = to_delayed_work(work);
+	pcr = container_of(dwork, struct rtsx_pcr, carddet_work);
+
+	dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__);
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	irq_status = rtsx_pci_readl(pcr, RTSX_BIPR);
+	dev_dbg(&(pcr->pci->dev), "irq_status: 0x%08x\n", irq_status);
+
+	if (pcr->card_inserted || pcr->card_removed) {
+		dev_dbg(&(pcr->pci->dev),
+				"card_inserted: 0x%x, card_removed: 0x%x\n",
+				pcr->card_inserted, pcr->card_removed);
+
+		if (pcr->ops->cd_deglitch)
+			pcr->card_inserted = pcr->ops->cd_deglitch(pcr);
+
+		card_detect = pcr->card_inserted | pcr->card_removed;
+		pcr->card_inserted = 0;
+		pcr->card_removed = 0;
+	}
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if (card_detect & SD_EXIST)
+		pcr->slots[RTSX_SD_CARD].card_event(
+				pcr->slots[RTSX_SD_CARD].p_dev);
+	if (card_detect & MS_EXIST)
+		pcr->slots[RTSX_MS_CARD].card_event(
+				pcr->slots[RTSX_MS_CARD].p_dev);
+}
+
+static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
+{
+	struct rtsx_pcr *pcr = dev_id;
+	u32 int_reg;
+
+	if (!pcr)
+		return IRQ_NONE;
+
+	spin_lock(&pcr->lock);
+
+	int_reg = rtsx_pci_readl(pcr, RTSX_BIPR);
+	/* Clear interrupt flag */
+	rtsx_pci_writel(pcr, RTSX_BIPR, int_reg);
+	if ((int_reg & pcr->bier) == 0) {
+		spin_unlock(&pcr->lock);
+		return IRQ_NONE;
+	}
+	if (int_reg == 0xFFFFFFFF) {
+		spin_unlock(&pcr->lock);
+		return IRQ_HANDLED;
+	}
+
+	int_reg &= (pcr->bier | 0x7FFFFF);
+
+	if (int_reg & SD_INT) {
+		if (int_reg & SD_EXIST) {
+			pcr->card_inserted |= SD_EXIST;
+		} else {
+			pcr->card_removed |= SD_EXIST;
+			pcr->card_inserted &= ~SD_EXIST;
+		}
+	}
+
+	if (int_reg & MS_INT) {
+		if (int_reg & MS_EXIST) {
+			pcr->card_inserted |= MS_EXIST;
+		} else {
+			pcr->card_removed |= MS_EXIST;
+			pcr->card_inserted &= ~MS_EXIST;
+		}
+	}
+
+	if (pcr->card_inserted || pcr->card_removed)
+		schedule_delayed_work(&pcr->carddet_work,
+				msecs_to_jiffies(200));
+
+	if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) {
+		if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) {
+			pcr->trans_result = TRANS_RESULT_FAIL;
+			if (pcr->done)
+				complete(pcr->done);
+		} else if (int_reg & TRANS_OK_INT) {
+			pcr->trans_result = TRANS_RESULT_OK;
+			if (pcr->done)
+				complete(pcr->done);
+		}
+	}
+
+	spin_unlock(&pcr->lock);
+	return IRQ_HANDLED;
+}
+
+static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
+{
+	dev_info(&(pcr->pci->dev), "%s: pcr->msi_en = %d, pci->irq = %d\n",
+			__func__, pcr->msi_en, pcr->pci->irq);
+
+	if (request_irq(pcr->pci->irq, rtsx_pci_isr,
+			pcr->msi_en ? 0 : IRQF_SHARED,
+			DRV_NAME_RTSX_PCI, pcr)) {
+		dev_err(&(pcr->pci->dev),
+			"rtsx_sdmmc: unable to grab IRQ %d, disabling device\n",
+			pcr->pci->irq);
+		return -1;
+	}
+
+	pcr->irq = pcr->pci->irq;
+	pci_intx(pcr->pci, !pcr->msi_en);
+
+	return 0;
+}
+
+static void rtsx_pci_idle_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);
+
+	dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pcr->state = PDEV_STAT_IDLE;
+
+	if (pcr->ops->disable_auto_blink)
+		pcr->ops->disable_auto_blink(pcr);
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	mutex_unlock(&pcr->pcr_mutex);
+}
+
+static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	rtsx_pci_enable_bus_int(pcr);
+
+	/* Power on SSC */
+	err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC power stable */
+	udelay(200);
+
+	if (pcr->ops->optimize_phy) {
+		err = pcr->ops->optimize_phy(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Set mcu_cnt to 7 to ensure data can be sampled properly */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00);
+	/* Disable card clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Reset delink mode */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0);
+	/* Card driving select */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0x07, DRIVER_TYPE_D);
+	/* Enable SSC Clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1,
+			0xFF, SSC_8X_EN | SSC_SEL_4M);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12);
+	/* Disable cd_pwr_save */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10);
+	/* Clear Link Ready Interrupt */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0,
+			LINK_RDY_INT, LINK_RDY_INT);
+	/* Enlarge the estimation window of PERST# glitch
+	 * to reduce the chance of invalid card interrupt
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80);
+	/* Update RC oscillator to 400k
+	 * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1
+	 *                1: 2M  0: 400k
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00);
+	/* Set interrupt write clear
+	 * bit 1: U_elbi_if_rd_clr_en
+	 *	1: Enable ELBI interrupt[31:22] & [7:0] flag read clear
+	 *	0: ELBI interrupt flag[31:22] & [7:0] only can be write clear
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0);
+	/* Force CLKREQ# PIN to drive 0 to request clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* Enable clk_request_n to enable clock power management */
+	rtsx_pci_write_config_byte(pcr, 0x81, 1);
+	/* Enter L1 when host tx idle */
+	rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B);
+
+	if (pcr->ops->extra_init_hw) {
+		err = pcr->ops->extra_init_hw(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	spin_lock_init(&pcr->lock);
+	mutex_init(&pcr->pcr_mutex);
+
+	switch (PCI_PID(pcr)) {
+	default:
+	case 0x5209:
+		rts5209_init_params(pcr);
+		break;
+
+	case 0x5229:
+		rts5229_init_params(pcr);
+		break;
+
+	case 0x5289:
+		rtl8411_init_params(pcr);
+		break;
+	}
+
+	dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n",
+			PCI_PID(pcr), pcr->ic_version);
+
+	pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot),
+			GFP_KERNEL);
+	if (!pcr->slots)
+		return -ENOMEM;
+
+	pcr->state = PDEV_STAT_IDLE;
+	err = rtsx_pci_init_hw(pcr);
+	if (err < 0) {
+		kfree(pcr->slots);
+		return err;
+	}
+
+	return 0;
+}
+
+static int __devinit rtsx_pci_probe(struct pci_dev *pcidev,
+				    const struct pci_device_id *id)
+{
+	struct rtsx_pcr *pcr;
+	struct pcr_handle *handle;
+	u32 base, len;
+	int ret, i;
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device,
+		(int)pcidev->revision);
+
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		return ret;
+
+	ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI);
+	if (ret)
+		goto disable;
+
+	pcr = kzalloc(sizeof(*pcr), GFP_KERNEL);
+	if (!pcr) {
+		ret = -ENOMEM;
+		goto release_pci;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle) {
+		ret = -ENOMEM;
+		goto free_pcr;
+	}
+	handle->pcr = pcr;
+
+	if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto free_handle;
+	}
+
+	spin_lock(&rtsx_pci_lock);
+	ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+	if (ret)
+		goto free_handle;
+
+	pcr->pci = pcidev;
+	dev_set_drvdata(&pcidev->dev, handle);
+
+	len = pci_resource_len(pcidev, 0);
+	base = pci_resource_start(pcidev, 0);
+	pcr->remap_addr = ioremap_nocache(base, len);
+	if (!pcr->remap_addr) {
+		ret = -ENOMEM;
+		goto free_host;
+	}
+
+	pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev),
+			RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr),
+			GFP_KERNEL);
+	if (pcr->rtsx_resv_buf == NULL) {
+		ret = -ENXIO;
+		goto unmap;
+	}
+	pcr->host_cmds_ptr = pcr->rtsx_resv_buf;
+	pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr;
+	pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN;
+	pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN;
+
+	pcr->card_inserted = 0;
+	pcr->card_removed = 0;
+	INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
+	INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);
+
+	pcr->msi_en = msi_en;
+	if (pcr->msi_en) {
+		ret = pci_enable_msi(pcidev);
+		if (ret < 0)
+			pcr->msi_en = false;
+	}
+
+	ret = rtsx_pci_acquire_irq(pcr);
+	if (ret < 0)
+		goto free_dma;
+
+	pci_set_master(pcidev);
+	synchronize_irq(pcr->irq);
+
+	ret = rtsx_pci_init_chip(pcr);
+	if (ret < 0)
+		goto disable_irq;
+
+	for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) {
+		rtsx_pcr_cells[i].platform_data = handle;
+		rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
+	}
+	ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
+			ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
+	if (ret < 0)
+		goto disable_irq;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+	return 0;
+
+disable_irq:
+	free_irq(pcr->irq, (void *)pcr);
+free_dma:
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+unmap:
+	iounmap(pcr->remap_addr);
+free_host:
+	dev_set_drvdata(&pcidev->dev, NULL);
+free_handle:
+	kfree(handle);
+free_pcr:
+	kfree(pcr);
+release_pci:
+	pci_release_regions(pcidev);
+disable:
+	pci_disable_device(pcidev);
+
+	return ret;
+}
+
+static void __devexit rtsx_pci_remove(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle = pci_get_drvdata(pcidev);
+	struct rtsx_pcr *pcr = handle->pcr;
+
+	pcr->remove_pci = true;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mfd_remove_devices(&pcidev->dev);
+
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+	free_irq(pcr->irq, (void *)pcr);
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
+	iounmap(pcr->remap_addr);
+
+	dev_set_drvdata(&pcidev->dev, NULL);
+	pci_release_regions(pcidev);
+	pci_disable_device(pcidev);
+
+	spin_lock(&rtsx_pci_lock);
+	idr_remove(&rtsx_pci_idr, pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+
+	kfree(pcr->slots);
+	kfree(pcr);
+	kfree(handle);
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device);
+}
+
+#ifdef CONFIG_PM
+
+static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	rtsx_pci_writel(pcr, RTSX_BIER, 0);
+	pcr->bier = 0;
+
+	rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08);
+	rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x02);
+
+	pci_save_state(pcidev);
+	pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0);
+	pci_disable_device(pcidev);
+	pci_set_power_state(pcidev, pci_choose_state(pcidev, state));
+
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+static int rtsx_pci_resume(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pci_set_power_state(pcidev, PCI_D0);
+	pci_restore_state(pcidev);
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		goto out;
+	pci_set_master(pcidev);
+
+	ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
+	if (ret)
+		goto out;
+
+	ret = rtsx_pci_init_hw(pcr);
+	if (ret)
+		goto out;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+out:
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+#else /* CONFIG_PM */
+
+#define rtsx_pci_suspend NULL
+#define rtsx_pci_resume NULL
+
+#endif /* CONFIG_PM */
+
+static struct pci_driver rtsx_pci_driver = {
+	.name = DRV_NAME_RTSX_PCI,
+	.id_table = rtsx_pci_ids,
+	.probe = rtsx_pci_probe,
+	.remove = __devexit_p(rtsx_pci_remove),
+	.suspend = rtsx_pci_suspend,
+	.resume = rtsx_pci_resume,
+};
+module_pci_driver(rtsx_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wei WANG <wei_wang@realsil.com.cn>");
+MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver");
diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h
new file mode 100644
index 000000000000..12462c1df1a9
--- /dev/null
+++ b/drivers/mfd/rtsx_pcr.h
@@ -0,0 +1,32 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_PCR_H
+#define __RTSX_PCR_H
+
+#include <linux/mfd/rtsx_pci.h>
+
+void rts5209_init_params(struct rtsx_pcr *pcr);
+void rts5229_init_params(struct rtsx_pcr *pcr);
+void rtl8411_init_params(struct rtsx_pcr *pcr);
+
+#endif
diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h
new file mode 100644
index 000000000000..a8d393e3066b
--- /dev/null
+++ b/include/linux/mfd/rtsx_common.h
@@ -0,0 +1,48 @@
+/* Driver for Realtek driver-based card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_COMMON_H
+#define __RTSX_COMMON_H
+
+#define DRV_NAME_RTSX_PCI		"rtsx_pci"
+#define DRV_NAME_RTSX_PCI_SDMMC		"rtsx_pci_sdmmc"
+#define DRV_NAME_RTSX_PCI_MS		"rtsx_pci_ms"
+
+#define RTSX_REG_PAIR(addr, val)	(((u32)(addr) << 16) | (u8)(val))
+
+#define RTSX_SSC_DEPTH_4M		0x01
+#define RTSX_SSC_DEPTH_2M		0x02
+#define RTSX_SSC_DEPTH_1M		0x03
+#define RTSX_SSC_DEPTH_500K		0x04
+#define RTSX_SSC_DEPTH_250K		0x05
+
+#define RTSX_SD_CARD			0
+#define RTSX_MS_CARD			1
+
+struct platform_device;
+
+struct rtsx_slot {
+	struct platform_device	*p_dev;
+	void			(*card_event)(struct platform_device *p_dev);
+};
+
+#endif
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
new file mode 100644
index 000000000000..060b721fcbfb
--- /dev/null
+++ b/include/linux/mfd/rtsx_pci.h
@@ -0,0 +1,794 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_PCI_H
+#define __RTSX_PCI_H
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "rtsx_common.h"
+
+#define MAX_RW_REG_CNT			1024
+
+/* PCI Operation Register Address */
+#define RTSX_HCBAR			0x00
+#define RTSX_HCBCTLR			0x04
+#define RTSX_HDBAR			0x08
+#define RTSX_HDBCTLR			0x0C
+#define RTSX_HAIMR			0x10
+#define RTSX_BIPR			0x14
+#define RTSX_BIER			0x18
+
+/* Host command buffer control register */
+#define STOP_CMD			(0x01 << 28)
+
+/* Host data buffer control register */
+#define SDMA_MODE			0x00
+#define ADMA_MODE			(0x02 << 26)
+#define STOP_DMA			(0x01 << 28)
+#define TRIG_DMA			(0x01 << 31)
+
+/* Host access internal memory register */
+#define HAIMR_TRANS_START		(0x01 << 31)
+#define HAIMR_READ			0x00
+#define HAIMR_WRITE			(0x01 << 30)
+#define HAIMR_READ_START		(HAIMR_TRANS_START | HAIMR_READ)
+#define HAIMR_WRITE_START		(HAIMR_TRANS_START | HAIMR_WRITE)
+#define HAIMR_TRANS_END			(HAIMR_TRANS_START)
+
+/* Bus interrupt pending register */
+#define CMD_DONE_INT			(1 << 31)
+#define DATA_DONE_INT			(1 << 30)
+#define TRANS_OK_INT			(1 << 29)
+#define TRANS_FAIL_INT			(1 << 28)
+#define XD_INT				(1 << 27)
+#define MS_INT				(1 << 26)
+#define SD_INT				(1 << 25)
+#define GPIO0_INT			(1 << 24)
+#define OC_INT				(1 << 23)
+#define SD_WRITE_PROTECT		(1 << 19)
+#define XD_EXIST			(1 << 18)
+#define MS_EXIST			(1 << 17)
+#define SD_EXIST			(1 << 16)
+#define DELINK_INT			GPIO0_INT
+#define MS_OC_INT			(1 << 23)
+#define SD_OC_INT			(1 << 22)
+
+#define CARD_INT		(XD_INT | MS_INT | SD_INT)
+#define NEED_COMPLETE_INT	(DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT)
+#define RTSX_INT		(CMD_DONE_INT | NEED_COMPLETE_INT | \
+					CARD_INT | GPIO0_INT | OC_INT)
+
+#define CARD_EXIST		(XD_EXIST | MS_EXIST | SD_EXIST)
+
+/* Bus interrupt enable register */
+#define CMD_DONE_INT_EN		(1 << 31)
+#define DATA_DONE_INT_EN	(1 << 30)
+#define TRANS_OK_INT_EN		(1 << 29)
+#define TRANS_FAIL_INT_EN	(1 << 28)
+#define XD_INT_EN		(1 << 27)
+#define MS_INT_EN		(1 << 26)
+#define SD_INT_EN		(1 << 25)
+#define GPIO0_INT_EN		(1 << 24)
+#define OC_INT_EN		(1 << 23)
+#define DELINK_INT_EN		GPIO0_INT_EN
+#define MS_OC_INT_EN		(1 << 23)
+#define SD_OC_INT_EN		(1 << 22)
+
+#define READ_REG_CMD		0
+#define WRITE_REG_CMD		1
+#define CHECK_REG_CMD		2
+
+/*
+ * macros for easy use
+ */
+#define rtsx_pci_writel(pcr, reg, value) \
+	iowrite32(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readl(pcr, reg) \
+	ioread32((pcr)->remap_addr + reg)
+#define rtsx_pci_writew(pcr, reg, value) \
+	iowrite16(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readw(pcr, reg) \
+	ioread16((pcr)->remap_addr + reg)
+#define rtsx_pci_writeb(pcr, reg, value) \
+	iowrite8(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readb(pcr, reg) \
+	ioread8((pcr)->remap_addr + reg)
+
+#define rtsx_pci_read_config_byte(pcr, where, val) \
+	pci_read_config_byte((pcr)->pci, where, val)
+
+#define rtsx_pci_write_config_byte(pcr, where, val) \
+	pci_write_config_byte((pcr)->pci, where, val)
+
+#define rtsx_pci_read_config_dword(pcr, where, val) \
+	pci_read_config_dword((pcr)->pci, where, val)
+
+#define rtsx_pci_write_config_dword(pcr, where, val) \
+	pci_write_config_dword((pcr)->pci, where, val)
+
+#define STATE_TRANS_NONE	0
+#define STATE_TRANS_CMD		1
+#define STATE_TRANS_BUF		2
+#define STATE_TRANS_SG		3
+
+#define TRANS_NOT_READY		0
+#define TRANS_RESULT_OK		1
+#define TRANS_RESULT_FAIL	2
+#define TRANS_NO_DEVICE		3
+
+#define RTSX_RESV_BUF_LEN	4096
+#define HOST_CMDS_BUF_LEN	1024
+#define HOST_SG_TBL_BUF_LEN	(RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN)
+#define HOST_SG_TBL_ITEMS	(HOST_SG_TBL_BUF_LEN / 8)
+#define MAX_SG_ITEM_LEN		0x80000
+
+#define HOST_TO_DEVICE		0
+#define DEVICE_TO_HOST		1
+
+#define MAX_PHASE		31
+#define RX_TUNING_CNT		3
+
+/* SG descriptor */
+#define SG_INT			0x04
+#define SG_END			0x02
+#define SG_VALID		0x01
+
+#define SG_NO_OP		0x00
+#define SG_TRANS_DATA		(0x02 << 4)
+#define SG_LINK_DESC		(0x03 << 4)
+
+/* SD bank voltage */
+#define SD_IO_3V3		0
+#define SD_IO_1V8		1
+
+
+/* Card Clock Enable Register */
+#define SD_CLK_EN			0x04
+#define MS_CLK_EN			0x08
+
+/* Card Select Register */
+#define SD_MOD_SEL			2
+#define MS_MOD_SEL			3
+
+/* Card Output Enable Register */
+#define SD_OUTPUT_EN			0x04
+#define MS_OUTPUT_EN			0x08
+
+/* CARD_SHARE_MODE */
+#define CARD_SHARE_MASK			0x0F
+#define CARD_SHARE_MULTI_LUN		0x00
+#define	CARD_SHARE_NORMAL		0x00
+#define	CARD_SHARE_48_SD		0x04
+#define	CARD_SHARE_48_MS		0x08
+/* CARD_SHARE_MODE for barossa */
+#define CARD_SHARE_BAROSSA_SD		0x01
+#define CARD_SHARE_BAROSSA_MS		0x02
+
+/* SD30_DRIVE_SEL */
+#define DRIVER_TYPE_A			0x05
+#define DRIVER_TYPE_B			0x03
+#define DRIVER_TYPE_C			0x02
+#define DRIVER_TYPE_D			0x01
+
+/* FPDCTL */
+#define SSC_POWER_DOWN			0x01
+#define SD_OC_POWER_DOWN		0x02
+#define ALL_POWER_DOWN			0x07
+#define OC_POWER_DOWN			0x06
+
+/* CLK_CTL */
+#define CHANGE_CLK			0x01
+
+/* LDO_CTL */
+#define BPP_LDO_POWB			0x03
+#define BPP_LDO_ON			0x00
+#define BPP_LDO_SUSPEND			0x02
+#define BPP_LDO_OFF			0x03
+
+/* CD_PAD_CTL */
+#define CD_DISABLE_MASK			0x07
+#define MS_CD_DISABLE			0x04
+#define SD_CD_DISABLE			0x02
+#define XD_CD_DISABLE			0x01
+#define CD_DISABLE			0x07
+#define CD_ENABLE			0x00
+#define MS_CD_EN_ONLY			0x03
+#define SD_CD_EN_ONLY			0x05
+#define XD_CD_EN_ONLY			0x06
+#define FORCE_CD_LOW_MASK		0x38
+#define FORCE_CD_XD_LOW			0x08
+#define FORCE_CD_SD_LOW			0x10
+#define FORCE_CD_MS_LOW			0x20
+#define CD_AUTO_DISABLE			0x40
+
+/* SD_STAT1 */
+#define	SD_CRC7_ERR			0x80
+#define	SD_CRC16_ERR			0x40
+#define	SD_CRC_WRITE_ERR		0x20
+#define	SD_CRC_WRITE_ERR_MASK		0x1C
+#define	GET_CRC_TIME_OUT		0x02
+#define	SD_TUNING_COMPARE_ERR		0x01
+
+/* SD_STAT2 */
+#define	SD_RSP_80CLK_TIMEOUT		0x01
+
+/* SD_BUS_STAT */
+#define	SD_CLK_TOGGLE_EN		0x80
+#define	SD_CLK_FORCE_STOP	        0x40
+#define	SD_DAT3_STATUS		        0x10
+#define	SD_DAT2_STATUS		        0x08
+#define	SD_DAT1_STATUS		        0x04
+#define	SD_DAT0_STATUS		        0x02
+#define	SD_CMD_STATUS			0x01
+
+/* SD_PAD_CTL */
+#define	SD_IO_USING_1V8		        0x80
+#define	SD_IO_USING_3V3		        0x7F
+#define	TYPE_A_DRIVING		        0x00
+#define	TYPE_B_DRIVING			0x01
+#define	TYPE_C_DRIVING			0x02
+#define	TYPE_D_DRIVING		        0x03
+
+/* SD_SAMPLE_POINT_CTL */
+#define	DDR_FIX_RX_DAT			0x00
+#define	DDR_VAR_RX_DAT			0x80
+#define	DDR_FIX_RX_DAT_EDGE		0x00
+#define	DDR_FIX_RX_DAT_14_DELAY		0x40
+#define	DDR_FIX_RX_CMD			0x00
+#define	DDR_VAR_RX_CMD			0x20
+#define	DDR_FIX_RX_CMD_POS_EDGE		0x00
+#define	DDR_FIX_RX_CMD_14_DELAY		0x10
+#define	SD20_RX_POS_EDGE		0x00
+#define	SD20_RX_14_DELAY		0x08
+#define SD20_RX_SEL_MASK		0x08
+
+/* SD_PUSH_POINT_CTL */
+#define	DDR_FIX_TX_CMD_DAT		0x00
+#define	DDR_VAR_TX_CMD_DAT		0x80
+#define	DDR_FIX_TX_DAT_14_TSU		0x00
+#define	DDR_FIX_TX_DAT_12_TSU		0x40
+#define	DDR_FIX_TX_CMD_NEG_EDGE		0x00
+#define	DDR_FIX_TX_CMD_14_AHEAD		0x20
+#define	SD20_TX_NEG_EDGE		0x00
+#define	SD20_TX_14_AHEAD		0x10
+#define SD20_TX_SEL_MASK		0x10
+#define	DDR_VAR_SDCLK_POL_SWAP		0x01
+
+/* SD_TRANSFER */
+#define	SD_TRANSFER_START		0x80
+#define	SD_TRANSFER_END			0x40
+#define SD_STAT_IDLE			0x20
+#define	SD_TRANSFER_ERR			0x10
+/* SD Transfer Mode definition */
+#define	SD_TM_NORMAL_WRITE		0x00
+#define	SD_TM_AUTO_WRITE_3		0x01
+#define	SD_TM_AUTO_WRITE_4		0x02
+#define	SD_TM_AUTO_READ_3		0x05
+#define	SD_TM_AUTO_READ_4		0x06
+#define	SD_TM_CMD_RSP			0x08
+#define	SD_TM_AUTO_WRITE_1		0x09
+#define	SD_TM_AUTO_WRITE_2		0x0A
+#define	SD_TM_NORMAL_READ		0x0C
+#define	SD_TM_AUTO_READ_1		0x0D
+#define	SD_TM_AUTO_READ_2		0x0E
+#define	SD_TM_AUTO_TUNING		0x0F
+
+/* SD_VPTX_CTL / SD_VPRX_CTL */
+#define PHASE_CHANGE			0x80
+#define PHASE_NOT_RESET			0x40
+
+/* SD_DCMPS_TX_CTL / SD_DCMPS_RX_CTL */
+#define DCMPS_CHANGE			0x80
+#define DCMPS_CHANGE_DONE		0x40
+#define DCMPS_ERROR			0x20
+#define DCMPS_CURRENT_PHASE		0x1F
+
+/* SD Configure 1 Register */
+#define SD_CLK_DIVIDE_0			0x00
+#define	SD_CLK_DIVIDE_256		0xC0
+#define	SD_CLK_DIVIDE_128		0x80
+#define	SD_BUS_WIDTH_1BIT		0x00
+#define	SD_BUS_WIDTH_4BIT		0x01
+#define	SD_BUS_WIDTH_8BIT		0x02
+#define	SD_ASYNC_FIFO_NOT_RST		0x10
+#define	SD_20_MODE			0x00
+#define	SD_DDR_MODE			0x04
+#define	SD_30_MODE			0x08
+
+#define SD_CLK_DIVIDE_MASK		0xC0
+
+/* SD_CMD_STATE */
+#define SD_CMD_IDLE			0x80
+
+/* SD_DATA_STATE */
+#define SD_DATA_IDLE			0x80
+
+/* DCM_DRP_CTL */
+#define DCM_RESET			0x08
+#define DCM_LOCKED			0x04
+#define DCM_208M			0x00
+#define DCM_TX			        0x01
+#define DCM_RX			        0x02
+
+/* DCM_DRP_TRIG */
+#define DRP_START			0x80
+#define DRP_DONE			0x40
+
+/* DCM_DRP_CFG */
+#define DRP_WRITE			0x80
+#define DRP_READ			0x00
+#define DCM_WRITE_ADDRESS_50		0x50
+#define DCM_WRITE_ADDRESS_51		0x51
+#define DCM_READ_ADDRESS_00		0x00
+#define DCM_READ_ADDRESS_51		0x51
+
+/* IRQSTAT0 */
+#define DMA_DONE_INT			0x80
+#define SUSPEND_INT			0x40
+#define LINK_RDY_INT			0x20
+#define LINK_DOWN_INT			0x10
+
+/* DMACTL */
+#define DMA_RST				0x80
+#define DMA_BUSY			0x04
+#define DMA_DIR_TO_CARD			0x00
+#define DMA_DIR_FROM_CARD		0x02
+#define DMA_EN				0x01
+#define DMA_128				(0 << 4)
+#define DMA_256				(1 << 4)
+#define DMA_512				(2 << 4)
+#define DMA_1024			(3 << 4)
+#define DMA_PACK_SIZE_MASK		0x30
+
+/* SSC_CTL1 */
+#define SSC_RSTB			0x80
+#define SSC_8X_EN			0x40
+#define SSC_FIX_FRAC			0x20
+#define SSC_SEL_1M			0x00
+#define SSC_SEL_2M			0x08
+#define SSC_SEL_4M			0x10
+#define SSC_SEL_8M			0x18
+
+/* SSC_CTL2 */
+#define SSC_DEPTH_MASK			0x07
+#define SSC_DEPTH_DISALBE		0x00
+#define SSC_DEPTH_4M			0x01
+#define SSC_DEPTH_2M			0x02
+#define SSC_DEPTH_1M			0x03
+#define SSC_DEPTH_500K			0x04
+#define SSC_DEPTH_250K			0x05
+
+/* System Clock Control Register */
+#define CLK_LOW_FREQ			0x01
+
+/* System Clock Divider Register */
+#define CLK_DIV_1			0x01
+#define CLK_DIV_2			0x02
+#define CLK_DIV_4			0x03
+#define CLK_DIV_8			0x04
+
+/* MS_CFG */
+#define	SAMPLE_TIME_RISING		0x00
+#define	SAMPLE_TIME_FALLING		0x80
+#define	PUSH_TIME_DEFAULT		0x00
+#define	PUSH_TIME_ODD			0x40
+#define	NO_EXTEND_TOGGLE		0x00
+#define	EXTEND_TOGGLE_CHK		0x20
+#define	MS_BUS_WIDTH_1			0x00
+#define	MS_BUS_WIDTH_4			0x10
+#define	MS_BUS_WIDTH_8			0x18
+#define	MS_2K_SECTOR_MODE		0x04
+#define	MS_512_SECTOR_MODE		0x00
+#define	MS_TOGGLE_TIMEOUT_EN		0x00
+#define	MS_TOGGLE_TIMEOUT_DISEN		0x01
+#define MS_NO_CHECK_INT			0x02
+
+/* MS_TRANS_CFG */
+#define	WAIT_INT			0x80
+#define	NO_WAIT_INT			0x00
+#define	NO_AUTO_READ_INT_REG		0x00
+#define	AUTO_READ_INT_REG		0x40
+#define	MS_CRC16_ERR			0x20
+#define	MS_RDY_TIMEOUT			0x10
+#define	MS_INT_CMDNK			0x08
+#define	MS_INT_BREQ			0x04
+#define	MS_INT_ERR			0x02
+#define	MS_INT_CED			0x01
+
+/* MS_TRANSFER */
+#define	MS_TRANSFER_START		0x80
+#define	MS_TRANSFER_END			0x40
+#define	MS_TRANSFER_ERR			0x20
+#define	MS_BS_STATE			0x10
+#define	MS_TM_READ_BYTES		0x00
+#define	MS_TM_NORMAL_READ		0x01
+#define	MS_TM_WRITE_BYTES		0x04
+#define	MS_TM_NORMAL_WRITE		0x05
+#define	MS_TM_AUTO_READ			0x08
+#define	MS_TM_AUTO_WRITE		0x0C
+
+/* SD Configure 2 Register */
+#define	SD_CALCULATE_CRC7		0x00
+#define	SD_NO_CALCULATE_CRC7		0x80
+#define	SD_CHECK_CRC16			0x00
+#define	SD_NO_CHECK_CRC16		0x40
+#define SD_NO_CHECK_WAIT_CRC_TO		0x20
+#define	SD_WAIT_BUSY_END		0x08
+#define	SD_NO_WAIT_BUSY_END		0x00
+#define	SD_CHECK_CRC7			0x00
+#define	SD_NO_CHECK_CRC7		0x04
+#define	SD_RSP_LEN_0			0x00
+#define	SD_RSP_LEN_6			0x01
+#define	SD_RSP_LEN_17			0x02
+/* SD/MMC Response Type Definition */
+#define	SD_RSP_TYPE_R0			0x04
+#define	SD_RSP_TYPE_R1			0x01
+#define	SD_RSP_TYPE_R1b			0x09
+#define	SD_RSP_TYPE_R2			0x02
+#define	SD_RSP_TYPE_R3			0x05
+#define	SD_RSP_TYPE_R4			0x05
+#define	SD_RSP_TYPE_R5			0x01
+#define	SD_RSP_TYPE_R6			0x01
+#define	SD_RSP_TYPE_R7			0x01
+
+/* SD_CONFIURE3 */
+#define	SD_RSP_80CLK_TIMEOUT_EN		0x01
+
+/* Card Transfer Reset Register */
+#define SPI_STOP			0x01
+#define XD_STOP				0x02
+#define SD_STOP				0x04
+#define MS_STOP				0x08
+#define SPI_CLR_ERR			0x10
+#define XD_CLR_ERR			0x20
+#define SD_CLR_ERR			0x40
+#define MS_CLR_ERR			0x80
+
+/* Card Data Source Register */
+#define PINGPONG_BUFFER			0x01
+#define RING_BUFFER			0x00
+
+/* Card Power Control Register */
+#define PMOS_STRG_MASK			0x10
+#define PMOS_STRG_800mA			0x10
+#define PMOS_STRG_400mA			0x00
+#define SD_POWER_OFF			0x03
+#define SD_PARTIAL_POWER_ON		0x01
+#define SD_POWER_ON			0x00
+#define SD_POWER_MASK			0x03
+#define MS_POWER_OFF			0x0C
+#define MS_PARTIAL_POWER_ON		0x04
+#define MS_POWER_ON			0x00
+#define MS_POWER_MASK			0x0C
+#define BPP_POWER_OFF			0x0F
+#define BPP_POWER_5_PERCENT_ON		0x0E
+#define BPP_POWER_10_PERCENT_ON		0x0C
+#define BPP_POWER_15_PERCENT_ON		0x08
+#define BPP_POWER_ON			0x00
+#define BPP_POWER_MASK			0x0F
+
+/* PWR_GATE_CTRL */
+#define PWR_GATE_EN			0x01
+#define LDO3318_PWR_MASK		0x06
+#define LDO_ON				0x00
+#define LDO_SUSPEND			0x04
+#define LDO_OFF				0x06
+
+/* CARD_CLK_SOURCE */
+#define CRC_FIX_CLK			(0x00 << 0)
+#define CRC_VAR_CLK0			(0x01 << 0)
+#define CRC_VAR_CLK1			(0x02 << 0)
+#define SD30_FIX_CLK			(0x00 << 2)
+#define SD30_VAR_CLK0			(0x01 << 2)
+#define SD30_VAR_CLK1			(0x02 << 2)
+#define SAMPLE_FIX_CLK			(0x00 << 4)
+#define SAMPLE_VAR_CLK0			(0x01 << 4)
+#define SAMPLE_VAR_CLK1			(0x02 << 4)
+
+#define MS_CFG				0xFD40
+#define MS_TPC				0xFD41
+#define MS_TRANS_CFG			0xFD42
+#define MS_TRANSFER			0xFD43
+#define MS_INT_REG			0xFD44
+#define MS_BYTE_CNT			0xFD45
+#define MS_SECTOR_CNT_L			0xFD46
+#define MS_SECTOR_CNT_H			0xFD47
+#define MS_DBUS_H			0xFD48
+
+#define SD_CFG1				0xFDA0
+#define SD_CFG2				0xFDA1
+#define SD_CFG3				0xFDA2
+#define SD_STAT1			0xFDA3
+#define SD_STAT2			0xFDA4
+#define SD_BUS_STAT			0xFDA5
+#define SD_PAD_CTL			0xFDA6
+#define SD_SAMPLE_POINT_CTL		0xFDA7
+#define SD_PUSH_POINT_CTL		0xFDA8
+#define SD_CMD0				0xFDA9
+#define SD_CMD1				0xFDAA
+#define SD_CMD2				0xFDAB
+#define SD_CMD3				0xFDAC
+#define SD_CMD4				0xFDAD
+#define SD_CMD5				0xFDAE
+#define SD_BYTE_CNT_L			0xFDAF
+#define SD_BYTE_CNT_H			0xFDB0
+#define SD_BLOCK_CNT_L			0xFDB1
+#define SD_BLOCK_CNT_H			0xFDB2
+#define SD_TRANSFER			0xFDB3
+#define SD_CMD_STATE			0xFDB5
+#define SD_DATA_STATE			0xFDB6
+
+#define SRCTL				0xFC13
+
+#define	DCM_DRP_CTL			0xFC23
+#define	DCM_DRP_TRIG			0xFC24
+#define	DCM_DRP_CFG			0xFC25
+#define	DCM_DRP_WR_DATA_L		0xFC26
+#define	DCM_DRP_WR_DATA_H		0xFC27
+#define	DCM_DRP_RD_DATA_L		0xFC28
+#define	DCM_DRP_RD_DATA_H		0xFC29
+#define SD_VPCLK0_CTL			0xFC2A
+#define SD_VPCLK1_CTL			0xFC2B
+#define SD_DCMPS0_CTL			0xFC2C
+#define SD_DCMPS1_CTL			0xFC2D
+#define SD_VPTX_CTL			SD_VPCLK0_CTL
+#define SD_VPRX_CTL			SD_VPCLK1_CTL
+#define SD_DCMPS_TX_CTL			SD_DCMPS0_CTL
+#define SD_DCMPS_RX_CTL			SD_DCMPS1_CTL
+#define CARD_CLK_SOURCE			0xFC2E
+
+#define CARD_PWR_CTL			0xFD50
+#define CARD_CLK_SWITCH			0xFD51
+#define CARD_SHARE_MODE			0xFD52
+#define CARD_DRIVE_SEL			0xFD53
+#define CARD_STOP			0xFD54
+#define CARD_OE				0xFD55
+#define CARD_AUTO_BLINK			0xFD56
+#define CARD_GPIO_DIR			0xFD57
+#define CARD_GPIO			0xFD58
+#define CARD_DATA_SOURCE		0xFD5B
+#define CARD_SELECT			0xFD5C
+#define SD30_DRIVE_SEL			0xFD5E
+#define CARD_CLK_EN			0xFD69
+#define SDIO_CTRL			0xFD6B
+#define CD_PAD_CTL			0xFD73
+
+#define FPDCTL				0xFC00
+#define PDINFO				0xFC01
+
+#define CLK_CTL				0xFC02
+#define CLK_DIV				0xFC03
+#define CLK_SEL				0xFC04
+
+#define SSC_DIV_N_0			0xFC0F
+#define SSC_DIV_N_1			0xFC10
+#define SSC_CTL1			0xFC11
+#define SSC_CTL2			0xFC12
+
+#define RCCTL				0xFC14
+
+#define FPGA_PULL_CTL			0xFC1D
+#define OLT_LED_CTL			0xFC1E
+#define GPIO_CTL			0xFC1F
+
+#define LDO_CTL				0xFC1E
+#define SYS_VER				0xFC32
+
+#define CARD_PULL_CTL1			0xFD60
+#define CARD_PULL_CTL2			0xFD61
+#define CARD_PULL_CTL3			0xFD62
+#define CARD_PULL_CTL4			0xFD63
+#define CARD_PULL_CTL5			0xFD64
+#define CARD_PULL_CTL6			0xFD65
+
+/* PCI Express Related Registers */
+#define IRQEN0				0xFE20
+#define IRQSTAT0			0xFE21
+#define IRQEN1				0xFE22
+#define IRQSTAT1			0xFE23
+#define TLPRIEN				0xFE24
+#define TLPRISTAT			0xFE25
+#define TLPTIEN				0xFE26
+#define TLPTISTAT			0xFE27
+#define DMATC0				0xFE28
+#define DMATC1				0xFE29
+#define DMATC2				0xFE2A
+#define DMATC3				0xFE2B
+#define DMACTL				0xFE2C
+#define BCTL				0xFE2D
+#define RBBC0				0xFE2E
+#define RBBC1				0xFE2F
+#define RBDAT				0xFE30
+#define RBCTL				0xFE34
+#define CFGADDR0			0xFE35
+#define CFGADDR1			0xFE36
+#define CFGDATA0			0xFE37
+#define CFGDATA1			0xFE38
+#define CFGDATA2			0xFE39
+#define CFGDATA3			0xFE3A
+#define CFGRWCTL			0xFE3B
+#define PHYRWCTL			0xFE3C
+#define PHYDATA0			0xFE3D
+#define PHYDATA1			0xFE3E
+#define PHYADDR				0xFE3F
+#define MSGRXDATA0			0xFE40
+#define MSGRXDATA1			0xFE41
+#define MSGRXDATA2			0xFE42
+#define MSGRXDATA3			0xFE43
+#define MSGTXDATA0			0xFE44
+#define MSGTXDATA1			0xFE45
+#define MSGTXDATA2			0xFE46
+#define MSGTXDATA3			0xFE47
+#define MSGTXCTL			0xFE48
+#define PETXCFG				0xFE49
+
+#define CDRESUMECTL			0xFE52
+#define WAKE_SEL_CTL			0xFE54
+#define PME_FORCE_CTL			0xFE56
+#define ASPM_FORCE_CTL			0xFE57
+#define PM_CLK_FORCE_CTL		0xFE58
+#define PERST_GLITCH_WIDTH		0xFE5C
+#define CHANGE_LINK_STATE		0xFE5B
+#define RESET_LOAD_REG			0xFE5E
+#define EFUSE_CONTENT			0xFE5F
+#define HOST_SLEEP_STATE		0xFE60
+#define SDIO_CFG			0xFE70
+
+#define NFTS_TX_CTRL			0xFE72
+
+#define PWR_GATE_CTRL			0xFE75
+#define PWD_SUSPEND_EN			0xFE76
+#define LDO_PWR_SEL			0xFE78
+
+#define DUMMY_REG_RESET_0		0xFE90
+
+/* Memory mapping */
+#define SRAM_BASE			0xE600
+#define RBUF_BASE			0xF400
+#define PPBUF_BASE1			0xF800
+#define PPBUF_BASE2			0xFA00
+#define IMAGE_FLAG_ADDR0		0xCE80
+#define IMAGE_FLAG_ADDR1		0xCE81
+
+#define rtsx_pci_init_cmd(pcr)		((pcr)->ci = 0)
+
+struct rtsx_pcr;
+
+struct pcr_handle {
+	struct rtsx_pcr			*pcr;
+};
+
+struct pcr_ops {
+	int		(*extra_init_hw)(struct rtsx_pcr *pcr);
+	int		(*optimize_phy)(struct rtsx_pcr *pcr);
+	int		(*turn_on_led)(struct rtsx_pcr *pcr);
+	int		(*turn_off_led)(struct rtsx_pcr *pcr);
+	int		(*enable_auto_blink)(struct rtsx_pcr *pcr);
+	int		(*disable_auto_blink)(struct rtsx_pcr *pcr);
+	int		(*card_power_on)(struct rtsx_pcr *pcr, int card);
+	int		(*card_power_off)(struct rtsx_pcr *pcr, int card);
+	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
+};
+
+enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
+
+struct rtsx_pcr {
+	struct pci_dev			*pci;
+	unsigned int			id;
+
+	/* pci resources */
+	unsigned long			addr;
+	void __iomem			*remap_addr;
+	int				irq;
+
+	/* host reserved buffer */
+	void				*rtsx_resv_buf;
+	dma_addr_t			rtsx_resv_buf_addr;
+
+	void				*host_cmds_ptr;
+	dma_addr_t			host_cmds_addr;
+	int				ci;
+
+	void				*host_sg_tbl_ptr;
+	dma_addr_t			host_sg_tbl_addr;
+	int				sgi;
+
+	u32				bier;
+	char				trans_result;
+
+	unsigned int			card_inserted;
+	unsigned int			card_removed;
+
+	struct delayed_work		carddet_work;
+	struct delayed_work		idle_work;
+
+	spinlock_t			lock;
+	struct mutex			pcr_mutex;
+	struct completion		*done;
+	struct completion		*finish_me;
+
+	unsigned int			cur_clock;
+	bool				ms_pmos;
+	bool				remove_pci;
+	bool				msi_en;
+
+#define EXTRA_CAPS_SD_SDR50		(1 << 0)
+#define EXTRA_CAPS_SD_SDR104		(1 << 1)
+#define EXTRA_CAPS_SD_DDR50		(1 << 2)
+#define EXTRA_CAPS_MMC_HSDDR		(1 << 3)
+#define EXTRA_CAPS_MMC_HS200		(1 << 4)
+#define EXTRA_CAPS_MMC_8BIT		(1 << 5)
+	u32				extra_caps;
+
+#define IC_VER_A			0
+#define IC_VER_B			1
+#define IC_VER_C			2
+#define IC_VER_D			3
+	u8				ic_version;
+
+	const u32			*sd_pull_ctl_enable_tbl;
+	const u32			*sd_pull_ctl_disable_tbl;
+	const u32			*ms_pull_ctl_enable_tbl;
+	const u32			*ms_pull_ctl_disable_tbl;
+
+	const struct pcr_ops		*ops;
+	enum PDEV_STAT			state;
+
+	int				num_slots;
+	struct rtsx_slot		*slots;
+};
+
+#define CHK_PCI_PID(pcr, pid)		((pcr)->pci->device == (pid))
+#define PCI_VID(pcr)			((pcr)->pci->vendor)
+#define PCI_PID(pcr)			((pcr)->pci->device)
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr);
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data);
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data);
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val);
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val);
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr);
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data);
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout);
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card);
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr);
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr);
+
+static inline u8 *rtsx_pci_get_cmd_data(struct rtsx_pcr *pcr)
+{
+	return (u8 *)(pcr->host_cmds_ptr);
+}
+
+#endif
-- 
cgit v1.2.3-55-g7522


From ac96511bb5cf92bad97ffc2249f75e45eb70301d Mon Sep 17 00:00:00 2001
From: Peter Chen
Date: Fri, 9 Nov 2012 09:44:44 +0800
Subject: usb: phy: change phy notify connect/disconnect API

The old parameter "port" is useless for phy notify, as one usb
phy is only for one usb port. New parameter "speed" stands for
the device's speed which is on the port, this "speed" parameter
is needed at some platforms which will do some phy operations
according to device's speed.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Tested-by: Mike Thompson <mpthompson@gmail.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c    |  4 ++--
 drivers/usb/otg/mxs-phy.c | 22 ++++++++++++++--------
 include/linux/usb/phy.h   | 15 +++++++++------
 3 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 8391538d688b..a815fd2cc5e7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4039,7 +4039,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 		goto fail;
 
 	if (hcd->phy && !hdev->parent)
-		usb_phy_notify_connect(hcd->phy, port1);
+		usb_phy_notify_connect(hcd->phy, udev->speed);
 
 	/*
 	 * Some superspeed devices have finished the link training process
@@ -4238,7 +4238,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 	if (udev) {
 		if (hcd->phy && !hdev->parent &&
 				!(portstatus & USB_PORT_STAT_CONNECTION))
-			usb_phy_notify_disconnect(hcd->phy, port1);
+			usb_phy_notify_disconnect(hcd->phy, udev->speed);
 		usb_disconnect(&hub->ports[port1 - 1]->child);
 	}
 	clear_bit(port1, hub->change_bits);
diff --git a/drivers/usb/otg/mxs-phy.c b/drivers/usb/otg/mxs-phy.c
index 5b09f3339ded..9a3caeecc508 100644
--- a/drivers/usb/otg/mxs-phy.c
+++ b/drivers/usb/otg/mxs-phy.c
@@ -76,22 +76,28 @@ static void mxs_phy_shutdown(struct usb_phy *phy)
 	clk_disable_unprepare(mxs_phy->clk);
 }
 
-static int mxs_phy_on_connect(struct usb_phy *phy, int port)
+static int mxs_phy_on_connect(struct usb_phy *phy,
+		enum usb_device_speed speed)
 {
-	dev_dbg(phy->dev, "Connect on port %d\n", port);
+	dev_dbg(phy->dev, "%s speed device has connected\n",
+		(speed == USB_SPEED_HIGH) ? "high" : "non-high");
 
-	writel_relaxed(BM_USBPHY_CTRL_ENHOSTDISCONDETECT,
-			phy->io_priv + HW_USBPHY_CTRL_SET);
+	if (speed == USB_SPEED_HIGH)
+		writel_relaxed(BM_USBPHY_CTRL_ENHOSTDISCONDETECT,
+				phy->io_priv + HW_USBPHY_CTRL_SET);
 
 	return 0;
 }
 
-static int mxs_phy_on_disconnect(struct usb_phy *phy, int port)
+static int mxs_phy_on_disconnect(struct usb_phy *phy,
+		enum usb_device_speed speed)
 {
-	dev_dbg(phy->dev, "Disconnect on port %d\n", port);
+	dev_dbg(phy->dev, "%s speed device has disconnected\n",
+		(speed == USB_SPEED_HIGH) ? "high" : "non-high");
 
-	writel_relaxed(BM_USBPHY_CTRL_ENHOSTDISCONDETECT,
-			phy->io_priv + HW_USBPHY_CTRL_CLR);
+	if (speed == USB_SPEED_HIGH)
+		writel_relaxed(BM_USBPHY_CTRL_ENHOSTDISCONDETECT,
+				phy->io_priv + HW_USBPHY_CTRL_CLR);
 
 	return 0;
 }
diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index 06b5bae35b29..a29ae1eb9346 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -10,6 +10,7 @@
 #define __LINUX_USB_PHY_H
 
 #include <linux/notifier.h>
+#include <linux/usb.h>
 
 enum usb_phy_events {
 	USB_EVENT_NONE,         /* no events or cable disconnected */
@@ -99,8 +100,10 @@ struct usb_phy {
 				int suspend);
 
 	/* notify phy connect status change */
-	int	(*notify_connect)(struct usb_phy *x, int port);
-	int	(*notify_disconnect)(struct usb_phy *x, int port);
+	int	(*notify_connect)(struct usb_phy *x,
+			enum usb_device_speed speed);
+	int	(*notify_disconnect)(struct usb_phy *x,
+			enum usb_device_speed speed);
 };
 
 
@@ -189,19 +192,19 @@ usb_phy_set_suspend(struct usb_phy *x, int suspend)
 }
 
 static inline int
-usb_phy_notify_connect(struct usb_phy *x, int port)
+usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed)
 {
 	if (x->notify_connect)
-		return x->notify_connect(x, port);
+		return x->notify_connect(x, speed);
 	else
 		return 0;
 }
 
 static inline int
-usb_phy_notify_disconnect(struct usb_phy *x, int port)
+usb_phy_notify_disconnect(struct usb_phy *x, enum usb_device_speed speed)
 {
 	if (x->notify_disconnect)
-		return x->notify_disconnect(x, port);
+		return x->notify_disconnect(x, speed);
 	else
 		return 0;
 }
-- 
cgit v1.2.3-55-g7522


From ebd5ac165f2aaefb767c53112c2010b0ff3df688 Mon Sep 17 00:00:00 2001
From: Shinya Kuribayashi
Date: Wed, 24 Oct 2012 19:58:10 +0900
Subject: i2c: i2c-sh_mobile: support I2C hardware block with a faster
 operating clock

On newer SH-/R-Mobile SoCs, a clock supply to the I2C hardware block,
which is used to generate the SCL clock output, is getting faster than
before, while on the other hand, the SCL clock control registers, ICCH
and ICCL, stay unchanged in 9-bit-wide (8+1).

On such silicons, the internal SCL clock counter gets incremented every
2 clocks of the operating clock.

This patch makes it configurable through platform data.

Signed-off-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/i2c/busses/i2c-sh_mobile.c | 5 +++++
 include/linux/i2c/i2c-sh_mobile.h  | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 4dc0cc3611c2..4c283583bea0 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -120,6 +120,7 @@ struct sh_mobile_i2c_data {
 	void __iomem *reg;
 	struct i2c_adapter adap;
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 	struct clk *clk;
 	u_int8_t icic;
 	u_int8_t flags;
@@ -231,6 +232,7 @@ static void sh_mobile_i2c_init(struct sh_mobile_i2c_data *pd)
 	/* Get clock rate after clock is enabled */
 	clk_enable(pd->clk);
 	i2c_clk_khz = clk_get_rate(pd->clk) / 1000;
+	i2c_clk_khz /= pd->clks_per_count;
 
 	if (pd->bus_speed == STANDARD_MODE) {
 		tLOW	= 47;	/* tLOW = 4.7 us */
@@ -658,6 +660,9 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 	pd->bus_speed = STANDARD_MODE;
 	if (pdata && pdata->bus_speed)
 		pd->bus_speed = pdata->bus_speed;
+	pd->clks_per_count = 1;
+	if (pdata && pdata->clks_per_count)
+		pd->clks_per_count = pdata->clks_per_count;
 
 	/* The IIC blocks on SH-Mobile ARM processors
 	 * come with two new bits in ICIC.
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
index beda7081aead..06e3089795fb 100644
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ b/include/linux/i2c/i2c-sh_mobile.h
@@ -5,6 +5,7 @@
 
 struct i2c_sh_mobile_platform_data {
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 };
 
 #endif /* __I2C_SH_MOBILE_H__ */
-- 
cgit v1.2.3-55-g7522


From d611d41b46c96195b9a168a21992782458826e07 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 6 Nov 2012 22:55:27 +0100
Subject: mtd: diskonchip: use inline functions for DocRead/DocWrite

The diskonchip drivers traditionally use home-grown macros for
doing MMIO accesses, which cause a lot of warnings, at least
on ARM machines:

drivers/mtd/devices/doc2000.c: In function 'doc_write':
drivers/mtd/devices/doc2000.c:854:5: warning: value computed is not used [-Wunused-value]
drivers/mtd/devices/doc2000.c: In function 'doc_erase':
drivers/mtd/devices/doc2000.c:1123:5: warning: value computed is not used [-Wunused-value
drivers/mtd/nand/diskonchip.c: In function 'doc2000_read_byte':
drivers/mtd/nand/diskonchip.c:318:3: warning: value computed is not used [-Wunused-value]

A nicer solution is to use the architecture-defined I/O accessors.
Here, we use the __raw_readl/__raw_writel style, instead of the
proper readl/writel ones, in order to preserve the odd semantics
of the existing macros that have their own barrier implementation
and no byte swap. It would be nice to fix this properly and use
the correct accessors as well as make the word size independent
from the architecture, but I guess the hardware is obsolete
enough that we should better not mess the driver an more than
necessary.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/doc2000.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h
index 0f6fea73a1f6..407d1e556c39 100644
--- a/include/linux/mtd/doc2000.h
+++ b/include/linux/mtd/doc2000.h
@@ -92,12 +92,26 @@
  * Others use readb/writeb
  */
 #if defined(__arm__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0)
+static inline u8 ReadDOC_(u32 __iomem *addr, unsigned long reg)
+{
+	return __raw_readl(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u32 __iomem *addr, unsigned long reg)
+{
+	__raw_writel(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x8000
 #elif defined(__ppc__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0)
+static inline u8 ReadDOC_(u16 __iomem *addr, unsigned long reg)
+{
+	return __raw_readw(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u16 __iomem *addr, unsigned long reg)
+{
+	__raw_writew(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x4000
 #else
 #define ReadDOC_(adr, reg)      readb((void __iomem *)(adr) + (reg))
-- 
cgit v1.2.3-55-g7522


From 5d27aa5af04f58f3020de1c224dcf8a62151fd58 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 6 Nov 2012 22:55:28 +0100
Subject: mtd: uninitialized variable warning in map.h

The map_word_load() function initializes exactly
as many words in the buffer as required, but gcc
cannot figure this out and gives a misleading
warning. Marking the local variable as
uninitialized_var shuts up that warning.

Without this patch, building acs5k_defconfig results in:

drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_panic_write':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]
drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 56c7936e0c65..f6eb4332ac92 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -391,7 +391,7 @@ static inline map_word map_word_ff(struct map_info *map)
 
 static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 {
-	map_word r;
+	map_word uninitialized_var(r);
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = __raw_readb(map->virt + ofs);
-- 
cgit v1.2.3-55-g7522


From 6920b5a791bbb54810159fbf6acf2c6ae14cdd22 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Fri, 21 Sep 2012 10:18:58 +0100
Subject: ARM: move serial_sa1100.h header file to linux/platform_data

This is really driver platform data, so move it to the appropriate
directory.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mach/serial_sa1100.h   | 31 ---------------------------
 arch/arm/mach-sa1100/assabet.c              |  2 +-
 arch/arm/mach-sa1100/badge4.c               |  2 +-
 arch/arm/mach-sa1100/cerf.c                 |  2 +-
 arch/arm/mach-sa1100/collie.c               |  2 +-
 arch/arm/mach-sa1100/h3xxx.c                |  2 +-
 arch/arm/mach-sa1100/hackkit.c              |  2 +-
 arch/arm/mach-sa1100/jornada720.c           |  2 +-
 arch/arm/mach-sa1100/lart.c                 |  2 +-
 arch/arm/mach-sa1100/nanoengine.c           |  2 +-
 arch/arm/mach-sa1100/neponset.c             |  2 +-
 arch/arm/mach-sa1100/pleb.c                 |  2 +-
 arch/arm/mach-sa1100/shannon.c              |  2 +-
 arch/arm/mach-sa1100/simpad.c               |  2 +-
 drivers/tty/serial/sa1100.c                 |  2 +-
 include/linux/platform_data/sa11x0-serial.h | 33 +++++++++++++++++++++++++++++
 16 files changed, 47 insertions(+), 45 deletions(-)
 delete mode 100644 arch/arm/include/asm/mach/serial_sa1100.h
 create mode 100644 include/linux/platform_data/sa11x0-serial.h

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/mach/serial_sa1100.h b/arch/arm/include/asm/mach/serial_sa1100.h
deleted file mode 100644
index d09064bf95a0..000000000000
--- a/arch/arm/include/asm/mach/serial_sa1100.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- *  arch/arm/include/asm/mach/serial_sa1100.h
- *
- *  Author: Nicolas Pitre
- *
- * Moved and changed lots, Russell King
- *
- * Low level machine dependent UART functions.
- */
-
-struct uart_port;
-struct uart_info;
-
-/*
- * This is a temporary structure for registering these
- * functions; it is intended to be discarded after boot.
- */
-struct sa1100_port_fns {
-	void	(*set_mctrl)(struct uart_port *, u_int);
-	u_int	(*get_mctrl)(struct uart_port *);
-	void	(*pm)(struct uart_port *, u_int, u_int);
-	int	(*set_wake)(struct uart_port *, u_int);
-};
-
-#ifdef CONFIG_SERIAL_SA1100
-void sa1100_register_uart_fns(struct sa1100_port_fns *fns);
-void sa1100_register_uart(int idx, int port);
-#else
-#define sa1100_register_uart_fns(fns) do { } while (0)
-#define sa1100_register_uart(idx,port) do { } while (0)
-#endif
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 6a7ad3c2a3fc..9a23739f7026 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/serial_core.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/mtd/mtd.h>
@@ -37,7 +38,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/irda.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <mach/assabet.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c
index 038df4894b0f..b2dadf3ea3df 100644
--- a/arch/arm/mach-sa1100/badge4.c
+++ b/arch/arm/mach-sa1100/badge4.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/tty.h>
@@ -34,7 +35,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/hardware/sa1111.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/badge4.h>
 
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index ad0eb08ea077..304bca4a07c0 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/tty.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/mtd/mtd.h>
@@ -27,7 +28,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/cerf.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 170cb6107f68..45f424f5fca6 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -21,6 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/mtd/mtd.h>
@@ -40,7 +41,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <asm/hardware/scoop.h>
 #include <asm/mach/sharpsl_param.h>
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index 63150e1ffe9e..f17e7382242a 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -17,12 +17,12 @@
 #include <linux/mfd/htc-egpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/h3xxx.h>
 
diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c
index fc106aab7c7e..d005939c41fc 100644
--- a/arch/arm/mach-sa1100/hackkit.c
+++ b/arch/arm/mach-sa1100/hackkit.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/cpufreq.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/serial_core.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/irq.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c
index e3084f47027d..35cfc428b4d4 100644
--- a/arch/arm/mach-sa1100/jornada720.c
+++ b/arch/arm/mach-sa1100/jornada720.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <linux/mtd/mtd.h>
@@ -30,7 +31,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 3048b17e84c5..f69f78fc3ddd 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -4,6 +4,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/tty.h>
 #include <linux/gpio.h>
 #include <linux/leds.h>
@@ -18,7 +19,6 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/irqs.h>
 
diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c
index 41f69d97066f..102e08f7b109 100644
--- a/arch/arm/mach-sa1100/nanoengine.c
+++ b/arch/arm/mach-sa1100/nanoengine.c
@@ -13,6 +13,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/root_dev.h>
@@ -24,7 +25,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/hardware.h>
 #include <mach/nanoengine.h>
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 266db873a4e4..88be0474f3d7 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/serial_core.h>
@@ -14,7 +15,6 @@
 
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <asm/hardware/sa1111.h>
 #include <asm/sizes.h>
 
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index 37fe0a0a5369..c51bb63f90fb 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/ioport.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/io.h>
@@ -18,7 +19,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/flash.h>
-#include <asm/mach/serial_sa1100.h>
 #include <mach/irqs.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index ff6b7b35bca9..6460d25fbb88 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -5,6 +5,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/tty.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -18,7 +19,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/shannon.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 71790e581d93..6d65f65fcb23 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -9,6 +9,7 @@
 #include <linux/proc_fs.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/mtd/mtd.h>
@@ -23,7 +24,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/simpad.h>
 #include <mach/irqs.h>
diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c
index 2ca5959ec3fa..ecc1e16be623 100644
--- a/drivers/tty/serial/sa1100.c
+++ b/drivers/tty/serial/sa1100.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/sysrq.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
@@ -39,7 +40,6 @@
 #include <asm/irq.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
-#include <asm/mach/serial_sa1100.h>
 
 /* We've been assigned a range on the "Low-density serial ports" major */
 #define SERIAL_SA1100_MAJOR	204
diff --git a/include/linux/platform_data/sa11x0-serial.h b/include/linux/platform_data/sa11x0-serial.h
new file mode 100644
index 000000000000..4504d5d592f0
--- /dev/null
+++ b/include/linux/platform_data/sa11x0-serial.h
@@ -0,0 +1,33 @@
+/*
+ *  Author: Nicolas Pitre
+ *
+ * Moved and changed lots, Russell King
+ *
+ * Low level machine dependent UART functions.
+ */
+#ifndef SA11X0_SERIAL_H
+#define SA11X0_SERIAL_H
+
+struct uart_port;
+struct uart_info;
+
+/*
+ * This is a temporary structure for registering these
+ * functions; it is intended to be discarded after boot.
+ */
+struct sa1100_port_fns {
+	void	(*set_mctrl)(struct uart_port *, u_int);
+	u_int	(*get_mctrl)(struct uart_port *);
+	void	(*pm)(struct uart_port *, u_int, u_int);
+	int	(*set_wake)(struct uart_port *, u_int);
+};
+
+#ifdef CONFIG_SERIAL_SA1100
+void sa1100_register_uart_fns(struct sa1100_port_fns *fns);
+void sa1100_register_uart(int idx, int port);
+#else
+#define sa1100_register_uart_fns(fns) do { } while (0)
+#define sa1100_register_uart(idx,port) do { } while (0)
+#endif
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 46000065a631c6d21452d533baf086175c384ba4 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Fri, 21 Sep 2012 10:23:44 +0100
Subject: ARM: move udc_pxa2xx.h to linux/platform_data

Move the PXA2xx/IXP4xx UDC header file into linux/platform_data as it
only contains a driver platform data structure.

Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mach/udc_pxa2xx.h   | 26 --------------------------
 arch/arm/mach-ixp4xx/include/mach/udc.h  |  2 +-
 arch/arm/mach-pxa/include/mach/udc.h     |  2 +-
 drivers/usb/gadget/pxa25x_udc.c          |  4 +---
 include/linux/platform_data/pxa2xx_udc.h | 27 +++++++++++++++++++++++++++
 5 files changed, 30 insertions(+), 31 deletions(-)
 delete mode 100644 arch/arm/include/asm/mach/udc_pxa2xx.h
 create mode 100644 include/linux/platform_data/pxa2xx_udc.h

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/mach/udc_pxa2xx.h b/arch/arm/include/asm/mach/udc_pxa2xx.h
deleted file mode 100644
index ea297ac70bc6..000000000000
--- a/arch/arm/include/asm/mach/udc_pxa2xx.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * arch/arm/include/asm/mach/udc_pxa2xx.h
- *
- * This supports machine-specific differences in how the PXA2xx
- * USB Device Controller (UDC) is wired.
- *
- * It is set in linux/arch/arm/mach-pxa/<machine>.c or in
- * linux/arch/mach-ixp4xx/<machine>.c and used in
- * the probe routine of linux/drivers/usb/gadget/pxa2xx_udc.c
- */
-
-struct pxa2xx_udc_mach_info {
-        int  (*udc_is_connected)(void);		/* do we see host? */
-        void (*udc_command)(int cmd);
-#define	PXA2XX_UDC_CMD_CONNECT		0	/* let host see us */
-#define	PXA2XX_UDC_CMD_DISCONNECT	1	/* so host won't see us */
-
-	/* Boards following the design guidelines in the developer's manual,
-	 * with on-chip GPIOs not Lubbock's weird hardware, can have a sane
-	 * VBUS IRQ and omit the methods above.  Store the GPIO number
-	 * here.  Note that sometimes the signals go through inverters...
-	 */
-	bool	gpio_pullup_inverted;
-	int	gpio_pullup;			/* high == pullup activated */
-};
-
diff --git a/arch/arm/mach-ixp4xx/include/mach/udc.h b/arch/arm/mach-ixp4xx/include/mach/udc.h
index 80d6da2eafac..7bd8b96c8843 100644
--- a/arch/arm/mach-ixp4xx/include/mach/udc.h
+++ b/arch/arm/mach-ixp4xx/include/mach/udc.h
@@ -2,7 +2,7 @@
  * arch/arm/mach-ixp4xx/include/mach/udc.h
  *
  */
-#include <asm/mach/udc_pxa2xx.h>
+#include <linux/platform_data/pxa2xx_udc.h>
 
 extern void ixp4xx_set_udc_info(struct pxa2xx_udc_mach_info *info);
 
diff --git a/arch/arm/mach-pxa/include/mach/udc.h b/arch/arm/mach-pxa/include/mach/udc.h
index 2f82332e81a0..9a827e32db98 100644
--- a/arch/arm/mach-pxa/include/mach/udc.h
+++ b/arch/arm/mach-pxa/include/mach/udc.h
@@ -2,7 +2,7 @@
  * arch/arm/mach-pxa/include/mach/udc.h
  *
  */
-#include <asm/mach/udc_pxa2xx.h>
+#include <linux/platform_data/pxa2xx_udc.h>
 
 extern void pxa_set_udc_info(struct pxa2xx_udc_mach_info *info);
 
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 8efbf08c3561..d4ca9f1f7f24 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -29,6 +29,7 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
+#include <linux/platform_data/pxa2xx_udc.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/irq.h>
@@ -59,9 +60,6 @@
 #include <mach/lubbock.h>
 #endif
 
-#include <asm/mach/udc_pxa2xx.h>
-
-
 /*
  * This driver handles the USB Device Controller (UDC) in Intel's PXA 25x
  * series processors.  The UDC for the IXP 4xx series is very similar.
diff --git a/include/linux/platform_data/pxa2xx_udc.h b/include/linux/platform_data/pxa2xx_udc.h
new file mode 100644
index 000000000000..c6c5e98b5b82
--- /dev/null
+++ b/include/linux/platform_data/pxa2xx_udc.h
@@ -0,0 +1,27 @@
+/*
+ * This supports machine-specific differences in how the PXA2xx
+ * USB Device Controller (UDC) is wired.
+ *
+ * It is set in linux/arch/arm/mach-pxa/<machine>.c or in
+ * linux/arch/mach-ixp4xx/<machine>.c and used in
+ * the probe routine of linux/drivers/usb/gadget/pxa2xx_udc.c
+ */
+#ifndef PXA2XX_UDC_H
+#define PXA2XX_UDC_H
+
+struct pxa2xx_udc_mach_info {
+        int  (*udc_is_connected)(void);		/* do we see host? */
+        void (*udc_command)(int cmd);
+#define	PXA2XX_UDC_CMD_CONNECT		0	/* let host see us */
+#define	PXA2XX_UDC_CMD_DISCONNECT	1	/* so host won't see us */
+
+	/* Boards following the design guidelines in the developer's manual,
+	 * with on-chip GPIOs not Lubbock's weird hardware, can have a sane
+	 * VBUS IRQ and omit the methods above.  Store the GPIO number
+	 * here.  Note that sometimes the signals go through inverters...
+	 */
+	bool	gpio_pullup_inverted;
+	int	gpio_pullup;			/* high == pullup activated */
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From ed71871bed7198ca4aa6a79b7a93b73ad6408e98 Mon Sep 17 00:00:00 2001
From: Noam Camus
Date: Fri, 16 Nov 2012 07:03:05 +0200
Subject: tty/8250_early: Turn serial_in/serial_out into weak symbols.

Allows overriding default methods serial_in/serial_out.

In such platform specific replacement it is possible to use
other regshift, biased register offset, any other manipulation
that is not covered with common default methods.

Overriding default methods may be useful for platforms which got
serial peripheral with registers represented in big endian.
In this situation and assuming that 32 bit operations / alignment
is required then it may be useful to swab words before/after
accessing the serial registers.

Signed-off-by: Noam Camus <noamc@ezchip.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_early.c | 42 ++++++++++++++++++------------------
 include/linux/serial_8250.h          |  2 ++
 2 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 843a150ba105..f53a7db4350d 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -48,7 +48,7 @@ struct early_serial8250_device {
 
 static struct early_serial8250_device early_device;
 
-static unsigned int __init serial_in(struct uart_port *port, int offset)
+unsigned int __weak __init serial8250_early_in(struct uart_port *port, int offset)
 {
 	switch (port->iotype) {
 	case UPIO_MEM:
@@ -62,7 +62,7 @@ static unsigned int __init serial_in(struct uart_port *port, int offset)
 	}
 }
 
-static void __init serial_out(struct uart_port *port, int offset, int value)
+void __weak __init serial8250_early_out(struct uart_port *port, int offset, int value)
 {
 	switch (port->iotype) {
 	case UPIO_MEM:
@@ -84,7 +84,7 @@ static void __init wait_for_xmitr(struct uart_port *port)
 	unsigned int status;
 
 	for (;;) {
-		status = serial_in(port, UART_LSR);
+		status = serial8250_early_in(port, UART_LSR);
 		if ((status & BOTH_EMPTY) == BOTH_EMPTY)
 			return;
 		cpu_relax();
@@ -94,7 +94,7 @@ static void __init wait_for_xmitr(struct uart_port *port)
 static void __init serial_putc(struct uart_port *port, int c)
 {
 	wait_for_xmitr(port);
-	serial_out(port, UART_TX, c);
+	serial8250_early_out(port, UART_TX, c);
 }
 
 static void __init early_serial8250_write(struct console *console,
@@ -104,14 +104,14 @@ static void __init early_serial8250_write(struct console *console,
 	unsigned int ier;
 
 	/* Save the IER and disable interrupts */
-	ier = serial_in(port, UART_IER);
-	serial_out(port, UART_IER, 0);
+	ier = serial8250_early_in(port, UART_IER);
+	serial8250_early_out(port, UART_IER, 0);
 
 	uart_console_write(port, s, count, serial_putc);
 
 	/* Wait for transmitter to become empty and restore the IER */
 	wait_for_xmitr(port);
-	serial_out(port, UART_IER, ier);
+	serial8250_early_out(port, UART_IER, ier);
 }
 
 static unsigned int __init probe_baud(struct uart_port *port)
@@ -119,11 +119,11 @@ static unsigned int __init probe_baud(struct uart_port *port)
 	unsigned char lcr, dll, dlm;
 	unsigned int quot;
 
-	lcr = serial_in(port, UART_LCR);
-	serial_out(port, UART_LCR, lcr | UART_LCR_DLAB);
-	dll = serial_in(port, UART_DLL);
-	dlm = serial_in(port, UART_DLM);
-	serial_out(port, UART_LCR, lcr);
+	lcr = serial8250_early_in(port, UART_LCR);
+	serial8250_early_out(port, UART_LCR, lcr | UART_LCR_DLAB);
+	dll = serial8250_early_in(port, UART_DLL);
+	dlm = serial8250_early_in(port, UART_DLM);
+	serial8250_early_out(port, UART_LCR, lcr);
 
 	quot = (dlm << 8) | dll;
 	return (port->uartclk / 16) / quot;
@@ -135,17 +135,17 @@ static void __init init_port(struct early_serial8250_device *device)
 	unsigned int divisor;
 	unsigned char c;
 
-	serial_out(port, UART_LCR, 0x3);	/* 8n1 */
-	serial_out(port, UART_IER, 0);		/* no interrupt */
-	serial_out(port, UART_FCR, 0);		/* no fifo */
-	serial_out(port, UART_MCR, 0x3);	/* DTR + RTS */
+	serial8250_early_out(port, UART_LCR, 0x3);	/* 8n1 */
+	serial8250_early_out(port, UART_IER, 0);	/* no interrupt */
+	serial8250_early_out(port, UART_FCR, 0);	/* no fifo */
+	serial8250_early_out(port, UART_MCR, 0x3);	/* DTR + RTS */
 
 	divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * device->baud);
-	c = serial_in(port, UART_LCR);
-	serial_out(port, UART_LCR, c | UART_LCR_DLAB);
-	serial_out(port, UART_DLL, divisor & 0xff);
-	serial_out(port, UART_DLM, (divisor >> 8) & 0xff);
-	serial_out(port, UART_LCR, c & ~UART_LCR_DLAB);
+	c = serial8250_early_in(port, UART_LCR);
+	serial8250_early_out(port, UART_LCR, c | UART_LCR_DLAB);
+	serial8250_early_out(port, UART_DLL, divisor & 0xff);
+	serial8250_early_out(port, UART_DLM, (divisor >> 8) & 0xff);
+	serial8250_early_out(port, UART_LCR, c & ~UART_LCR_DLAB);
 }
 
 static int __init parse_options(struct early_serial8250_device *device,
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index c174c90fb3fb..c490d20b3fb8 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -105,6 +105,8 @@ extern int early_serial_setup(struct uart_port *port);
 
 extern int serial8250_find_port(struct uart_port *p);
 extern int serial8250_find_port_for_earlycon(void);
+extern unsigned int serial8250_early_in(struct uart_port *port, int offset);
+extern void serial8250_early_out(struct uart_port *port, int offset, int value);
 extern int setup_early_serial8250_console(char *cmdline);
 extern void serial8250_do_set_termios(struct uart_port *port,
 		struct ktermios *termios, struct ktermios *old);
-- 
cgit v1.2.3-55-g7522


From 32cdba1e05418909708a17e52505e8b2ba4381d1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 14 Nov 2012 19:03:42 +0100
Subject: uprobes: Use percpu_rw_semaphore to fix register/unregister vs
 dup_mmap() race

This was always racy, but 268720903f87e0b84b161626c4447b81671b5d18
"uprobes: Rework register_for_each_vma() to make it O(n)" should be
blamed anyway, it made everything worse and I didn't notice.

register/unregister call build_map_info() and then do install/remove
breakpoint for every mm which mmaps inode/offset. This can obviously
race with fork()->dup_mmap() in between and we can miss the child.

uprobe_register() could be easily fixed but unregister is much worse,
the new mm inherits "int3" from parent and there is no way to detect
this if uprobe goes away.

So this patch simply adds percpu_down_read/up_read around dup_mmap(),
and percpu_down_write/up_write into register_for_each_vma().

This adds 2 new hooks into dup_mmap() but we can kill uprobe_dup_mmap()
and fold it into uprobe_end_dup_mmap().

Reported-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h |  8 ++++++++
 kernel/events/uprobes.c | 26 +++++++++++++++++++++++---
 kernel/fork.c           |  2 ++
 3 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 2615c4d7788d..4f628a6fc5b4 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -97,6 +97,8 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void uprobe_start_dup_mmap(void);
+extern void uprobe_end_dup_mmap(void);
 extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
 extern void uprobe_copy_process(struct task_struct *t);
@@ -127,6 +129,12 @@ static inline void
 uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 }
+static inline void uprobe_start_dup_mmap(void)
+{
+}
+static inline void uprobe_end_dup_mmap(void)
+{
+}
 static inline void
 uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 5ce99cfd2e6e..dea7acfbb071 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -33,6 +33,7 @@
 #include <linux/ptrace.h>	/* user_enable_single_step */
 #include <linux/kdebug.h>	/* notifier mechanism */
 #include "../../mm/internal.h"	/* munlock_vma_page */
+#include <linux/percpu-rwsem.h>
 
 #include <linux/uprobes.h>
 
@@ -71,6 +72,8 @@ static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
 #define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
 
+static struct percpu_rw_semaphore dup_mmap_sem;
+
 /*
  * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
  * events active at this time.  Probably a fine grained per inode count is
@@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 	struct map_info *info;
 	int err = 0;
 
+	percpu_down_write(&dup_mmap_sem);
 	info = build_map_info(uprobe->inode->i_mapping,
 					uprobe->offset, is_register);
-	if (IS_ERR(info))
-		return PTR_ERR(info);
+	if (IS_ERR(info)) {
+		err = PTR_ERR(info);
+		goto out;
+	}
 
 	while (info) {
 		struct mm_struct *mm = info->mm;
@@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		mmput(mm);
 		info = free_map_info(info);
 	}
-
+ out:
+	percpu_up_write(&dup_mmap_sem);
 	return err;
 }
 
@@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm)
 	kfree(area);
 }
 
+void uprobe_start_dup_mmap(void)
+{
+	percpu_down_read(&dup_mmap_sem);
+}
+
+void uprobe_end_dup_mmap(void)
+{
+	percpu_up_read(&dup_mmap_sem);
+}
+
 void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
 {
 	newmm->uprobes_state.xol_area = NULL;
@@ -1597,6 +1614,9 @@ static int __init init_uprobes(void)
 		mutex_init(&uprobes_mmap_mutex[i]);
 	}
 
+	if (percpu_init_rwsem(&dup_mmap_sem))
+		return -ENOMEM;
+
 	return register_die_notifier(&uprobe_exception_nb);
 }
 module_init(init_uprobes);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..c497e57aa654 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	unsigned long charge;
 	struct mempolicy *pol;
 
+	uprobe_start_dup_mmap();
 	down_write(&oldmm->mmap_sem);
 	flush_cache_dup_mm(oldmm);
 	uprobe_dup_mmap(oldmm, mm);
@@ -469,6 +470,7 @@ out:
 	up_write(&mm->mmap_sem);
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
+	uprobe_end_dup_mmap();
 	return retval;
 fail_nomem_anon_vma_fork:
 	mpol_put(pol);
-- 
cgit v1.2.3-55-g7522


From 40fc3bb56ed125aa22c0a85c816ae0f923519146 Mon Sep 17 00:00:00 2001
From: Jon Hunter
Date: Fri, 28 Sep 2012 11:34:49 -0500
Subject: ARM: OMAP: Add platform data header for DMTIMERs

Move definition of dmtimer platform data structure in to its own header
under <linux/platform_data>.

Signed-off-by: Jon Hunter <jon-hunter@ti.com>
---
 arch/arm/mach-omap1/timer.c                |  1 +
 arch/arm/mach-omap2/timer.c                |  2 ++
 arch/arm/plat-omap/dmtimer.c               |  2 ++
 arch/arm/plat-omap/include/plat/dmtimer.h  |  8 --------
 include/linux/platform_data/dmtimer-omap.h | 31 ++++++++++++++++++++++++++++++
 5 files changed, 36 insertions(+), 8 deletions(-)
 create mode 100644 include/linux/platform_data/dmtimer-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/timer.c b/arch/arm/mach-omap1/timer.c
index cdeb9d3ef640..bde7a35e5000 100644
--- a/arch/arm/mach-omap1/timer.c
+++ b/arch/arm/mach-omap1/timer.c
@@ -25,6 +25,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/dmtimer-omap.h>
 
 #include <mach/irqs.h>
 
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 1a662dfdda11..4daa8b41c522 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -39,6 +39,8 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/dmtimer-omap.h>
 
 #include <asm/mach/time.h>
 #include <asm/smp_twd.h>
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index efe47744b491..89585c293554 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -43,6 +43,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/dmtimer-omap.h>
 
 #include <plat/dmtimer.h>
 
diff --git a/arch/arm/plat-omap/include/plat/dmtimer.h b/arch/arm/plat-omap/include/plat/dmtimer.h
index b3cd91b60a2e..a3fbc48c332e 100644
--- a/arch/arm/plat-omap/include/plat/dmtimer.h
+++ b/arch/arm/plat-omap/include/plat/dmtimer.h
@@ -124,14 +124,6 @@ struct omap_dm_timer {
 	struct list_head node;
 };
 
-struct dmtimer_platform_data {
-	/* set_timer_src - Only used for OMAP1 devices */
-	int (*set_timer_src)(struct platform_device *pdev, int source);
-	u32 timer_errata;
-	u32 timer_capability;
-	int (*get_context_loss_count)(struct device *);
-};
-
 int omap_dm_timer_reserve_systimer(int id);
 struct omap_dm_timer *omap_dm_timer_request(void);
 struct omap_dm_timer *omap_dm_timer_request_specific(int timer_id);
diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h
new file mode 100644
index 000000000000..a19b78d826e9
--- /dev/null
+++ b/include/linux/platform_data/dmtimer-omap.h
@@ -0,0 +1,31 @@
+/*
+ * DMTIMER platform data for TI OMAP platforms
+ *
+ * Copyright (C) 2012 Texas Instruments
+ * Author: Jon Hunter <jon-hunter@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __PLATFORM_DATA_DMTIMER_OMAP_H__
+#define __PLATFORM_DATA_DMTIMER_OMAP_H__
+
+struct dmtimer_platform_data {
+	/* set_timer_src - Only used for OMAP1 devices */
+	int (*set_timer_src)(struct platform_device *pdev, int source);
+	u32 timer_capability;
+	u32 timer_errata;
+	int (*get_context_loss_count)(struct device *);
+};
+
+#endif /* __PLATFORM_DATA_DMTIMER_OMAP_H__ */
-- 
cgit v1.2.3-55-g7522


From 7583fe778f77cbeb37f54ff5b9afd96119fcab43 Mon Sep 17 00:00:00 2001
From: Piotr Haber
Date: Thu, 8 Nov 2012 09:47:27 +0100
Subject: ssb: fix SPROM offset

Offset for temperature compensation
values is wrong in ssb SPROMv8 map.

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Piotr Haber <phaber@broadcom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ssb/ssb_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index a0525019e1d1..6ecfa02ddbac 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -485,7 +485,7 @@
 #define  SSB_SPROM8_HWIQ_IQSWP_IQCAL_SWP_SHIFT	4
 #define  SSB_SPROM8_HWIQ_IQSWP_HW_IQCAL	0x0020
 #define  SSB_SPROM8_HWIQ_IQSWP_HW_IQCAL_SHIFT	5
-#define SSB_SPROM8_TEMPDELTA		0x00BA
+#define SSB_SPROM8_TEMPDELTA		0x00BC
 #define  SSB_SPROM8_TEMPDELTA_PHYCAL	0x00ff
 #define  SSB_SPROM8_TEMPDELTA_PHYCAL_SHIFT	0
 #define  SSB_SPROM8_TEMPDELTA_PERIOD	0x0f00
-- 
cgit v1.2.3-55-g7522


From 7dbce021a6df9d4812385d11729140829abc3f95 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez
Date: Fri, 16 Nov 2012 19:33:45 +0100
Subject: ipack: move header files to include/linux

Move ipack header files to include/linux/ directory where they belong.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ipack/carriers/tpci200.h |   3 +-
 drivers/ipack/devices/ipoctal.c  |   3 +-
 drivers/ipack/ipack.c            |   3 +-
 drivers/ipack/ipack.h            | 215 ---------------------------------------
 include/linux/ipack.h            | 213 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 218 insertions(+), 219 deletions(-)
 delete mode 100644 drivers/ipack/ipack.h
 create mode 100644 include/linux/ipack.h

(limited to 'include/linux')

diff --git a/drivers/ipack/carriers/tpci200.h b/drivers/ipack/carriers/tpci200.h
index 8d9be277b34d..a7a151dab83c 100644
--- a/drivers/ipack/carriers/tpci200.h
+++ b/drivers/ipack/carriers/tpci200.h
@@ -20,8 +20,7 @@
 #include <linux/spinlock.h>
 #include <linux/swab.h>
 #include <linux/io.h>
-
-#include "../ipack.h"
+#include <linux/ipack.h>
 
 #define TPCI200_NB_SLOT               0x4
 #define TPCI200_NB_BAR                0x6
diff --git a/drivers/ipack/devices/ipoctal.c b/drivers/ipack/devices/ipoctal.c
index 783f120338d1..7f568e268a1e 100644
--- a/drivers/ipack/devices/ipoctal.c
+++ b/drivers/ipack/devices/ipoctal.c
@@ -22,7 +22,8 @@
 #include <linux/slab.h>
 #include <linux/atomic.h>
 #include <linux/io.h>
-#include "../ipack.h"
+#include <linux/ipack.h>
+#include "../ipack_ids.h"
 #include "ipoctal.h"
 #include "scc2698.h"
 
diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c
index 6d5079de52b9..cc5498347acb 100644
--- a/drivers/ipack/ipack.c
+++ b/drivers/ipack/ipack.c
@@ -13,7 +13,8 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/io.h>
-#include "ipack.h"
+#include <linux/ipack.h>
+#include "ipack_ids.h"
 
 #define to_ipack_dev(device) container_of(device, struct ipack_device, dev)
 #define to_ipack_driver(drv) container_of(drv, struct ipack_driver, driver)
diff --git a/drivers/ipack/ipack.h b/drivers/ipack/ipack.h
deleted file mode 100644
index 6760bfaf0ac4..000000000000
--- a/drivers/ipack/ipack.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Industry-pack bus.
- *
- * Copyright (C) 2011-2012 CERN (www.cern.ch)
- * Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; version 2 of the License.
- */
-
-#include <linux/mod_devicetable.h>
-#include <linux/device.h>
-#include <linux/interrupt.h>
-
-#include "ipack_ids.h"
-
-#define IPACK_IDPROM_OFFSET_I			0x01
-#define IPACK_IDPROM_OFFSET_P			0x03
-#define IPACK_IDPROM_OFFSET_A			0x05
-#define IPACK_IDPROM_OFFSET_C			0x07
-#define IPACK_IDPROM_OFFSET_MANUFACTURER_ID	0x09
-#define IPACK_IDPROM_OFFSET_MODEL		0x0B
-#define IPACK_IDPROM_OFFSET_REVISION		0x0D
-#define IPACK_IDPROM_OFFSET_RESERVED		0x0F
-#define IPACK_IDPROM_OFFSET_DRIVER_ID_L		0x11
-#define IPACK_IDPROM_OFFSET_DRIVER_ID_H		0x13
-#define IPACK_IDPROM_OFFSET_NUM_BYTES		0x15
-#define IPACK_IDPROM_OFFSET_CRC			0x17
-
-struct ipack_bus_ops;
-struct ipack_driver;
-
-enum ipack_space {
-	IPACK_IO_SPACE    = 0,
-	IPACK_ID_SPACE,
-	IPACK_INT_SPACE,
-	IPACK_MEM8_SPACE,
-	IPACK_MEM16_SPACE,
-	/* Dummy for counting the number of entries.  Must remain the last
-	 * entry */
-	IPACK_SPACE_COUNT,
-};
-
-/**
- */
-struct ipack_region {
-	phys_addr_t start;
-	size_t      size;
-};
-
-/**
- *	struct ipack_device
- *
- *	@slot: Slot where the device is plugged in the carrier board
- *	@bus: ipack_bus_device where the device is plugged to.
- *	@id_space: Virtual address to ID space.
- *	@io_space: Virtual address to IO space.
- *	@mem_space: Virtual address to MEM space.
- *	@dev: device in kernel representation.
- *
- * Warning: Direct access to mapped memory is possible but the endianness
- * is not the same with PCI carrier or VME carrier. The endianness is managed
- * by the carrier board throught bus->ops.
- */
-struct ipack_device {
-	unsigned int slot;
-	struct ipack_bus_device *bus;
-	struct device dev;
-	void (*release) (struct ipack_device *dev);
-	struct ipack_region      region[IPACK_SPACE_COUNT];
-	u8                      *id;
-	size_t			 id_avail;
-	u32			 id_vendor;
-	u32			 id_device;
-	u8			 id_format;
-	unsigned int		 id_crc_correct:1;
-	unsigned int		 speed_8mhz:1;
-	unsigned int		 speed_32mhz:1;
-};
-
-/**
- * struct ipack_driver_ops -- Callbacks to IPack device driver
- *
- * @probe:  Probe function
- * @remove: Prepare imminent removal of the device.  Services provided by the
- *          device should be revoked.
- */
-
-struct ipack_driver_ops {
-	int (*probe) (struct ipack_device *dev);
-	void (*remove) (struct ipack_device *dev);
-};
-
-/**
- * struct ipack_driver -- Specific data to each ipack device driver
- *
- * @driver: Device driver kernel representation
- * @ops:    Callbacks provided by the IPack device driver
- */
-struct ipack_driver {
-	struct device_driver driver;
-	const struct ipack_device_id *id_table;
-	const struct ipack_driver_ops *ops;
-};
-
-/**
- *	struct ipack_bus_ops - available operations on a bridge module
- *
- *	@map_space: map IP address space
- *	@unmap_space: unmap IP address space
- *	@request_irq: request IRQ
- *	@free_irq: free IRQ
- *	@get_clockrate: Returns the clockrate the carrier is currently
- *		communicating with the device at.
- *	@set_clockrate: Sets the clock-rate for carrier / module communication.
- *		Should return -EINVAL if the requested speed is not supported.
- *	@get_error: Returns the error state for the slot the device is attached
- *		to.
- *	@get_timeout: Returns 1 if the communication with the device has
- *		previously timed out.
- *	@reset_timeout: Resets the state returned by get_timeout.
- */
-struct ipack_bus_ops {
-	int (*request_irq) (struct ipack_device *dev,
-			    irqreturn_t (*handler)(void *), void *arg);
-	int (*free_irq) (struct ipack_device *dev);
-	int (*get_clockrate) (struct ipack_device *dev);
-	int (*set_clockrate) (struct ipack_device *dev, int mherz);
-	int (*get_error) (struct ipack_device *dev);
-	int (*get_timeout) (struct ipack_device *dev);
-	int (*reset_timeout) (struct ipack_device *dev);
-};
-
-/**
- *	struct ipack_bus_device
- *
- *	@dev: pointer to carrier device
- *	@slots: number of slots available
- *	@bus_nr: ipack bus number
- *	@ops: bus operations for the mezzanine drivers
- */
-struct ipack_bus_device {
-	struct device *parent;
-	int slots;
-	int bus_nr;
-	const struct ipack_bus_ops *ops;
-};
-
-/**
- *	ipack_bus_register -- register a new ipack bus
- *
- * @parent: pointer to the parent device, if any.
- * @slots: number of slots available in the bus device.
- * @ops: bus operations for the mezzanine drivers.
- *
- * The carrier board device should call this function to register itself as
- * available bus device in ipack.
- */
-struct ipack_bus_device *ipack_bus_register(struct device *parent, int slots,
-					    const struct ipack_bus_ops *ops);
-
-/**
- *	ipack_bus_unregister -- unregister an ipack bus
- */
-int ipack_bus_unregister(struct ipack_bus_device *bus);
-
-/**
- * ipack_driver_register -- Register a new ipack device driver
- *
- * Called by a ipack driver to register itself as a driver
- * that can manage ipack devices.
- */
-int ipack_driver_register(struct ipack_driver *edrv, struct module *owner,
-			  const char *name);
-void ipack_driver_unregister(struct ipack_driver *edrv);
-
-/**
- *	ipack_device_register -- register an IPack device with the kernel
- *	@dev: the new device to register.
- *
- *	Register a new IPack device ("module" in IndustryPack jargon). The call
- *	is done by the carrier driver.  The carrier should populate the fields
- *	bus and slot as well as the region array of @dev prior to calling this
- *	function.  The rest of the fields will be allocated and populated
- *	during registration.
- *
- *	Return zero on success or error code on failure.
- */
-int ipack_device_register(struct ipack_device *dev);
-void ipack_device_unregister(struct ipack_device *dev);
-
-/**
- * DEFINE_IPACK_DEVICE_TABLE - macro used to describe a IndustryPack table
- * @_table: device table name
- *
- * This macro is used to create a struct ipack_device_id array (a device table)
- * in a generic manner.
- */
-#define DEFINE_IPACK_DEVICE_TABLE(_table) \
-	const struct ipack_device_id _table[] __devinitconst
-
-/**
- * IPACK_DEVICE - macro used to describe a specific IndustryPack device
- * @_format: the format version (currently either 1 or 2, 8 bit value)
- * @vend:    the 8 or 24 bit IndustryPack Vendor ID
- * @dev:     the 8 or 16  bit IndustryPack Device ID
- *
- * This macro is used to create a struct ipack_device_id that matches a specific
- * device.
- */
-#define IPACK_DEVICE(_format, vend, dev) \
-	 .format = (_format), \
-	 .vendor = (vend), \
-	 .device = (dev)
diff --git a/include/linux/ipack.h b/include/linux/ipack.h
new file mode 100644
index 000000000000..7a0a3085ab5b
--- /dev/null
+++ b/include/linux/ipack.h
@@ -0,0 +1,213 @@
+/*
+ * Industry-pack bus.
+ *
+ * Copyright (C) 2011-2012 CERN (www.cern.ch)
+ * Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+
+#define IPACK_IDPROM_OFFSET_I			0x01
+#define IPACK_IDPROM_OFFSET_P			0x03
+#define IPACK_IDPROM_OFFSET_A			0x05
+#define IPACK_IDPROM_OFFSET_C			0x07
+#define IPACK_IDPROM_OFFSET_MANUFACTURER_ID	0x09
+#define IPACK_IDPROM_OFFSET_MODEL		0x0B
+#define IPACK_IDPROM_OFFSET_REVISION		0x0D
+#define IPACK_IDPROM_OFFSET_RESERVED		0x0F
+#define IPACK_IDPROM_OFFSET_DRIVER_ID_L		0x11
+#define IPACK_IDPROM_OFFSET_DRIVER_ID_H		0x13
+#define IPACK_IDPROM_OFFSET_NUM_BYTES		0x15
+#define IPACK_IDPROM_OFFSET_CRC			0x17
+
+struct ipack_bus_ops;
+struct ipack_driver;
+
+enum ipack_space {
+	IPACK_IO_SPACE    = 0,
+	IPACK_ID_SPACE,
+	IPACK_INT_SPACE,
+	IPACK_MEM8_SPACE,
+	IPACK_MEM16_SPACE,
+	/* Dummy for counting the number of entries.  Must remain the last
+	 * entry */
+	IPACK_SPACE_COUNT,
+};
+
+/**
+ */
+struct ipack_region {
+	phys_addr_t start;
+	size_t      size;
+};
+
+/**
+ *	struct ipack_device
+ *
+ *	@slot: Slot where the device is plugged in the carrier board
+ *	@bus: ipack_bus_device where the device is plugged to.
+ *	@id_space: Virtual address to ID space.
+ *	@io_space: Virtual address to IO space.
+ *	@mem_space: Virtual address to MEM space.
+ *	@dev: device in kernel representation.
+ *
+ * Warning: Direct access to mapped memory is possible but the endianness
+ * is not the same with PCI carrier or VME carrier. The endianness is managed
+ * by the carrier board throught bus->ops.
+ */
+struct ipack_device {
+	unsigned int slot;
+	struct ipack_bus_device *bus;
+	struct device dev;
+	void (*release) (struct ipack_device *dev);
+	struct ipack_region      region[IPACK_SPACE_COUNT];
+	u8                      *id;
+	size_t			 id_avail;
+	u32			 id_vendor;
+	u32			 id_device;
+	u8			 id_format;
+	unsigned int		 id_crc_correct:1;
+	unsigned int		 speed_8mhz:1;
+	unsigned int		 speed_32mhz:1;
+};
+
+/**
+ * struct ipack_driver_ops -- Callbacks to IPack device driver
+ *
+ * @probe:  Probe function
+ * @remove: Prepare imminent removal of the device.  Services provided by the
+ *          device should be revoked.
+ */
+
+struct ipack_driver_ops {
+	int (*probe) (struct ipack_device *dev);
+	void (*remove) (struct ipack_device *dev);
+};
+
+/**
+ * struct ipack_driver -- Specific data to each ipack device driver
+ *
+ * @driver: Device driver kernel representation
+ * @ops:    Callbacks provided by the IPack device driver
+ */
+struct ipack_driver {
+	struct device_driver driver;
+	const struct ipack_device_id *id_table;
+	const struct ipack_driver_ops *ops;
+};
+
+/**
+ *	struct ipack_bus_ops - available operations on a bridge module
+ *
+ *	@map_space: map IP address space
+ *	@unmap_space: unmap IP address space
+ *	@request_irq: request IRQ
+ *	@free_irq: free IRQ
+ *	@get_clockrate: Returns the clockrate the carrier is currently
+ *		communicating with the device at.
+ *	@set_clockrate: Sets the clock-rate for carrier / module communication.
+ *		Should return -EINVAL if the requested speed is not supported.
+ *	@get_error: Returns the error state for the slot the device is attached
+ *		to.
+ *	@get_timeout: Returns 1 if the communication with the device has
+ *		previously timed out.
+ *	@reset_timeout: Resets the state returned by get_timeout.
+ */
+struct ipack_bus_ops {
+	int (*request_irq) (struct ipack_device *dev,
+			    irqreturn_t (*handler)(void *), void *arg);
+	int (*free_irq) (struct ipack_device *dev);
+	int (*get_clockrate) (struct ipack_device *dev);
+	int (*set_clockrate) (struct ipack_device *dev, int mherz);
+	int (*get_error) (struct ipack_device *dev);
+	int (*get_timeout) (struct ipack_device *dev);
+	int (*reset_timeout) (struct ipack_device *dev);
+};
+
+/**
+ *	struct ipack_bus_device
+ *
+ *	@dev: pointer to carrier device
+ *	@slots: number of slots available
+ *	@bus_nr: ipack bus number
+ *	@ops: bus operations for the mezzanine drivers
+ */
+struct ipack_bus_device {
+	struct device *parent;
+	int slots;
+	int bus_nr;
+	const struct ipack_bus_ops *ops;
+};
+
+/**
+ *	ipack_bus_register -- register a new ipack bus
+ *
+ * @parent: pointer to the parent device, if any.
+ * @slots: number of slots available in the bus device.
+ * @ops: bus operations for the mezzanine drivers.
+ *
+ * The carrier board device should call this function to register itself as
+ * available bus device in ipack.
+ */
+struct ipack_bus_device *ipack_bus_register(struct device *parent, int slots,
+					    const struct ipack_bus_ops *ops);
+
+/**
+ *	ipack_bus_unregister -- unregister an ipack bus
+ */
+int ipack_bus_unregister(struct ipack_bus_device *bus);
+
+/**
+ * ipack_driver_register -- Register a new ipack device driver
+ *
+ * Called by a ipack driver to register itself as a driver
+ * that can manage ipack devices.
+ */
+int ipack_driver_register(struct ipack_driver *edrv, struct module *owner,
+			  const char *name);
+void ipack_driver_unregister(struct ipack_driver *edrv);
+
+/**
+ *	ipack_device_register -- register an IPack device with the kernel
+ *	@dev: the new device to register.
+ *
+ *	Register a new IPack device ("module" in IndustryPack jargon). The call
+ *	is done by the carrier driver.  The carrier should populate the fields
+ *	bus and slot as well as the region array of @dev prior to calling this
+ *	function.  The rest of the fields will be allocated and populated
+ *	during registration.
+ *
+ *	Return zero on success or error code on failure.
+ */
+int ipack_device_register(struct ipack_device *dev);
+void ipack_device_unregister(struct ipack_device *dev);
+
+/**
+ * DEFINE_IPACK_DEVICE_TABLE - macro used to describe a IndustryPack table
+ * @_table: device table name
+ *
+ * This macro is used to create a struct ipack_device_id array (a device table)
+ * in a generic manner.
+ */
+#define DEFINE_IPACK_DEVICE_TABLE(_table) \
+	const struct ipack_device_id _table[] __devinitconst
+
+/**
+ * IPACK_DEVICE - macro used to describe a specific IndustryPack device
+ * @_format: the format version (currently either 1 or 2, 8 bit value)
+ * @vend:    the 8 or 24 bit IndustryPack Vendor ID
+ * @dev:     the 8 or 16  bit IndustryPack Device ID
+ *
+ * This macro is used to create a struct ipack_device_id that matches a specific
+ * device.
+ */
+#define IPACK_DEVICE(_format, vend, dev) \
+	 .format = (_format), \
+	 .vendor = (vend), \
+	 .device = (dev)
-- 
cgit v1.2.3-55-g7522


From 27cf2d1b873fc50a2c0388253ec666fa4c61bfd4 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez
Date: Fri, 16 Nov 2012 19:33:46 +0100
Subject: ipack: remove ipack_ids.h file

Its contents are merged into ipack.h. So this file is not needed.

Doing that, it simplifies the ipack-related driver development.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ipack/devices/ipoctal.c |  1 -
 drivers/ipack/ipack.c           |  1 -
 drivers/ipack/ipack_ids.h       | 32 --------------------------------
 include/linux/ipack.h           | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 33 insertions(+), 34 deletions(-)
 delete mode 100644 drivers/ipack/ipack_ids.h

(limited to 'include/linux')

diff --git a/drivers/ipack/devices/ipoctal.c b/drivers/ipack/devices/ipoctal.c
index 7f568e268a1e..c06ab396e84f 100644
--- a/drivers/ipack/devices/ipoctal.c
+++ b/drivers/ipack/devices/ipoctal.c
@@ -23,7 +23,6 @@
 #include <linux/atomic.h>
 #include <linux/io.h>
 #include <linux/ipack.h>
-#include "../ipack_ids.h"
 #include "ipoctal.h"
 #include "scc2698.h"
 
diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c
index cc5498347acb..7ec6b208b1cb 100644
--- a/drivers/ipack/ipack.c
+++ b/drivers/ipack/ipack.c
@@ -14,7 +14,6 @@
 #include <linux/idr.h>
 #include <linux/io.h>
 #include <linux/ipack.h>
-#include "ipack_ids.h"
 
 #define to_ipack_dev(device) container_of(device, struct ipack_device, dev)
 #define to_ipack_driver(drv) container_of(drv, struct ipack_driver, driver)
diff --git a/drivers/ipack/ipack_ids.h b/drivers/ipack/ipack_ids.h
deleted file mode 100644
index 8153fee3f2f7..000000000000
--- a/drivers/ipack/ipack_ids.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * IndustryPack Fromat, Vendor and Device IDs.
- */
-
-/* ID section format versions */
-#define IPACK_ID_VERSION_INVALID	0x00
-#define IPACK_ID_VERSION_1		0x01
-#define IPACK_ID_VERSION_2		0x02
-
-/* Vendors and devices. Sort key: vendor first, device next. */
-#define IPACK1_VENDOR_ID_RESERVED1	0x00
-#define IPACK1_VENDOR_ID_RESERVED2	0xFF
-#define IPACK1_VENDOR_ID_UNREGISTRED01	0x01
-#define IPACK1_VENDOR_ID_UNREGISTRED02	0x02
-#define IPACK1_VENDOR_ID_UNREGISTRED03	0x03
-#define IPACK1_VENDOR_ID_UNREGISTRED04	0x04
-#define IPACK1_VENDOR_ID_UNREGISTRED05	0x05
-#define IPACK1_VENDOR_ID_UNREGISTRED06	0x06
-#define IPACK1_VENDOR_ID_UNREGISTRED07	0x07
-#define IPACK1_VENDOR_ID_UNREGISTRED08	0x08
-#define IPACK1_VENDOR_ID_UNREGISTRED09	0x09
-#define IPACK1_VENDOR_ID_UNREGISTRED10	0x0A
-#define IPACK1_VENDOR_ID_UNREGISTRED11	0x0B
-#define IPACK1_VENDOR_ID_UNREGISTRED12	0x0C
-#define IPACK1_VENDOR_ID_UNREGISTRED13	0x0D
-#define IPACK1_VENDOR_ID_UNREGISTRED14	0x0E
-#define IPACK1_VENDOR_ID_UNREGISTRED15	0x0F
-
-#define IPACK1_VENDOR_ID_SBS            0xF0
-#define IPACK1_DEVICE_ID_SBS_OCTAL_232  0x22
-#define IPACK1_DEVICE_ID_SBS_OCTAL_422  0x2A
-#define IPACK1_DEVICE_ID_SBS_OCTAL_485  0x48
diff --git a/include/linux/ipack.h b/include/linux/ipack.h
index 7a0a3085ab5b..a360c73129a6 100644
--- a/include/linux/ipack.h
+++ b/include/linux/ipack.h
@@ -26,6 +26,39 @@
 #define IPACK_IDPROM_OFFSET_NUM_BYTES		0x15
 #define IPACK_IDPROM_OFFSET_CRC			0x17
 
+/*
+ * IndustryPack Fromat, Vendor and Device IDs.
+ */
+
+/* ID section format versions */
+#define IPACK_ID_VERSION_INVALID	0x00
+#define IPACK_ID_VERSION_1		0x01
+#define IPACK_ID_VERSION_2		0x02
+
+/* Vendors and devices. Sort key: vendor first, device next. */
+#define IPACK1_VENDOR_ID_RESERVED1	0x00
+#define IPACK1_VENDOR_ID_RESERVED2	0xFF
+#define IPACK1_VENDOR_ID_UNREGISTRED01	0x01
+#define IPACK1_VENDOR_ID_UNREGISTRED02	0x02
+#define IPACK1_VENDOR_ID_UNREGISTRED03	0x03
+#define IPACK1_VENDOR_ID_UNREGISTRED04	0x04
+#define IPACK1_VENDOR_ID_UNREGISTRED05	0x05
+#define IPACK1_VENDOR_ID_UNREGISTRED06	0x06
+#define IPACK1_VENDOR_ID_UNREGISTRED07	0x07
+#define IPACK1_VENDOR_ID_UNREGISTRED08	0x08
+#define IPACK1_VENDOR_ID_UNREGISTRED09	0x09
+#define IPACK1_VENDOR_ID_UNREGISTRED10	0x0A
+#define IPACK1_VENDOR_ID_UNREGISTRED11	0x0B
+#define IPACK1_VENDOR_ID_UNREGISTRED12	0x0C
+#define IPACK1_VENDOR_ID_UNREGISTRED13	0x0D
+#define IPACK1_VENDOR_ID_UNREGISTRED14	0x0E
+#define IPACK1_VENDOR_ID_UNREGISTRED15	0x0F
+
+#define IPACK1_VENDOR_ID_SBS            0xF0
+#define IPACK1_DEVICE_ID_SBS_OCTAL_232  0x22
+#define IPACK1_DEVICE_ID_SBS_OCTAL_422  0x2A
+#define IPACK1_DEVICE_ID_SBS_OCTAL_485  0x48
+
 struct ipack_bus_ops;
 struct ipack_driver;
 
-- 
cgit v1.2.3-55-g7522


From 404525d5a7ecc847b5ac178dad96402f1e102ccc Mon Sep 17 00:00:00 2001
From: Maxime Ripard
Date: Fri, 16 Nov 2012 21:21:43 +0100
Subject: clk: sunxi: Add dummy fixed rate clock for Allwinner A1X SoCs

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: Mike Turquette <mturquette@ti.com>
---
 drivers/clk/Makefile      |  1 +
 drivers/clk/clk-sunxi.c   | 30 ++++++++++++++++++++++++++++++
 include/linux/clk/sunxi.h | 22 ++++++++++++++++++++++
 3 files changed, 53 insertions(+)
 create mode 100644 drivers/clk/clk-sunxi.c
 create mode 100644 include/linux/clk/sunxi.h

(limited to 'include/linux')

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 71a25b91de00..9c300a828ede 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -19,6 +19,7 @@ endif
 obj-$(CONFIG_MACH_LOONGSON1)	+= clk-ls1x.o
 obj-$(CONFIG_ARCH_U8500)	+= ux500/
 obj-$(CONFIG_ARCH_VT8500)	+= clk-vt8500.o
+obj-$(CONFIG_ARCH_SUNXI)	+= clk-sunxi.o
 
 # Chip specific
 obj-$(CONFIG_COMMON_CLK_WM831X) += clk-wm831x.o
diff --git a/drivers/clk/clk-sunxi.c b/drivers/clk/clk-sunxi.c
new file mode 100644
index 000000000000..0e831b584ba7
--- /dev/null
+++ b/drivers/clk/clk-sunxi.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/clk/sunxi.h>
+#include <linux/of.h>
+
+static const __initconst struct of_device_id clk_match[] = {
+	{ .compatible = "fixed-clock", .data = of_fixed_clk_setup, },
+	{}
+};
+
+void __init sunxi_init_clocks(void)
+{
+	of_clk_init(clk_match);
+}
diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h
new file mode 100644
index 000000000000..e074fdd5a236
--- /dev/null
+++ b/include/linux/clk/sunxi.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2012 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_CLK_SUNXI_H_
+#define __LINUX_CLK_SUNXI_H_
+
+void __init sunxi_init_clocks(void);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From b2ac5d7549710173ea0217bf8c7b3f71da5220d4 Mon Sep 17 00:00:00 2001
From: Maxime Ripard
Date: Mon, 12 Nov 2012 15:07:50 +0100
Subject: clocksource: sunxi: Add Allwinner A1X Timer Driver

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: John Stultz <johnstul@us.ibm.com>
---
 .../bindings/timer/allwinner,sunxi-timer.txt       |  17 +++
 drivers/clocksource/Kconfig                        |   3 +
 drivers/clocksource/Makefile                       |   1 +
 drivers/clocksource/sunxi_timer.c                  | 170 +++++++++++++++++++++
 include/linux/sunxi_timer.h                        |  24 +++
 5 files changed, 215 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/timer/allwinner,sunxi-timer.txt
 create mode 100644 drivers/clocksource/sunxi_timer.c
 create mode 100644 include/linux/sunxi_timer.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/timer/allwinner,sunxi-timer.txt b/Documentation/devicetree/bindings/timer/allwinner,sunxi-timer.txt
new file mode 100644
index 000000000000..0c7b64e95a61
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sunxi-timer.txt
@@ -0,0 +1,17 @@
+Allwinner A1X SoCs Timer Controller
+
+Required properties:
+
+- compatible : should be "allwinner,sunxi-timer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : The interrupt of the first timer
+- clocks: phandle to the source clock (usually a 24 MHz fixed clock)
+
+Example:
+
+timer {
+	compatible = "allwinner,sunxi-timer";
+	reg = <0x01c20c00 0x400>;
+	interrupts = <22>;
+	clocks = <&osc>;
+};
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 6a78073c3808..a0985732f1e2 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -22,6 +22,9 @@ config DW_APB_TIMER_OF
 config ARMADA_370_XP_TIMER
 	bool
 
+config SUNXI_TIMER
+	bool
+
 config CLKSRC_DBX500_PRCMU
 	bool "Clocksource PRCMU Timer"
 	depends on UX500_SOC_DB8500
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 603be366f762..36f06de4c5ab 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -14,5 +14,6 @@ obj-$(CONFIG_DW_APB_TIMER_OF)	+= dw_apb_timer_of.o
 obj-$(CONFIG_CLKSRC_DBX500_PRCMU)	+= clksrc-dbx500-prcmu.o
 obj-$(CONFIG_ARMADA_370_XP_TIMER)	+= time-armada-370-xp.o
 obj-$(CONFIG_ARCH_BCM2835)	+= bcm2835_timer.o
+obj-$(CONFIG_SUNXI_TIMER)	+= sunxi_timer.o
 
 obj-$(CONFIG_CLKSRC_ARM_GENERIC)	+= arm_generic.o
diff --git a/drivers/clocksource/sunxi_timer.c b/drivers/clocksource/sunxi_timer.c
new file mode 100644
index 000000000000..3c46434b64cb
--- /dev/null
+++ b/drivers/clocksource/sunxi_timer.c
@@ -0,0 +1,170 @@
+/*
+ * Allwinner A1X SoCs timer handling.
+ *
+ * Copyright (C) 2012 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * Based on code from
+ * Allwinner Technology Co., Ltd. <www.allwinnertech.com>
+ * Benn Huang <benn@allwinnertech.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqreturn.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sunxi_timer.h>
+#include <linux/clk/sunxi.h>
+
+#define TIMER_CTL_REG		0x00
+#define TIMER_CTL_ENABLE		(1 << 0)
+#define TIMER_IRQ_ST_REG	0x04
+#define TIMER0_CTL_REG		0x10
+#define TIMER0_CTL_ENABLE		(1 << 0)
+#define TIMER0_CTL_AUTORELOAD		(1 << 1)
+#define TIMER0_CTL_ONESHOT		(1 << 7)
+#define TIMER0_INTVAL_REG	0x14
+#define TIMER0_CNTVAL_REG	0x18
+
+#define TIMER_SCAL		16
+
+static void __iomem *timer_base;
+
+static void sunxi_clkevt_mode(enum clock_event_mode mode,
+			      struct clock_event_device *clk)
+{
+	u32 u = readl(timer_base + TIMER0_CTL_REG);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		u &= ~(TIMER0_CTL_ONESHOT);
+		writel(u | TIMER0_CTL_ENABLE, timer_base + TIMER0_CTL_REG);
+		break;
+
+	case CLOCK_EVT_MODE_ONESHOT:
+		writel(u | TIMER0_CTL_ONESHOT, timer_base + TIMER0_CTL_REG);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		writel(u & ~(TIMER0_CTL_ENABLE), timer_base + TIMER0_CTL_REG);
+		break;
+	}
+}
+
+static int sunxi_clkevt_next_event(unsigned long evt,
+				   struct clock_event_device *unused)
+{
+	u32 u = readl(timer_base + TIMER0_CTL_REG);
+	writel(evt, timer_base + TIMER0_CNTVAL_REG);
+	writel(u | TIMER0_CTL_ENABLE | TIMER0_CTL_AUTORELOAD,
+	       timer_base + TIMER0_CTL_REG);
+
+	return 0;
+}
+
+static struct clock_event_device sunxi_clockevent = {
+	.name = "sunxi_tick",
+	.shift = 32,
+	.rating = 300,
+	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode = sunxi_clkevt_mode,
+	.set_next_event = sunxi_clkevt_next_event,
+};
+
+
+static irqreturn_t sunxi_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+
+	writel(0x1, timer_base + TIMER_IRQ_ST_REG);
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction sunxi_timer_irq = {
+	.name = "sunxi_timer0",
+	.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.handler = sunxi_timer_interrupt,
+	.dev_id = &sunxi_clockevent,
+};
+
+static struct of_device_id sunxi_timer_dt_ids[] = {
+	{ .compatible = "allwinner,sunxi-timer" },
+};
+
+static void __init sunxi_timer_init(void)
+{
+	struct device_node *node;
+	unsigned long rate = 0;
+	struct clk *clk;
+	int ret, irq;
+	u32 val;
+
+	node = of_find_matching_node(NULL, sunxi_timer_dt_ids);
+	if (!node)
+		panic("No sunxi timer node");
+
+	timer_base = of_iomap(node, 0);
+	if (!timer_base)
+		panic("Can't map registers");
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (irq <= 0)
+		panic("Can't parse IRQ");
+
+	sunxi_init_clocks();
+
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk))
+		panic("Can't get timer clock");
+
+	rate = clk_get_rate(clk);
+
+	writel(rate / (TIMER_SCAL * HZ),
+	       timer_base + TIMER0_INTVAL_REG);
+
+	/* set clock source to HOSC, 16 pre-division */
+	val = readl(timer_base + TIMER0_CTL_REG);
+	val &= ~(0x07 << 4);
+	val &= ~(0x03 << 2);
+	val |= (4 << 4) | (1 << 2);
+	writel(val, timer_base + TIMER0_CTL_REG);
+
+	/* set mode to auto reload */
+	val = readl(timer_base + TIMER0_CTL_REG);
+	writel(val | TIMER0_CTL_AUTORELOAD, timer_base + TIMER0_CTL_REG);
+
+	ret = setup_irq(irq, &sunxi_timer_irq);
+	if (ret)
+		pr_warn("failed to setup irq %d\n", irq);
+
+	/* Enable timer0 interrupt */
+	val = readl(timer_base + TIMER_CTL_REG);
+	writel(val | TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG);
+
+	sunxi_clockevent.mult = div_sc(rate / TIMER_SCAL,
+				NSEC_PER_SEC,
+				sunxi_clockevent.shift);
+	sunxi_clockevent.max_delta_ns = clockevent_delta2ns(0xff,
+							    &sunxi_clockevent);
+	sunxi_clockevent.min_delta_ns = clockevent_delta2ns(0x1,
+							    &sunxi_clockevent);
+	sunxi_clockevent.cpumask = cpumask_of(0);
+
+	clockevents_register_device(&sunxi_clockevent);
+}
+
+struct sys_timer sunxi_timer = {
+	.init = sunxi_timer_init,
+};
diff --git a/include/linux/sunxi_timer.h b/include/linux/sunxi_timer.h
new file mode 100644
index 000000000000..b9165bba6e61
--- /dev/null
+++ b/include/linux/sunxi_timer.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2012 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __SUNXI_TIMER_H
+#define __SUNXI_TIMER_H
+
+#include <asm/mach/time.h>
+
+extern struct sys_timer sunxi_timer;
+
+#endif
-- 
cgit v1.2.3-55-g7522


From afd24e146826cec0f46929263a0c874406a19cd8 Mon Sep 17 00:00:00 2001
From: Maxime Ripard
Date: Wed, 14 Nov 2012 09:59:14 +0100
Subject: irqchip: sunxi: Add irq controller driver

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
CC: Thomas Gleixner <tglx@linutronix.de>
---
 .../interrupt-controller/allwinner,sunxi-ic.txt    | 104 ++++++++++++++
 drivers/irqchip/Makefile                           |   1 +
 drivers/irqchip/irq-sunxi.c                        | 150 +++++++++++++++++++++
 include/linux/irqchip/sunxi.h                      |  27 ++++
 4 files changed, 282 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-ic.txt
 create mode 100644 drivers/irqchip/irq-sunxi.c
 create mode 100644 include/linux/irqchip/sunxi.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-ic.txt
new file mode 100644
index 000000000000..7f9fb85f5456
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-ic.txt
@@ -0,0 +1,104 @@
+Allwinner Sunxi Interrupt Controller
+
+Required properties:
+
+- compatible : should be "allwinner,sunxi-ic"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The value shall be 1.
+
+The interrupt sources are as follows:
+
+0: ENMI
+1: UART0
+2: UART1
+3: UART2
+4: UART3
+5: IR0
+6: IR1
+7: I2C0
+8: I2C1
+9: I2C2
+10: SPI0
+11: SPI1
+12: SPI2
+13: SPDIF
+14: AC97
+15: TS
+16: I2S
+17: UART4
+18: UART5
+19: UART6
+20: UART7
+21: KEYPAD
+22: TIMER0
+23: TIMER1
+24: TIMER2
+25: TIMER3
+26: CAN
+27: DMA
+28: PIO
+29: TOUCH_PANEL
+30: AUDIO_CODEC
+31: LRADC
+32: SDMC0
+33: SDMC1
+34: SDMC2
+35: SDMC3
+36: MEMSTICK
+37: NAND
+38: USB0
+39: USB1
+40: USB2
+41: SCR
+42: CSI0
+43: CSI1
+44: LCDCTRL0
+45: LCDCTRL1
+46: MP
+47: DEFEBE0
+48: DEFEBE1
+49: PMU
+50: SPI3
+51: TZASC
+52: PATA
+53: VE
+54: SS
+55: EMAC
+56: SATA
+57: GPS
+58: HDMI
+59: TVE
+60: ACE
+61: TVD
+62: PS2_0
+63: PS2_1
+64: USB3
+65: USB4
+66: PLE_PFM
+67: TIMER4
+68: TIMER5
+69: GPU_GP
+70: GPU_GPMMU
+71: GPU_PP0
+72: GPU_PPMMU0
+73: GPU_PMU
+74: GPU_RSV0
+75: GPU_RSV1
+76: GPU_RSV2
+77: GPU_RSV3
+78: GPU_RSV4
+79: GPU_RSV5
+80: GPU_RSV6
+82: SYNC_TIMER0
+83: SYNC_TIMER1
+
+Example:
+
+intc: interrupt-controller {
+	compatible = "allwinner,sunxi-ic";
+	reg = <0x01c20400 0x400>;
+	interrupt-controller;
+	#interrupt-cells = <2>;
+};
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 054321db4350..2444d07544cd 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o
+obj-$(CONFIG_ARCH_SUNXI)   += irq-sunxi.o
diff --git a/drivers/irqchip/irq-sunxi.c b/drivers/irqchip/irq-sunxi.c
new file mode 100644
index 000000000000..eef41a49acae
--- /dev/null
+++ b/drivers/irqchip/irq-sunxi.c
@@ -0,0 +1,150 @@
+/*
+ * Allwinner A1X SoCs IRQ chip driver.
+ *
+ * Copyright (C) 2012 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * Based on code from
+ * Allwinner Technology Co., Ltd. <www.allwinnertech.com>
+ * Benn Huang <benn@allwinnertech.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/sunxi.h>
+
+#define SUNXI_IRQ_VECTOR_REG		0x00
+#define SUNXI_IRQ_PROTECTION_REG	0x08
+#define SUNXI_IRQ_NMI_CTRL_REG		0x0c
+#define SUNXI_IRQ_PENDING_REG(x)	(0x10 + 0x4 * x)
+#define SUNXI_IRQ_FIQ_PENDING_REG(x)	(0x20 + 0x4 * x)
+#define SUNXI_IRQ_ENABLE_REG(x)		(0x40 + 0x4 * x)
+#define SUNXI_IRQ_MASK_REG(x)		(0x50 + 0x4 * x)
+
+static void __iomem *sunxi_irq_base;
+static struct irq_domain *sunxi_irq_domain;
+
+void sunxi_irq_ack(struct irq_data *irqd)
+{
+	unsigned int irq = irqd_to_hwirq(irqd);
+	unsigned int irq_off = irq % 32;
+	int reg = irq / 32;
+	u32 val;
+
+	val = readl(sunxi_irq_base + SUNXI_IRQ_PENDING_REG(reg));
+	writel(val | (1 << irq_off),
+	       sunxi_irq_base + SUNXI_IRQ_PENDING_REG(reg));
+}
+
+static void sunxi_irq_mask(struct irq_data *irqd)
+{
+	unsigned int irq = irqd_to_hwirq(irqd);
+	unsigned int irq_off = irq % 32;
+	int reg = irq / 32;
+	u32 val;
+
+	val = readl(sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(reg));
+	writel(val & ~(1 << irq_off),
+	       sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(reg));
+}
+
+static void sunxi_irq_unmask(struct irq_data *irqd)
+{
+	unsigned int irq = irqd_to_hwirq(irqd);
+	unsigned int irq_off = irq % 32;
+	int reg = irq / 32;
+	u32 val;
+
+	val = readl(sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(reg));
+	writel(val | (1 << irq_off),
+	       sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(reg));
+}
+
+static struct irq_chip sunxi_irq_chip = {
+	.name		= "sunxi_irq",
+	.irq_ack	= sunxi_irq_ack,
+	.irq_mask	= sunxi_irq_mask,
+	.irq_unmask	= sunxi_irq_unmask,
+};
+
+static int sunxi_irq_map(struct irq_domain *d, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(virq, &sunxi_irq_chip,
+				 handle_level_irq);
+	set_irq_flags(virq, IRQF_VALID | IRQF_PROBE);
+
+	return 0;
+}
+
+static struct irq_domain_ops sunxi_irq_ops = {
+	.map = sunxi_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int __init sunxi_of_init(struct device_node *node,
+				struct device_node *parent)
+{
+	sunxi_irq_base = of_iomap(node, 0);
+	if (!sunxi_irq_base)
+		panic("%s: unable to map IC registers\n",
+			node->full_name);
+
+	/* Disable all interrupts */
+	writel(0, sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(0));
+	writel(0, sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(1));
+	writel(0, sunxi_irq_base + SUNXI_IRQ_ENABLE_REG(2));
+
+	/* Mask all the interrupts */
+	writel(0, sunxi_irq_base + SUNXI_IRQ_MASK_REG(0));
+	writel(0, sunxi_irq_base + SUNXI_IRQ_MASK_REG(1));
+	writel(0, sunxi_irq_base + SUNXI_IRQ_MASK_REG(2));
+
+	/* Clear all the pending interrupts */
+	writel(0xffffffff, sunxi_irq_base + SUNXI_IRQ_PENDING_REG(0));
+	writel(0xffffffff, sunxi_irq_base + SUNXI_IRQ_PENDING_REG(1));
+	writel(0xffffffff, sunxi_irq_base + SUNXI_IRQ_PENDING_REG(2));
+
+	/* Enable protection mode */
+	writel(0x01, sunxi_irq_base + SUNXI_IRQ_PROTECTION_REG);
+
+	/* Configure the external interrupt source type */
+	writel(0x00, sunxi_irq_base + SUNXI_IRQ_NMI_CTRL_REG);
+
+	sunxi_irq_domain = irq_domain_add_linear(node, 3 * 32,
+						 &sunxi_irq_ops, NULL);
+	if (!sunxi_irq_domain)
+		panic("%s: unable to create IRQ domain\n", node->full_name);
+
+	return 0;
+}
+
+static struct of_device_id sunxi_irq_dt_ids[] __initconst = {
+	{ .compatible = "allwinner,sunxi-ic", .data = sunxi_of_init }
+};
+
+void __init sunxi_init_irq(void)
+{
+	of_irq_init(sunxi_irq_dt_ids);
+}
+
+asmlinkage void __exception_irq_entry sunxi_handle_irq(struct pt_regs *regs)
+{
+	u32 irq, hwirq;
+
+	hwirq = readl(sunxi_irq_base + SUNXI_IRQ_VECTOR_REG) >> 2;
+	while (hwirq != 0) {
+		irq = irq_find_mapping(sunxi_irq_domain, hwirq);
+		handle_IRQ(irq, regs);
+		hwirq = readl(sunxi_irq_base + SUNXI_IRQ_VECTOR_REG) >> 2;
+	}
+}
diff --git a/include/linux/irqchip/sunxi.h b/include/linux/irqchip/sunxi.h
new file mode 100644
index 000000000000..1fe2c2260e2b
--- /dev/null
+++ b/include/linux/irqchip/sunxi.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2012 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_IRQCHIP_SUNXI_H
+#define __LINUX_IRQCHIP_SUNXI_H
+
+#include <asm/exception.h>
+
+extern void sunxi_init_irq(void);
+
+extern asmlinkage void __exception_irq_entry sunxi_handle_irq(
+	struct pt_regs *regs);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From c22618a11d1ba2966bd2cfd5e4918ed4f2dad13e Mon Sep 17 00:00:00 2001
From: Grant Likely
Date: Wed, 14 Nov 2012 22:37:12 +0000
Subject: drivers/of: Constify device_node->name and ->path_component_name

Neither of these should ever be changed once set. Make them const and
fix up the users that try to modify it in-place. In one case
kmalloc+memcpy is replaced with kstrdup() to avoid modifying the string.

Build tested with defconfigs on ARM, PowerPC, Sparc, MIPS, x86 among
others.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Julian Calaby <julian.calaby@gmail.com>
---
 arch/powerpc/platforms/powermac/pfunc_core.c |  2 +-
 arch/powerpc/platforms/pseries/reconfig.c    |  3 +--
 arch/powerpc/sysdev/fsl_pci.c                |  2 +-
 arch/sparc/kernel/pci_impl.h                 |  2 +-
 drivers/of/fdt.c                             | 10 +++++-----
 include/linux/of.h                           |  4 ++--
 6 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index b0c3777528a1..d588e48dff74 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -686,7 +686,7 @@ static int pmf_add_functions(struct pmf_device *dev, void *driverdata)
 	int count = 0;
 
 	for (pp = dev->node->properties; pp != 0; pp = pp->next) {
-		char *name;
+		const char *name;
 		if (strncmp(pp->name, PP_PREFIX, plen) != 0)
 			continue;
 		name = pp->name + plen;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 39f71fba9b38..2f4668136b20 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -281,12 +281,11 @@ static struct property *new_property(const char *name, const int length,
 	if (!new)
 		return NULL;
 
-	if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL)))
+	if (!(new->name = kstrdup(name, GFP_KERNEL)))
 		goto cleanup;
 	if (!(new->value = kmalloc(length + 1, GFP_KERNEL)))
 		goto cleanup;
 
-	strcpy(new->name, name);
 	memcpy(new->value, value, length);
 	*(((char *)new->value) + length) = 0;
 	new->length = length;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ffb93ae9379b..01b62a62c635 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -136,7 +136,7 @@ static void __init setup_pci_atmu(struct pci_controller *hose,
 	u32 pcicsrbar = 0, pcicsrbar_sz;
 	u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
 			PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
-	char *name = hose->dn->full_name;
+	const char *name = hose->dn->full_name;
 	const u64 *reg;
 	int len;
 
diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h
index 918a2031c8bb..5f688531f48c 100644
--- a/arch/sparc/kernel/pci_impl.h
+++ b/arch/sparc/kernel/pci_impl.h
@@ -88,7 +88,7 @@ struct pci_pbm_info {
 	int				chip_revision;
 
 	/* Name used for top-level resources. */
-	char				*name;
+	const char			*name;
 
 	/* OBP specific information. */
 	struct platform_device		*op;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index c2b08dcdbc53..c8be32644c85 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -199,10 +199,10 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob,
 	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
 				__alignof__(struct device_node));
 	if (allnextpp) {
+		char *fn;
 		memset(np, 0, sizeof(*np));
-		np->full_name = ((char *)np) + sizeof(struct device_node);
+		np->full_name = fn = ((char *)np) + sizeof(*np);
 		if (new_format) {
-			char *fn = np->full_name;
 			/* rebuild full path for new format */
 			if (dad && dad->parent) {
 				strcpy(fn, dad->full_name);
@@ -216,9 +216,9 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob,
 				fn += strlen(fn);
 			}
 			*(fn++) = '/';
-			memcpy(fn, pathp, l);
-		} else
-			memcpy(np->full_name, pathp, l);
+		}
+		memcpy(fn, pathp, l);
+
 		prev_pp = &np->properties;
 		**allnextpp = np;
 		*allnextpp = &np->allnext;
diff --git a/include/linux/of.h b/include/linux/of.h
index b4e50d56fc74..857dde984a6e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -46,7 +46,7 @@ struct device_node {
 	const char *name;
 	const char *type;
 	phandle phandle;
-	char	*full_name;
+	const char *full_name;
 
 	struct	property *properties;
 	struct	property *deadprops;	/* removed properties */
@@ -60,7 +60,7 @@ struct device_node {
 	unsigned long _flags;
 	void	*data;
 #if defined(CONFIG_SPARC)
-	char	*path_component_name;
+	const char *path_component_name;
 	unsigned int unique_id;
 	struct of_irq_controller *irq_trans;
 #endif
-- 
cgit v1.2.3-55-g7522


From ea2ce92e44dc83b7a69c2aedd9c52bfe7fee1a62 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala
Date: Tue, 9 Oct 2012 22:25:29 +0530
Subject: power_supply: Add support for CHARGE_CONTROL_* attributes

Add support for power supply attributes CHARGE_CONTROL_LIMIT
and CHARGE_CONTROL_LIMIT_MAX.

These new attributes will enable the user space to implement
custom charging algorithms based on platform state.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/power/power_supply_class.txt | 3 +++
 drivers/power/power_supply_sysfs.c         | 2 ++
 include/linux/power_supply.h               | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 9c647bd7c5a9..3f10b39b0346 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -123,6 +123,9 @@ CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
 CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
 power supply object.
 
+CHARGE_CONTROL_LIMIT - current charge control limit setting
+CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
+
 ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
 
 CAPACITY - capacity in percents.
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 395c2cfa16c0..40fa3b7cae54 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -164,6 +164,8 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(constant_charge_current_max),
 	POWER_SUPPLY_ATTR(constant_charge_voltage),
 	POWER_SUPPLY_ATTR(constant_charge_voltage_max),
+	POWER_SUPPLY_ATTR(charge_control_limit),
+	POWER_SUPPLY_ATTR(charge_control_limit_max),
 	POWER_SUPPLY_ATTR(energy_full_design),
 	POWER_SUPPLY_ATTR(energy_empty_design),
 	POWER_SUPPLY_ATTR(energy_full),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index e5ef45834c3c..445b4b249af5 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -114,6 +114,8 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
+	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT,
+	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX,
 	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
 	POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN,
 	POWER_SUPPLY_PROP_ENERGY_FULL,
-- 
cgit v1.2.3-55-g7522


From 952aeeb3ee28bc12a70744e40636de40688eb60d Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala
Date: Tue, 9 Oct 2012 22:25:59 +0530
Subject: power_supply: Register power supply for thermal cooling device

This patch registers the power supply as a cooling device if the power
supply has support for charge throttling.

Now with this change low level drivers need not register with thermal
framework as it is automatically done by power supply framework.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/power_supply_core.c | 96 +++++++++++++++++++++++++++++++++++++++
 include/linux/power_supply.h      |  1 +
 2 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 2436f1350013..f984da1066ec 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -216,6 +216,86 @@ static void psy_unregister_thermal(struct power_supply *psy)
 		return;
 	thermal_zone_device_unregister(psy->tzd);
 }
+
+/* thermal cooling device callbacks */
+static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
+					unsigned long *state)
+{
+	struct power_supply *psy;
+	union power_supply_propval val;
+	int ret;
+
+	psy = tcd->devdata;
+	ret = psy->get_property(psy,
+		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+	if (!ret)
+		*state = val.intval;
+
+	return ret;
+}
+
+static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd,
+					unsigned long *state)
+{
+	struct power_supply *psy;
+	union power_supply_propval val;
+	int ret;
+
+	psy = tcd->devdata;
+	ret = psy->get_property(psy,
+		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+	if (!ret)
+		*state = val.intval;
+
+	return ret;
+}
+
+static int ps_set_cur_charge_cntl_limit(struct thermal_cooling_device *tcd,
+					unsigned long state)
+{
+	struct power_supply *psy;
+	union power_supply_propval val;
+	int ret;
+
+	psy = tcd->devdata;
+	val.intval = state;
+	ret = psy->set_property(psy,
+		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+
+	return ret;
+}
+
+static struct thermal_cooling_device_ops psy_tcd_ops = {
+	.get_max_state = ps_get_max_charge_cntl_limit,
+	.get_cur_state = ps_get_cur_chrage_cntl_limit,
+	.set_cur_state = ps_set_cur_charge_cntl_limit,
+};
+
+static int psy_register_cooler(struct power_supply *psy)
+{
+	int i;
+
+	/* Register for cooling device if psy can control charging */
+	for (i = 0; i < psy->num_properties; i++) {
+		if (psy->properties[i] ==
+				POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT) {
+			psy->tcd = thermal_cooling_device_register(
+							(char *)psy->name,
+							psy, &psy_tcd_ops);
+			if (IS_ERR(psy->tcd))
+				return PTR_ERR(psy->tcd);
+			break;
+		}
+	}
+	return 0;
+}
+
+static void psy_unregister_cooler(struct power_supply *psy)
+{
+	if (IS_ERR_OR_NULL(psy->tcd))
+		return;
+	thermal_cooling_device_unregister(psy->tcd);
+}
 #else
 static int psy_register_thermal(struct power_supply *psy)
 {
@@ -225,6 +305,15 @@ static int psy_register_thermal(struct power_supply *psy)
 static void psy_unregister_thermal(struct power_supply *psy)
 {
 }
+
+static int psy_register_cooler(struct power_supply *psy)
+{
+	return 0;
+}
+
+static void psy_unregister_cooler(struct power_supply *psy)
+{
+}
 #endif
 
 int power_supply_register(struct device *parent, struct power_supply *psy)
@@ -259,6 +348,10 @@ int power_supply_register(struct device *parent, struct power_supply *psy)
 	if (rc)
 		goto register_thermal_failed;
 
+	rc = psy_register_cooler(psy);
+	if (rc)
+		goto register_cooler_failed;
+
 	rc = power_supply_create_triggers(psy);
 	if (rc)
 		goto create_triggers_failed;
@@ -268,6 +361,8 @@ int power_supply_register(struct device *parent, struct power_supply *psy)
 	goto success;
 
 create_triggers_failed:
+	psy_unregister_cooler(psy);
+register_cooler_failed:
 	psy_unregister_thermal(psy);
 register_thermal_failed:
 	device_del(dev);
@@ -284,6 +379,7 @@ void power_supply_unregister(struct power_supply *psy)
 	cancel_work_sync(&psy->changed_work);
 	sysfs_remove_link(&psy->dev->kobj, "powers");
 	power_supply_remove_triggers(psy);
+	psy_unregister_cooler(psy);
 	psy_unregister_thermal(psy);
 	device_unregister(psy->dev);
 }
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 445b4b249af5..1f0ab90aff00 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -188,6 +188,7 @@ struct power_supply {
 	struct work_struct changed_work;
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd;
+	struct thermal_cooling_device *tcd;
 #endif
 
 #ifdef CONFIG_LEDS_TRIGGERS
-- 
cgit v1.2.3-55-g7522


From 08d816b8cb09de1fd8268c8f1dbc97c3cc435d67 Mon Sep 17 00:00:00 2001
From: Kim, Milo
Date: Mon, 22 Oct 2012 00:18:54 +0000
Subject: lp8788-charger: Fix ADC channel names

The name of ADC channel is configurable in the platform side. This name is
referenced in the IIO consumer driver. To get the IIO channel, specific
name in the platform data is used as an parameter of the
iio_channel_get(). Thus, lp8788_adc_id platform data are replaced with
specific names.

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/lp8788-charger.c | 44 ++++++------------------------------------
 include/linux/mfd/lp8788.h     |  8 ++++----
 2 files changed, 10 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c
index 1afa5f7a5359..fb592bfbec6e 100644
--- a/drivers/power/lp8788-charger.c
+++ b/drivers/power/lp8788-charger.c
@@ -584,50 +584,18 @@ static void lp8788_setup_adc_channel(const char *consumer_name,
 				struct lp8788_charger *pchg)
 {
 	struct lp8788_charger_platform_data *pdata = pchg->pdata;
-	struct device *dev = pchg->lp->dev;
 	struct iio_channel *chan;
-	enum lp8788_adc_id id;
-	const char *chan_name[LPADC_MAX] = {
-		[LPADC_VBATT_5P5] = "vbatt-5p5",
-		[LPADC_VBATT_6P0] = "vbatt-6p0",
-		[LPADC_VBATT_5P0] = "vbatt-5p0",
-		[LPADC_ADC1]      = "adc1",
-		[LPADC_ADC2]      = "adc2",
-		[LPADC_ADC3]      = "adc3",
-		[LPADC_ADC4]      = "adc4",
-	};
 
 	if (!pdata)
 		return;
 
-	id = pdata->vbatt_adc;
-	switch (id) {
-	case LPADC_VBATT_5P5:
-	case LPADC_VBATT_6P0:
-	case LPADC_VBATT_5P0:
-		chan = iio_channel_get(consumer_name, chan_name[id]);
-		pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan;
-		break;
-	default:
-		dev_err(dev, "invalid ADC id for VBATT: %d\n", id);
-		pchg->chan[LP8788_VBATT] = NULL;
-		break;
-	}
+	/* ADC channel for battery voltage */
+	chan = iio_channel_get(consumer_name, pdata->adc_vbatt);
+	pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan;
 
-	id = pdata->batt_temp_adc;
-	switch (id) {
-	case LPADC_ADC1:
-	case LPADC_ADC2:
-	case LPADC_ADC3:
-	case LPADC_ADC4:
-		chan = iio_channel_get(consumer_name, chan_name[id]);
-		pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan;
-		break;
-	default:
-		dev_err(dev, "invalid ADC id for BATT_TEMP : %d\n", id);
-		pchg->chan[LP8788_BATT_TEMP] = NULL;
-		break;
-	}
+	/* ADC channel for battery temperature */
+	chan = iio_channel_get(consumer_name, pdata->adc_batt_temp);
+	pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan;
 }
 
 static void lp8788_release_adc_channel(struct lp8788_charger *pchg)
diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h
index cec364bdccfa..2a32b16f79cb 100644
--- a/include/linux/mfd/lp8788.h
+++ b/include/linux/mfd/lp8788.h
@@ -211,16 +211,16 @@ struct lp8788_chg_param {
 
 /*
  * struct lp8788_charger_platform_data
- * @vbatt_adc         : adc selection id for battery voltage
- * @batt_temp_adc     : adc selection id for battery temperature
+ * @adc_vbatt         : adc channel name for battery voltage
+ * @adc_batt_temp     : adc channel name for battery temperature
  * @max_vbatt_mv      : used for calculating battery capacity
  * @chg_params        : initial charging parameters
  * @num_chg_params    : numbers of charging parameters
  * @charger_event     : the charger event can be reported to the platform side
  */
 struct lp8788_charger_platform_data {
-	enum lp8788_adc_id vbatt_adc;
-	enum lp8788_adc_id batt_temp_adc;
+	const char *adc_vbatt;
+	const char *adc_batt_temp;
 	unsigned int max_vbatt_mv;
 	struct lp8788_chg_param *chg_params;
 	int num_chg_params;
-- 
cgit v1.2.3-55-g7522


From 0bc98cc6155205b615a9d29a2d54d1b839521c04 Mon Sep 17 00:00:00 2001
From: Pali Rohár
Date: Fri, 2 Nov 2012 20:18:11 +0100
Subject: power_supply: Add bq2415x charger driver

The bq2415x charger driver is needed for example on Nokia N900 for
charging battery. Driver is part of open source project to replace
proprietary battery management.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/bq2415x_charger.c       | 1644 +++++++++++++++++++++++++++++++++
 include/linux/power/bq2415x_charger.h |   90 ++
 2 files changed, 1734 insertions(+)
 create mode 100644 drivers/power/bq2415x_charger.c
 create mode 100644 include/linux/power/bq2415x_charger.h

(limited to 'include/linux')

diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
new file mode 100644
index 000000000000..017b3c27f769
--- /dev/null
+++ b/drivers/power/bq2415x_charger.c
@@ -0,0 +1,1644 @@
+/*
+    bq2415x_charger.c - bq2415x charger driver
+    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/*
+  Datasheets:
+  http://www.ti.com/product/bq24150
+  http://www.ti.com/product/bq24150a
+  http://www.ti.com/product/bq24152
+  http://www.ti.com/product/bq24153
+  http://www.ti.com/product/bq24153a
+  http://www.ti.com/product/bq24155
+*/
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/err.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/idr.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+
+#include <linux/power/bq2415x_charger.h>
+
+/* timeout for resetting chip timer */
+#define BQ2415X_TIMER_TIMEOUT		10
+
+#define BQ2415X_REG_STATUS		0x00
+#define BQ2415X_REG_CONTROL		0x01
+#define BQ2415X_REG_VOLTAGE		0x02
+#define BQ2415X_REG_VENDER		0x03
+#define BQ2415X_REG_CURRENT		0x04
+
+/* reset state for all registers */
+#define BQ2415X_RESET_STATUS		BIT(6)
+#define BQ2415X_RESET_CONTROL		(BIT(4)|BIT(5))
+#define BQ2415X_RESET_VOLTAGE		(BIT(1)|BIT(3))
+#define BQ2415X_RESET_CURRENT		(BIT(0)|BIT(3)|BIT(7))
+
+/* status register */
+#define BQ2415X_BIT_TMR_RST		7
+#define BQ2415X_BIT_OTG			7
+#define BQ2415X_BIT_EN_STAT		6
+#define BQ2415X_MASK_STAT		(BIT(4)|BIT(5))
+#define BQ2415X_SHIFT_STAT		4
+#define BQ2415X_BIT_BOOST		3
+#define BQ2415X_MASK_FAULT		(BIT(0)|BIT(1)|BIT(2))
+#define BQ2415X_SHIFT_FAULT		0
+
+/* control register */
+#define BQ2415X_MASK_LIMIT		(BIT(6)|BIT(7))
+#define BQ2415X_SHIFT_LIMIT		6
+#define BQ2415X_MASK_VLOWV		(BIT(4)|BIT(5))
+#define BQ2415X_SHIFT_VLOWV		4
+#define BQ2415X_BIT_TE			3
+#define BQ2415X_BIT_CE			2
+#define BQ2415X_BIT_HZ_MODE		1
+#define BQ2415X_BIT_OPA_MODE		0
+
+/* voltage register */
+#define BQ2415X_MASK_VO		(BIT(2)|BIT(3)|BIT(4)|BIT(5)|BIT(6)|BIT(7))
+#define BQ2415X_SHIFT_VO		2
+#define BQ2415X_BIT_OTG_PL		1
+#define BQ2415X_BIT_OTG_EN		0
+
+/* vender register */
+#define BQ2415X_MASK_VENDER		(BIT(5)|BIT(6)|BIT(7))
+#define BQ2415X_SHIFT_VENDER		5
+#define BQ2415X_MASK_PN			(BIT(3)|BIT(4))
+#define BQ2415X_SHIFT_PN		3
+#define BQ2415X_MASK_REVISION		(BIT(0)|BIT(1)|BIT(2))
+#define BQ2415X_SHIFT_REVISION		0
+
+/* current register */
+#define BQ2415X_MASK_RESET		BIT(7)
+#define BQ2415X_MASK_VI_CHRG		(BIT(4)|BIT(5)|BIT(6))
+#define BQ2415X_SHIFT_VI_CHRG		4
+/* N/A					BIT(3) */
+#define BQ2415X_MASK_VI_TERM		(BIT(0)|BIT(1)|BIT(2))
+#define BQ2415X_SHIFT_VI_TERM		0
+
+
+enum bq2415x_command {
+	BQ2415X_TIMER_RESET,
+	BQ2415X_OTG_STATUS,
+	BQ2415X_STAT_PIN_STATUS,
+	BQ2415X_STAT_PIN_ENABLE,
+	BQ2415X_STAT_PIN_DISABLE,
+	BQ2415X_CHARGE_STATUS,
+	BQ2415X_BOOST_STATUS,
+	BQ2415X_FAULT_STATUS,
+
+	BQ2415X_CHARGE_TERMINATION_STATUS,
+	BQ2415X_CHARGE_TERMINATION_ENABLE,
+	BQ2415X_CHARGE_TERMINATION_DISABLE,
+	BQ2415X_CHARGER_STATUS,
+	BQ2415X_CHARGER_ENABLE,
+	BQ2415X_CHARGER_DISABLE,
+	BQ2415X_HIGH_IMPEDANCE_STATUS,
+	BQ2415X_HIGH_IMPEDANCE_ENABLE,
+	BQ2415X_HIGH_IMPEDANCE_DISABLE,
+	BQ2415X_BOOST_MODE_STATUS,
+	BQ2415X_BOOST_MODE_ENABLE,
+	BQ2415X_BOOST_MODE_DISABLE,
+
+	BQ2415X_OTG_LEVEL,
+	BQ2415X_OTG_ACTIVATE_HIGH,
+	BQ2415X_OTG_ACTIVATE_LOW,
+	BQ2415X_OTG_PIN_STATUS,
+	BQ2415X_OTG_PIN_ENABLE,
+	BQ2415X_OTG_PIN_DISABLE,
+
+	BQ2415X_VENDER_CODE,
+	BQ2415X_PART_NUMBER,
+	BQ2415X_REVISION,
+};
+
+enum bq2415x_chip {
+	BQUNKNOWN,
+	BQ24150,
+	BQ24150A,
+	BQ24151,
+	BQ24151A,
+	BQ24152,
+	BQ24153,
+	BQ24153A,
+	BQ24155,
+	BQ24156,
+	BQ24156A,
+	BQ24158,
+};
+
+static char *bq2415x_chip_name[] = {
+	"unknown",
+	"bq24150",
+	"bq24150a",
+	"bq24151",
+	"bq24151a",
+	"bq24152",
+	"bq24153",
+	"bq24153a",
+	"bq24155",
+	"bq24156",
+	"bq24156a",
+	"bq24158",
+};
+
+struct bq2415x_device {
+	struct device *dev;
+	struct bq2415x_platform_data init_data;
+	struct power_supply charger;
+	struct delayed_work work;
+	enum bq2415x_mode reported_mode;/* mode reported by hook function */
+	enum bq2415x_mode mode;		/* current configured mode */
+	enum bq2415x_chip chip;
+	const char *timer_error;
+	char *model;
+	char *name;
+	int autotimer;	/* 1 - if driver automatically reset timer, 0 - not */
+	int automode;	/* 1 - enabled, 0 - disabled; -1 - not supported */
+	int id;
+};
+
+/* each registered chip must have unique id */
+static DEFINE_IDR(bq2415x_id);
+
+static DEFINE_MUTEX(bq2415x_id_mutex);
+static DEFINE_MUTEX(bq2415x_timer_mutex);
+static DEFINE_MUTEX(bq2415x_i2c_mutex);
+
+/**** i2c read functions ****/
+
+/* read value from register */
+static int bq2415x_i2c_read(struct bq2415x_device *bq, u8 reg)
+{
+	struct i2c_client *client = to_i2c_client(bq->dev);
+	struct i2c_msg msg[2];
+	u8 val;
+	int ret;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	msg[0].addr = client->addr;
+	msg[0].flags = 0;
+	msg[0].buf = &reg;
+	msg[0].len = sizeof(reg);
+	msg[1].addr = client->addr;
+	msg[1].flags = I2C_M_RD;
+	msg[1].buf = &val;
+	msg[1].len = sizeof(val);
+
+	mutex_lock(&bq2415x_i2c_mutex);
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+	mutex_unlock(&bq2415x_i2c_mutex);
+
+	if (ret < 0)
+		return ret;
+
+	return val;
+}
+
+/* read value from register, apply mask and right shift it */
+static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg,
+				u8 mask, u8 shift)
+{
+	int ret;
+
+	if (shift > 8)
+		return -EINVAL;
+
+	ret = bq2415x_i2c_read(bq, reg);
+	if (ret < 0)
+		return ret;
+	else
+		return (ret & mask) >> shift;
+}
+
+/* read value from register and return one specified bit */
+static int bq2415x_i2c_read_bit(struct bq2415x_device *bq, u8 reg, u8 bit)
+{
+	if (bit > 8)
+		return -EINVAL;
+	else
+		return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit);
+}
+
+/**** i2c write functions ****/
+
+/* write value to register */
+static int bq2415x_i2c_write(struct bq2415x_device *bq, u8 reg, u8 val)
+{
+	struct i2c_client *client = to_i2c_client(bq->dev);
+	struct i2c_msg msg[1];
+	u8 data[2];
+	int ret;
+
+	data[0] = reg;
+	data[1] = val;
+
+	msg[0].addr = client->addr;
+	msg[0].flags = 0;
+	msg[0].buf = data;
+	msg[0].len = ARRAY_SIZE(data);
+
+	mutex_lock(&bq2415x_i2c_mutex);
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+	mutex_unlock(&bq2415x_i2c_mutex);
+
+	/* i2c_transfer returns number of messages transferred */
+	if (ret < 0)
+		return ret;
+	else if (ret != 1)
+		return -EIO;
+
+	return 0;
+}
+
+/* read value from register, change it with mask left shifted and write back */
+static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val,
+				u8 mask, u8 shift)
+{
+	int ret;
+
+	if (shift > 8)
+		return -EINVAL;
+
+	ret = bq2415x_i2c_read(bq, reg);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~mask;
+	ret |= val << shift;
+
+	return bq2415x_i2c_write(bq, reg, ret);
+}
+
+/* change only one bit in register */
+static int bq2415x_i2c_write_bit(struct bq2415x_device *bq, u8 reg,
+				bool val, u8 bit)
+{
+	if (bit > 8)
+		return -EINVAL;
+	else
+		return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit);
+}
+
+/**** global functions ****/
+
+/* exec command function */
+static int bq2415x_exec_command(struct bq2415x_device *bq,
+				enum bq2415x_command command)
+{
+	int ret;
+	switch (command) {
+	case BQ2415X_TIMER_RESET:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS,
+				1, BQ2415X_BIT_TMR_RST);
+	case BQ2415X_OTG_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS,
+				BQ2415X_BIT_OTG);
+	case BQ2415X_STAT_PIN_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS,
+				BQ2415X_BIT_EN_STAT);
+	case BQ2415X_STAT_PIN_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, 1,
+				BQ2415X_BIT_EN_STAT);
+	case BQ2415X_STAT_PIN_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, 0,
+				BQ2415X_BIT_EN_STAT);
+	case BQ2415X_CHARGE_STATUS:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_STATUS,
+				BQ2415X_MASK_STAT, BQ2415X_SHIFT_STAT);
+	case BQ2415X_BOOST_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS,
+				BQ2415X_BIT_BOOST);
+	case BQ2415X_FAULT_STATUS:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_STATUS,
+			BQ2415X_MASK_FAULT, BQ2415X_SHIFT_FAULT);
+
+	case BQ2415X_CHARGE_TERMINATION_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+				BQ2415X_BIT_TE);
+	case BQ2415X_CHARGE_TERMINATION_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_TE);
+	case BQ2415X_CHARGE_TERMINATION_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_TE);
+	case BQ2415X_CHARGER_STATUS:
+		ret = bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+			BQ2415X_BIT_CE);
+		if (ret < 0)
+			return ret;
+		else
+			return ret > 0 ? 0 : 1;
+	case BQ2415X_CHARGER_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_CE);
+	case BQ2415X_CHARGER_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_CE);
+	case BQ2415X_HIGH_IMPEDANCE_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+				BQ2415X_BIT_HZ_MODE);
+	case BQ2415X_HIGH_IMPEDANCE_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_HZ_MODE);
+	case BQ2415X_HIGH_IMPEDANCE_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_HZ_MODE);
+	case BQ2415X_BOOST_MODE_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+				BQ2415X_BIT_OPA_MODE);
+	case BQ2415X_BOOST_MODE_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_OPA_MODE);
+	case BQ2415X_BOOST_MODE_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_OPA_MODE);
+
+	case BQ2415X_OTG_LEVEL:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_VOLTAGE,
+				BQ2415X_BIT_OTG_PL);
+	case BQ2415X_OTG_ACTIVATE_HIGH:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				1, BQ2415X_BIT_OTG_PL);
+	case BQ2415X_OTG_ACTIVATE_LOW:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				0, BQ2415X_BIT_OTG_PL);
+	case BQ2415X_OTG_PIN_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_VOLTAGE,
+				BQ2415X_BIT_OTG_EN);
+	case BQ2415X_OTG_PIN_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				1, BQ2415X_BIT_OTG_EN);
+	case BQ2415X_OTG_PIN_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				0, BQ2415X_BIT_OTG_EN);
+
+	case BQ2415X_VENDER_CODE:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
+			BQ2415X_MASK_VENDER, BQ2415X_SHIFT_VENDER);
+	case BQ2415X_PART_NUMBER:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
+				BQ2415X_MASK_PN, BQ2415X_SHIFT_PN);
+	case BQ2415X_REVISION:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
+			BQ2415X_MASK_REVISION, BQ2415X_SHIFT_REVISION);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/* detect chip type */
+static enum bq2415x_chip bq2415x_detect_chip(struct bq2415x_device *bq)
+{
+	struct i2c_client *client = to_i2c_client(bq->dev);
+	int ret = bq2415x_exec_command(bq, BQ2415X_PART_NUMBER);
+
+	if (ret < 0)
+		return ret;
+
+	switch (client->addr) {
+	case 0x6b:
+		switch (ret) {
+		case 0:
+			if (bq->chip == BQ24151A)
+				return bq->chip;
+			else
+				return BQ24151;
+		case 1:
+			if (bq->chip == BQ24150A ||
+				bq->chip == BQ24152 ||
+				bq->chip == BQ24155)
+				return bq->chip;
+			else
+				return BQ24150;
+		case 2:
+			if (bq->chip == BQ24153A)
+				return bq->chip;
+			else
+				return BQ24153;
+		default:
+			return BQUNKNOWN;
+		}
+		break;
+
+	case 0x6a:
+		switch (ret) {
+		case 0:
+			if (bq->chip == BQ24156A)
+				return bq->chip;
+			else
+				return BQ24156;
+		case 2:
+			return BQ24158;
+		default:
+			return BQUNKNOWN;
+		}
+		break;
+	}
+
+	return BQUNKNOWN;
+}
+
+/* detect chip revision */
+static int bq2415x_detect_revision(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_exec_command(bq, BQ2415X_REVISION);
+	int chip = bq2415x_detect_chip(bq);
+	if (ret < 0 || chip < 0)
+		return -1;
+
+	switch (chip) {
+	case BQ24150:
+	case BQ24150A:
+	case BQ24151:
+	case BQ24151A:
+	case BQ24152:
+		if (ret >= 0 && ret <= 3)
+			return ret;
+		else
+			return -1;
+
+	case BQ24153:
+	case BQ24153A:
+	case BQ24156:
+	case BQ24156A:
+	case BQ24158:
+		if (ret == 3)
+			return 0;
+		else if (ret == 1)
+			return 1;
+		else
+			return -1;
+
+	case BQ24155:
+		if (ret == 3)
+			return 3;
+		else
+			return -1;
+
+	case BQUNKNOWN:
+		return -1;
+	}
+
+	return -1;
+}
+
+/* return chip vender code */
+static int bq2415x_get_vender_code(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE);
+	if (ret < 0)
+		return 0;
+	else /* convert to binary */
+		return (ret & 0x1) +
+			((ret >> 1) & 0x1) * 10 +
+			((ret >> 2) & 0x1) * 100;
+}
+
+/* reset all chip registers to default state */
+static void bq2415x_reset_chip(struct bq2415x_device *bq)
+{
+	bq2415x_i2c_write(bq, BQ2415X_REG_CURRENT, BQ2415X_RESET_CURRENT);
+	bq2415x_i2c_write(bq, BQ2415X_REG_VOLTAGE, BQ2415X_RESET_VOLTAGE);
+	bq2415x_i2c_write(bq, BQ2415X_REG_CONTROL, BQ2415X_RESET_CONTROL);
+	bq2415x_i2c_write(bq, BQ2415X_REG_STATUS, BQ2415X_RESET_STATUS);
+	bq->timer_error = NULL;
+}
+
+/**** properties functions ****/
+
+/* set current limit in mA */
+static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
+{
+	int val;
+	if (mA <= 100)
+		val = 0;
+	else if (mA <= 500)
+		val = 1;
+	else if (mA <= 800)
+		val = 2;
+	else
+		val = 3;
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
+			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
+}
+
+/* get current limit in mA */
+static int bq2415x_get_current_limit(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
+	if (ret < 0)
+		return ret;
+	else if (ret == 0)
+		return 100;
+	else if (ret == 1)
+		return 500;
+	else if (ret == 2)
+		return 800;
+	else if (ret == 3)
+		return 1800;
+	else
+		return -EINVAL;
+}
+
+/* set weak battery voltage in mV */
+static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
+{
+	/* round to 100mV */
+	int val;
+	if (mV <= 3400 + 50)
+		val = 0;
+	else if (mV <= 3500 + 50)
+		val = 1;
+	else if (mV <= 3600 + 50)
+		val = 2;
+	else
+		val = 3;
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
+			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
+}
+
+/* get weak battery voltage in mV */
+static int bq2415x_get_weak_battery_voltage(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
+	if (ret < 0)
+		return ret;
+	else
+		return 100 * (34 + ret);
+}
+
+/* set battery regulation voltage in mV */
+static int bq2415x_set_battery_regulation_voltage(struct bq2415x_device *bq,
+						int mV)
+{
+	int val = (mV/10 - 350) / 2;
+
+	if (val < 0)
+		val = 0;
+	else if (val > 94) /* FIXME: Max is 94 or 122 ? Set max value ? */
+		return -EINVAL;
+
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_VOLTAGE, val,
+			BQ2415X_MASK_VO, BQ2415X_SHIFT_VO);
+}
+
+/* get battery regulation voltage in mV */
+static int bq2415x_get_battery_regulation_voltage(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_VOLTAGE,
+			BQ2415X_MASK_VO, BQ2415X_SHIFT_VO);
+	if (ret < 0)
+		return ret;
+	else
+		return 10 * (350 + 2*ret);
+}
+
+/* set charge current in mA (platform data must provide resistor sense) */
+static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA)
+{
+	int val;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	val = (mA * bq->init_data.resistor_sense - 37400) / 6800;
+
+	if (val < 0)
+		val = 0;
+	else if (val > 7)
+		val = 7;
+
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CURRENT, val,
+			BQ2415X_MASK_VI_CHRG | BQ2415X_MASK_RESET,
+			BQ2415X_SHIFT_VI_CHRG);
+}
+
+/* get charge current in mA (platform data must provide resistor sense) */
+static int bq2415x_get_charge_current(struct bq2415x_device *bq)
+{
+	int ret;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CURRENT,
+			BQ2415X_MASK_VI_CHRG, BQ2415X_SHIFT_VI_CHRG);
+	if (ret < 0)
+		return ret;
+	else
+		return (37400 + 6800*ret) / bq->init_data.resistor_sense;
+}
+
+/* set termination current in mA (platform data must provide resistor sense) */
+static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA)
+{
+	int val;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	val = (mA * bq->init_data.resistor_sense - 3400) / 3400;
+
+	if (val < 0)
+		val = 0;
+	else if (val > 7)
+		val = 7;
+
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CURRENT, val,
+			BQ2415X_MASK_VI_TERM | BQ2415X_MASK_RESET,
+			BQ2415X_SHIFT_VI_TERM);
+}
+
+/* get termination current in mA (platform data must provide resistor sense) */
+static int bq2415x_get_termination_current(struct bq2415x_device *bq)
+{
+	int ret;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CURRENT,
+			BQ2415X_MASK_VI_TERM, BQ2415X_SHIFT_VI_TERM);
+	if (ret < 0)
+		return ret;
+	else
+		return (3400 + 3400*ret) / bq->init_data.resistor_sense;
+}
+
+/* set default value of property */
+#define bq2415x_set_default_value(bq, prop) \
+	do { \
+		int ret = 0; \
+		if (bq->init_data.prop != -1) \
+			ret = bq2415x_set_##prop(bq, bq->init_data.prop); \
+		if (ret < 0) \
+			return ret; \
+	} while (0)
+
+/* set default values of all properties */
+static int bq2415x_set_defaults(struct bq2415x_device *bq)
+{
+	bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE);
+	bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
+	bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_DISABLE);
+	bq2415x_set_default_value(bq, current_limit);
+	bq2415x_set_default_value(bq, weak_battery_voltage);
+	bq2415x_set_default_value(bq, battery_regulation_voltage);
+	if (bq->init_data.resistor_sense > 0) {
+		bq2415x_set_default_value(bq, charge_current);
+		bq2415x_set_default_value(bq, termination_current);
+		bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_ENABLE);
+	}
+	bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE);
+	return 0;
+}
+
+/**** charger mode functions ****/
+
+/* set charger mode */
+static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode)
+{
+	int ret = 0;
+	int charger = 0;
+	int boost = 0;
+
+	if (mode == BQ2415X_MODE_HOST_CHARGER ||
+		mode == BQ2415X_MODE_DEDICATED_CHARGER)
+			charger = 1;
+
+	if (mode == BQ2415X_MODE_BOOST)
+		boost = 1;
+
+	if (!charger)
+		ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
+
+	if (!boost)
+		ret = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE);
+
+	if (ret < 0)
+		return ret;
+
+	switch (mode) {
+	case BQ2415X_MODE_NONE:
+		dev_dbg(bq->dev, "changing mode to: N/A\n");
+		ret = bq2415x_set_current_limit(bq, 100);
+		break;
+	case BQ2415X_MODE_HOST_CHARGER:
+		dev_dbg(bq->dev, "changing mode to: Host/HUB charger\n");
+		ret = bq2415x_set_current_limit(bq, 500);
+		break;
+	case BQ2415X_MODE_DEDICATED_CHARGER:
+		dev_dbg(bq->dev, "changing mode to: Dedicated charger\n");
+		ret = bq2415x_set_current_limit(bq, 1800);
+		break;
+	case BQ2415X_MODE_BOOST: /* Boost mode */
+		dev_dbg(bq->dev, "changing mode to: Boost\n");
+		ret = bq2415x_set_current_limit(bq, 100);
+		break;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	if (charger)
+		ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE);
+	else if (boost)
+		ret = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_ENABLE);
+
+	if (ret < 0)
+		return ret;
+
+	bq2415x_set_default_value(bq, weak_battery_voltage);
+	bq2415x_set_default_value(bq, battery_regulation_voltage);
+
+	bq->mode = mode;
+	sysfs_notify(&bq->charger.dev->kobj, NULL, "mode");
+
+	return 0;
+
+}
+
+/* hook function called by other driver which set reported mode */
+static void bq2415x_hook_function(enum bq2415x_mode mode, void *data)
+{
+	struct bq2415x_device *bq = data;
+
+	if (!bq)
+		return;
+
+	dev_dbg(bq->dev, "hook function was called\n");
+	bq->reported_mode = mode;
+
+	/* if automode is not enabled do not tell about reported_mode */
+	if (bq->automode < 1)
+		return;
+
+	sysfs_notify(&bq->charger.dev->kobj, NULL, "reported_mode");
+	bq2415x_set_mode(bq, bq->reported_mode);
+
+}
+
+/**** timer functions ****/
+
+/* enable/disable auto resetting chip timer */
+static void bq2415x_set_autotimer(struct bq2415x_device *bq, int state)
+{
+	mutex_lock(&bq2415x_timer_mutex);
+
+	if (bq->autotimer == state) {
+		mutex_unlock(&bq2415x_timer_mutex);
+		return;
+	}
+
+	bq->autotimer = state;
+
+	if (state) {
+		schedule_delayed_work(&bq->work, BQ2415X_TIMER_TIMEOUT * HZ);
+		bq2415x_exec_command(bq, BQ2415X_TIMER_RESET);
+		bq->timer_error = NULL;
+	} else {
+		cancel_delayed_work_sync(&bq->work);
+	}
+
+	mutex_unlock(&bq2415x_timer_mutex);
+}
+
+/* called by bq2415x_timer_work on timer error */
+static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
+{
+	bq->timer_error = msg;
+	sysfs_notify(&bq->charger.dev->kobj, NULL, "timer");
+	dev_err(bq->dev, "%s\n", msg);
+	if (bq->automode > 0)
+		bq->automode = 0;
+	bq2415x_set_mode(bq, BQ2415X_MODE_NONE);
+	bq2415x_set_autotimer(bq, 0);
+}
+
+/* delayed work function for auto resetting chip timer */
+static void bq2415x_timer_work(struct work_struct *work)
+{
+	struct bq2415x_device *bq = container_of(work, struct bq2415x_device,
+						work.work);
+	int ret, error, boost;
+
+	if (!bq->autotimer)
+		return;
+
+	ret = bq2415x_exec_command(bq, BQ2415X_TIMER_RESET);
+	if (ret < 0) {
+		bq2415x_timer_error(bq, "Resetting timer failed");
+		return;
+	}
+
+	boost = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_STATUS);
+	if (boost < 0) {
+		bq2415x_timer_error(bq, "Unknown error");
+		return;
+	}
+
+	error = bq2415x_exec_command(bq, BQ2415X_FAULT_STATUS);
+	if (error < 0) {
+		bq2415x_timer_error(bq, "Unknown error");
+		return;
+	}
+
+	if (boost) {
+		switch (error) {
+		/* Non fatal errors, chip is OK */
+		case 0: /* No error */
+			break;
+		case 6: /* Timer expired */
+			dev_err(bq->dev, "Timer expired\n");
+			break;
+		case 3: /* Battery voltage too low */
+			dev_err(bq->dev, "Battery voltage to low\n");
+			break;
+
+		/* Fatal errors, disable and reset chip */
+		case 1: /* Overvoltage protection (chip fried) */
+			bq2415x_timer_error(bq,
+				"Overvoltage protection (chip fried)");
+			return;
+		case 2: /* Overload */
+			bq2415x_timer_error(bq, "Overload");
+			return;
+		case 4: /* Battery overvoltage protection */
+			bq2415x_timer_error(bq,
+				"Battery overvoltage protection");
+			return;
+		case 5: /* Thermal shutdown (too hot) */
+			bq2415x_timer_error(bq,
+					"Thermal shutdown (too hot)");
+			return;
+		case 7: /* N/A */
+			bq2415x_timer_error(bq, "Unknown error");
+			return;
+		}
+	} else {
+		switch (error) {
+		/* Non fatal errors, chip is OK */
+		case 0: /* No error */
+			break;
+		case 2: /* Sleep mode */
+			dev_err(bq->dev, "Sleep mode\n");
+			break;
+		case 3: /* Poor input source */
+			dev_err(bq->dev, "Poor input source\n");
+			break;
+		case 6: /* Timer expired */
+			dev_err(bq->dev, "Timer expired\n");
+			break;
+		case 7: /* No battery */
+			dev_err(bq->dev, "No battery\n");
+			break;
+
+		/* Fatal errors, disable and reset chip */
+		case 1: /* Overvoltage protection (chip fried) */
+			bq2415x_timer_error(bq,
+				"Overvoltage protection (chip fried)");
+			return;
+		case 4: /* Battery overvoltage protection */
+			bq2415x_timer_error(bq,
+				"Battery overvoltage protection");
+			return;
+		case 5: /* Thermal shutdown (too hot) */
+			bq2415x_timer_error(bq,
+				"Thermal shutdown (too hot)");
+			return;
+		}
+	}
+
+	schedule_delayed_work(&bq->work, BQ2415X_TIMER_TIMEOUT * HZ);
+}
+
+/**** power supply interface code ****/
+
+static enum power_supply_property bq2415x_power_supply_props[] = {
+	/* TODO: maybe add more power supply properties */
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+};
+
+static int bq2415x_power_supply_get_property(struct power_supply *psy,
+	enum power_supply_property psp, union power_supply_propval *val)
+{
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	int ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = bq2415x_exec_command(bq, BQ2415X_CHARGE_STATUS);
+		if (ret < 0)
+			return ret;
+		else if (ret == 0) /* Ready */
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else if (ret == 1) /* Charge in progress */
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (ret == 2) /* Charge done */
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else
+			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = bq->model;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int bq2415x_power_supply_init(struct bq2415x_device *bq)
+{
+	int ret;
+	int chip;
+	char revstr[8];
+
+	bq->charger.name = bq->name;
+	bq->charger.type = POWER_SUPPLY_TYPE_USB;
+	bq->charger.properties = bq2415x_power_supply_props;
+	bq->charger.num_properties = ARRAY_SIZE(bq2415x_power_supply_props);
+	bq->charger.get_property = bq2415x_power_supply_get_property;
+
+	ret = bq2415x_detect_chip(bq);
+	if (ret < 0)
+		chip = BQUNKNOWN;
+	else
+		chip = ret;
+
+	ret = bq2415x_detect_revision(bq);
+	if (ret < 0)
+		strcpy(revstr, "unknown");
+	else
+		sprintf(revstr, "1.%d", ret);
+
+	bq->model = kasprintf(GFP_KERNEL,
+				"chip %s, revision %s, vender code %.3d",
+				bq2415x_chip_name[chip], revstr,
+				bq2415x_get_vender_code(bq));
+	if (!bq->model) {
+		dev_err(bq->dev, "failed to allocate model name\n");
+		return -ENOMEM;
+	}
+
+	ret = power_supply_register(bq->dev, &bq->charger);
+	if (ret) {
+		kfree(bq->model);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void bq2415x_power_supply_exit(struct bq2415x_device *bq)
+{
+	bq->autotimer = 0;
+	if (bq->automode > 0)
+		bq->automode = 0;
+	cancel_delayed_work_sync(&bq->work);
+	power_supply_unregister(&bq->charger);
+	kfree(bq->model);
+}
+
+/**** additional sysfs entries for power supply interface ****/
+
+/* show *_status entries */
+static ssize_t bq2415x_sysfs_show_status(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_command command;
+	int ret;
+
+	if (strcmp(attr->attr.name, "otg_status") == 0)
+		command = BQ2415X_OTG_STATUS;
+	else if (strcmp(attr->attr.name, "charge_status") == 0)
+		command = BQ2415X_CHARGE_STATUS;
+	else if (strcmp(attr->attr.name, "boost_status") == 0)
+		command = BQ2415X_BOOST_STATUS;
+	else if (strcmp(attr->attr.name, "fault_status") == 0)
+		command = BQ2415X_FAULT_STATUS;
+	else
+		return -EINVAL;
+
+	ret = bq2415x_exec_command(bq, command);
+	if (ret < 0)
+		return ret;
+	else
+		return sprintf(buf, "%d\n", ret);
+}
+
+/* set timer entry:
+     auto - enable auto mode
+     off - disable auto mode
+     (other values) - reset chip timer
+*/
+static ssize_t bq2415x_sysfs_set_timer(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	int ret = 0;
+
+	if (strncmp(buf, "auto", 4) == 0)
+		bq2415x_set_autotimer(bq, 1);
+	else if (strncmp(buf, "off", 3) == 0)
+		bq2415x_set_autotimer(bq, 0);
+	else
+		ret = bq2415x_exec_command(bq, BQ2415X_TIMER_RESET);
+
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show timer entry (auto or off) */
+static ssize_t bq2415x_sysfs_show_timer(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+
+	if (bq->timer_error)
+		return sprintf(buf, "%s\n", bq->timer_error);
+
+	if (bq->autotimer)
+		return sprintf(buf, "auto\n");
+	else
+		return sprintf(buf, "off\n");
+}
+
+/* set mode entry:
+     auto - if automode is supported, enable it and set mode to reported
+     none - disable charger and boost mode
+     host - charging mode for host/hub chargers (current limit 500mA)
+     dedicated - charging mode for dedicated chargers (unlimited current limit)
+     boost - disable charger and enable boost mode
+*/
+static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_mode mode;
+	int ret = 0;
+
+	if (strncmp(buf, "auto", 4) == 0) {
+		if (bq->automode < 0)
+			return -ENOSYS;
+		bq->automode = 1;
+		mode = bq->reported_mode;
+	} else if (strncmp(buf, "none", 4) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_NONE;
+	} else if (strncmp(buf, "host", 4) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_HOST_CHARGER;
+	} else if (strncmp(buf, "dedicated", 9) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_DEDICATED_CHARGER;
+	} else if (strncmp(buf, "boost", 5) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_BOOST;
+	} else if (strncmp(buf, "reset", 5) == 0) {
+		bq2415x_reset_chip(bq);
+		bq2415x_set_defaults(bq);
+		if (bq->automode <= 0)
+			return count;
+		bq->automode = 1;
+		mode = bq->reported_mode;
+	} else
+		return -EINVAL;
+
+	ret = bq2415x_set_mode(bq, mode);
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show mode entry (auto, none, host, dedicated or boost) */
+static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	ssize_t ret = 0;
+
+	if (bq->automode > 0)
+		ret += sprintf(buf+ret, "auto (");
+
+	switch (bq->mode) {
+	case BQ2415X_MODE_NONE:
+		ret += sprintf(buf+ret, "none");
+		break;
+	case BQ2415X_MODE_HOST_CHARGER:
+		ret += sprintf(buf+ret, "host");
+		break;
+	case BQ2415X_MODE_DEDICATED_CHARGER:
+		ret += sprintf(buf+ret, "dedicated");
+		break;
+	case BQ2415X_MODE_BOOST:
+		ret += sprintf(buf+ret, "boost");
+		break;
+	}
+
+	if (bq->automode > 0)
+		ret += sprintf(buf+ret, ")");
+
+	ret += sprintf(buf+ret, "\n");
+	return ret;
+}
+
+/* show reported_mode entry (none, host, dedicated or boost) */
+static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+
+	if (bq->automode < 0)
+		return -EINVAL;
+
+	switch (bq->reported_mode) {
+	case BQ2415X_MODE_NONE:
+		return sprintf(buf, "none\n");
+	case BQ2415X_MODE_HOST_CHARGER:
+		return sprintf(buf, "host\n");
+	case BQ2415X_MODE_DEDICATED_CHARGER:
+		return sprintf(buf, "dedicated\n");
+	case BQ2415X_MODE_BOOST:
+		return sprintf(buf, "boost\n");
+	}
+
+	return -EINVAL;
+}
+
+/* directly set raw value to chip register, format: 'register value' */
+static ssize_t bq2415x_sysfs_set_registers(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	ssize_t ret = 0;
+	unsigned int reg;
+	unsigned int val;
+
+	if (sscanf(buf, "%x %x", &reg, &val) != 2)
+		return -EINVAL;
+
+	if (reg > 4 || val > 255)
+		return -EINVAL;
+
+	ret = bq2415x_i2c_write(bq, reg, val);
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* print value of chip register, format: 'register=value' */
+static ssize_t bq2415x_sysfs_print_reg(struct bq2415x_device *bq,
+					u8 reg, char *buf)
+{
+	int ret = bq2415x_i2c_read(bq, reg);
+	if (ret < 0)
+		return sprintf(buf, "%#.2x=error %d\n", reg, ret);
+	else
+		return sprintf(buf, "%#.2x=%#.2x\n", reg, ret);
+}
+
+/* show all raw values of chip register, format per line: 'register=value' */
+static ssize_t bq2415x_sysfs_show_registers(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	ssize_t ret = 0;
+
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_CONTROL, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_VOLTAGE, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_VENDER, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_CURRENT, buf+ret);
+	return ret;
+}
+
+/* set current and voltage limit entries (in mA or mV) */
+static ssize_t bq2415x_sysfs_set_limit(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	long val;
+	int ret;
+
+	if (kstrtol(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (strcmp(attr->attr.name, "current_limit") == 0)
+		ret = bq2415x_set_current_limit(bq, val);
+	else if (strcmp(attr->attr.name, "weak_battery_voltage") == 0)
+		ret = bq2415x_set_weak_battery_voltage(bq, val);
+	else if (strcmp(attr->attr.name, "battery_regulation_voltage") == 0)
+		ret = bq2415x_set_battery_regulation_voltage(bq, val);
+	else if (strcmp(attr->attr.name, "charge_current") == 0)
+		ret = bq2415x_set_charge_current(bq, val);
+	else if (strcmp(attr->attr.name, "termination_current") == 0)
+		ret = bq2415x_set_termination_current(bq, val);
+	else
+		return -EINVAL;
+
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show current and voltage limit entries (in mA or mV) */
+static ssize_t bq2415x_sysfs_show_limit(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	int ret;
+
+	if (strcmp(attr->attr.name, "current_limit") == 0)
+		ret = bq2415x_get_current_limit(bq);
+	else if (strcmp(attr->attr.name, "weak_battery_voltage") == 0)
+		ret = bq2415x_get_weak_battery_voltage(bq);
+	else if (strcmp(attr->attr.name, "battery_regulation_voltage") == 0)
+		ret = bq2415x_get_battery_regulation_voltage(bq);
+	else if (strcmp(attr->attr.name, "charge_current") == 0)
+		ret = bq2415x_get_charge_current(bq);
+	else if (strcmp(attr->attr.name, "termination_current") == 0)
+		ret = bq2415x_get_termination_current(bq);
+	else
+		return -EINVAL;
+
+	if (ret < 0)
+		return ret;
+	else
+		return sprintf(buf, "%d\n", ret);
+}
+
+/* set *_enable entries */
+static ssize_t bq2415x_sysfs_set_enable(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_command command;
+	long val;
+	int ret;
+
+	if (kstrtol(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (strcmp(attr->attr.name, "charge_termination_enable") == 0)
+		command = val ? BQ2415X_CHARGE_TERMINATION_ENABLE :
+			BQ2415X_CHARGE_TERMINATION_DISABLE;
+	else if (strcmp(attr->attr.name, "high_impedance_enable") == 0)
+		command = val ? BQ2415X_HIGH_IMPEDANCE_ENABLE :
+			BQ2415X_HIGH_IMPEDANCE_DISABLE;
+	else if (strcmp(attr->attr.name, "otg_pin_enable") == 0)
+		command = val ? BQ2415X_OTG_PIN_ENABLE :
+			BQ2415X_OTG_PIN_DISABLE;
+	else if (strcmp(attr->attr.name, "stat_pin_enable") == 0)
+		command = val ? BQ2415X_STAT_PIN_ENABLE :
+			BQ2415X_STAT_PIN_DISABLE;
+	else
+		return -EINVAL;
+
+	ret = bq2415x_exec_command(bq, command);
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show *_enable entries */
+static ssize_t bq2415x_sysfs_show_enable(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_command command;
+	int ret;
+
+	if (strcmp(attr->attr.name, "charge_termination_enable") == 0)
+		command = BQ2415X_CHARGE_TERMINATION_STATUS;
+	else if (strcmp(attr->attr.name, "high_impedance_enable") == 0)
+		command = BQ2415X_HIGH_IMPEDANCE_STATUS;
+	else if (strcmp(attr->attr.name, "otg_pin_enable") == 0)
+		command = BQ2415X_OTG_PIN_STATUS;
+	else if (strcmp(attr->attr.name, "stat_pin_enable") == 0)
+		command = BQ2415X_STAT_PIN_STATUS;
+	else
+		return -EINVAL;
+
+	ret = bq2415x_exec_command(bq, command);
+	if (ret < 0)
+		return ret;
+	else
+		return sprintf(buf, "%d\n", ret);
+}
+
+static DEVICE_ATTR(current_limit, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(weak_battery_voltage, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(battery_regulation_voltage, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(charge_current, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(termination_current, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+
+static DEVICE_ATTR(charge_termination_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+static DEVICE_ATTR(high_impedance_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+static DEVICE_ATTR(otg_pin_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+static DEVICE_ATTR(stat_pin_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+
+static DEVICE_ATTR(reported_mode, S_IRUGO,
+		bq2415x_sysfs_show_reported_mode, NULL);
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_mode, bq2415x_sysfs_set_mode);
+static DEVICE_ATTR(timer, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_timer, bq2415x_sysfs_set_timer);
+
+static DEVICE_ATTR(registers, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_registers, bq2415x_sysfs_set_registers);
+
+static DEVICE_ATTR(otg_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+static DEVICE_ATTR(charge_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+static DEVICE_ATTR(boost_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+static DEVICE_ATTR(fault_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+
+static struct attribute *bq2415x_sysfs_attributes[] = {
+	&dev_attr_current_limit.attr,
+	&dev_attr_weak_battery_voltage.attr,
+	&dev_attr_battery_regulation_voltage.attr,
+	&dev_attr_charge_current.attr,
+	&dev_attr_termination_current.attr,
+
+	&dev_attr_charge_termination_enable.attr,
+	&dev_attr_high_impedance_enable.attr,
+	&dev_attr_otg_pin_enable.attr,
+	&dev_attr_stat_pin_enable.attr,
+
+	&dev_attr_reported_mode.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_timer.attr,
+
+	&dev_attr_registers.attr,
+
+	&dev_attr_otg_status.attr,
+	&dev_attr_charge_status.attr,
+	&dev_attr_boost_status.attr,
+	&dev_attr_fault_status.attr,
+	NULL,
+};
+
+static const struct attribute_group bq2415x_sysfs_attr_group = {
+	.attrs = bq2415x_sysfs_attributes,
+};
+
+static int bq2415x_sysfs_init(struct bq2415x_device *bq)
+{
+	return sysfs_create_group(&bq->charger.dev->kobj,
+			&bq2415x_sysfs_attr_group);
+}
+
+static void bq2415x_sysfs_exit(struct bq2415x_device *bq)
+{
+	sysfs_remove_group(&bq->charger.dev->kobj, &bq2415x_sysfs_attr_group);
+}
+
+/* main bq2415x probe function */
+static int bq2415x_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	int ret;
+	int num;
+	char *name;
+	struct bq2415x_device *bq;
+
+	if (!client->dev.platform_data) {
+		dev_err(&client->dev, "platform data not set\n");
+		return -ENODEV;
+	}
+
+	/* Get new ID for the new device */
+	ret = idr_pre_get(&bq2415x_id, GFP_KERNEL);
+	if (ret == 0)
+		return -ENOMEM;
+
+	mutex_lock(&bq2415x_id_mutex);
+	ret = idr_get_new(&bq2415x_id, client, &num);
+	mutex_unlock(&bq2415x_id_mutex);
+
+	if (ret < 0)
+		return ret;
+
+	name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num);
+	if (!name) {
+		dev_err(&client->dev, "failed to allocate device name\n");
+		ret = -ENOMEM;
+		goto error_1;
+	}
+
+	bq = kzalloc(sizeof(*bq), GFP_KERNEL);
+	if (!bq) {
+		dev_err(&client->dev, "failed to allocate device data\n");
+		ret = -ENOMEM;
+		goto error_2;
+	}
+
+	i2c_set_clientdata(client, bq);
+
+	bq->id = num;
+	bq->dev = &client->dev;
+	bq->chip = id->driver_data;
+	bq->name = name;
+	bq->mode = BQ2415X_MODE_NONE;
+	bq->reported_mode = BQ2415X_MODE_NONE;
+	bq->autotimer = 0;
+	bq->automode = 0;
+
+	memcpy(&bq->init_data, client->dev.platform_data,
+			sizeof(bq->init_data));
+
+	bq2415x_reset_chip(bq);
+
+	ret = bq2415x_power_supply_init(bq);
+	if (ret) {
+		dev_err(bq->dev, "failed to register power supply: %d\n", ret);
+		goto error_3;
+	}
+
+	ret = bq2415x_sysfs_init(bq);
+	if (ret) {
+		dev_err(bq->dev, "failed to create sysfs entries: %d\n", ret);
+		goto error_4;
+	}
+
+	ret = bq2415x_set_defaults(bq);
+	if (ret) {
+		dev_err(bq->dev, "failed to set default values: %d\n", ret);
+		goto error_5;
+	}
+
+	if (bq->init_data.set_mode_hook) {
+		if (bq->init_data.set_mode_hook(
+				bq2415x_hook_function, bq)) {
+			bq->automode = 1;
+			bq2415x_set_mode(bq, bq->reported_mode);
+			dev_info(bq->dev, "automode enabled\n");
+		} else {
+			bq->automode = -1;
+			dev_info(bq->dev, "automode failed\n");
+		}
+	} else {
+		bq->automode = -1;
+		dev_info(bq->dev, "automode not supported\n");
+	}
+
+	INIT_DELAYED_WORK(&bq->work, bq2415x_timer_work);
+	bq2415x_set_autotimer(bq, 1);
+
+	dev_info(bq->dev, "driver registered\n");
+	return 0;
+
+error_5:
+	bq2415x_sysfs_exit(bq);
+error_4:
+	bq2415x_power_supply_exit(bq);
+error_3:
+	kfree(bq);
+error_2:
+	kfree(name);
+error_1:
+	mutex_lock(&bq2415x_id_mutex);
+	idr_remove(&bq2415x_id, num);
+	mutex_unlock(&bq2415x_id_mutex);
+
+	return ret;
+}
+
+/* main bq2415x remove function */
+
+static int bq2415x_remove(struct i2c_client *client)
+{
+	struct bq2415x_device *bq = i2c_get_clientdata(client);
+
+	if (bq->init_data.set_mode_hook)
+		bq->init_data.set_mode_hook(NULL, NULL);
+
+	bq2415x_sysfs_exit(bq);
+	bq2415x_power_supply_exit(bq);
+
+	bq2415x_reset_chip(bq);
+
+	mutex_lock(&bq2415x_id_mutex);
+	idr_remove(&bq2415x_id, bq->id);
+	mutex_unlock(&bq2415x_id_mutex);
+
+	dev_info(bq->dev, "driver unregistered\n");
+
+	kfree(bq->name);
+	kfree(bq);
+
+	return 0;
+}
+
+static const struct i2c_device_id bq2415x_i2c_id_table[] = {
+	{ "bq2415x", BQUNKNOWN },
+	{ "bq24150", BQ24150 },
+	{ "bq24150a", BQ24150A },
+	{ "bq24151", BQ24151 },
+	{ "bq24151a", BQ24151A },
+	{ "bq24152", BQ24152 },
+	{ "bq24153", BQ24153 },
+	{ "bq24153a", BQ24153A },
+	{ "bq24155", BQ24155 },
+	{ "bq24156", BQ24156 },
+	{ "bq24156a", BQ24156A },
+	{ "bq24158", BQ24158 },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, bq2415x_i2c_id_table);
+
+static struct i2c_driver bq2415x_driver = {
+	.driver = {
+		.name = "bq2415x-charger",
+	},
+	.probe = bq2415x_probe,
+	.remove = bq2415x_remove,
+	.id_table = bq2415x_i2c_id_table,
+};
+
+static int __init bq2415x_init(void)
+{
+	return i2c_add_driver(&bq2415x_driver);
+}
+module_init(bq2415x_init);
+
+static void __exit bq2415x_exit(void)
+{
+	i2c_del_driver(&bq2415x_driver);
+}
+module_exit(bq2415x_exit);
+
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
+MODULE_DESCRIPTION("bq2415x charger driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h
new file mode 100644
index 000000000000..fb6bf4d8aea8
--- /dev/null
+++ b/include/linux/power/bq2415x_charger.h
@@ -0,0 +1,90 @@
+/*
+    bq2415x_charger.h - bq2415x charger driver
+    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifndef BQ2415X_CHARGER_H
+#define BQ2415X_CHARGER_H
+
+/*
+  This is platform data for bq2415x chip. It contains default board voltages
+  and currents which can be also later configured via sysfs. If value is -1
+  then default chip value (specified in datasheet) will be used.
+
+  Value resistor_sense is needed for for configuring charge and termination
+  current. It it is less or equal to zero, configuring charge and termination
+  current will not be possible.
+
+  Function set_mode_hook is needed for automode (setting correct current limit
+  when charger is connected/disconnected or setting boost mode). When is NULL,
+  automode function is disabled. When is not NULL, it must have this prototype:
+
+    int (*set_mode_hook)(
+      void (*hook)(enum bq2415x_mode mode, void *data),
+      void *data)
+
+  hook is hook function (see below) and data is pointer to driver private data
+
+  bq2415x driver will call it as:
+
+    platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device);
+
+  Board/platform function set_mode_hook return non zero value when hook
+  function was successful registered. Platform code should call that hook
+  function (which get from pointer, with data) every time when charger was
+  connected/disconnected or require to enable boost mode. bq2415x driver then
+  will set correct current limit, enable/disable charger or boost mode.
+
+  Hook function has this prototype:
+
+    void hook(enum bq2415x_mode mode, void *data);
+
+  mode is bq2415x mode (charger or boost)
+  data is pointer to driver private data (which get from set_charger_type_hook)
+
+  When bq driver is being unloaded, it call function:
+
+    platform_data->set_mode_hook(NULL, NULL);
+
+  (hook function and driver private data are NULL)
+
+  After that board/platform code must not call driver hook function! It is
+  possible that pointer to hook function will not be valid and calling will
+  cause undefined result.
+
+*/
+
+/* Supported modes with maximal current limit */
+enum bq2415x_mode {
+	BQ2415X_MODE_NONE,		/* unknown or no charger (100mA) */
+	BQ2415X_MODE_HOST_CHARGER,	/* usb host/hub charger (500mA) */
+	BQ2415X_MODE_DEDICATED_CHARGER, /* dedicated charger (unlimited) */
+	BQ2415X_MODE_BOOST,		/* boost mode (charging disabled) */
+};
+
+struct bq2415x_platform_data {
+	int current_limit;		/* mA */
+	int weak_battery_voltage;	/* mV */
+	int battery_regulation_voltage;	/* mV */
+	int charge_current;		/* mA */
+	int termination_current;	/* mA */
+	int resistor_sense;		/* m ohm */
+	int (*set_mode_hook)(void (*hook)(enum bq2415x_mode mode, void *data),
+			     void *data);
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 6c47a3e00c6e4f3cdac7566c1480de34d9e32e07 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov
Date: Sun, 18 Nov 2012 15:35:32 -0800
Subject: bq2415x_charger: Fix style issues

I hate doing these style fixups myself, but I also hate inconsitent code.
Normally I just ask to resubmit the patch with the issues fixed, but N900
is special: I have a selfish interest in it. :)

Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/bq2415x_charger.c       | 268 +++++++++++++++++++---------------
 include/linux/power/bq2415x_charger.h | 131 +++++++++--------
 2 files changed, 215 insertions(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
index 017b3c27f769..ee842b37f462 100644
--- a/drivers/power/bq2415x_charger.c
+++ b/drivers/power/bq2415x_charger.c
@@ -1,31 +1,32 @@
 /*
-    bq2415x_charger.c - bq2415x charger driver
-    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
+ * bq2415x charger driver
+ *
+ * Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
 
 /*
-  Datasheets:
-  http://www.ti.com/product/bq24150
-  http://www.ti.com/product/bq24150a
-  http://www.ti.com/product/bq24152
-  http://www.ti.com/product/bq24153
-  http://www.ti.com/product/bq24153a
-  http://www.ti.com/product/bq24155
-*/
+ * Datasheets:
+ * http://www.ti.com/product/bq24150
+ * http://www.ti.com/product/bq24150a
+ * http://www.ti.com/product/bq24152
+ * http://www.ti.com/product/bq24153
+ * http://www.ti.com/product/bq24153a
+ * http://www.ti.com/product/bq24155
+ */
 
 #include <linux/version.h>
 #include <linux/kernel.h>
@@ -222,7 +223,7 @@ static int bq2415x_i2c_read(struct bq2415x_device *bq, u8 reg)
 
 /* read value from register, apply mask and right shift it */
 static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg,
-				u8 mask, u8 shift)
+				 u8 mask, u8 shift)
 {
 	int ret;
 
@@ -232,8 +233,7 @@ static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg,
 	ret = bq2415x_i2c_read(bq, reg);
 	if (ret < 0)
 		return ret;
-	else
-		return (ret & mask) >> shift;
+	return (ret & mask) >> shift;
 }
 
 /* read value from register and return one specified bit */
@@ -241,8 +241,7 @@ static int bq2415x_i2c_read_bit(struct bq2415x_device *bq, u8 reg, u8 bit)
 {
 	if (bit > 8)
 		return -EINVAL;
-	else
-		return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit);
+	return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit);
 }
 
 /**** i2c write functions ****/
@@ -278,7 +277,7 @@ static int bq2415x_i2c_write(struct bq2415x_device *bq, u8 reg, u8 val)
 
 /* read value from register, change it with mask left shifted and write back */
 static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val,
-				u8 mask, u8 shift)
+				  u8 mask, u8 shift)
 {
 	int ret;
 
@@ -297,12 +296,11 @@ static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val,
 
 /* change only one bit in register */
 static int bq2415x_i2c_write_bit(struct bq2415x_device *bq, u8 reg,
-				bool val, u8 bit)
+				 bool val, u8 bit)
 {
 	if (bit > 8)
 		return -EINVAL;
-	else
-		return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit);
+	return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit);
 }
 
 /**** global functions ****/
@@ -312,6 +310,7 @@ static int bq2415x_exec_command(struct bq2415x_device *bq,
 				enum bq2415x_command command)
 {
 	int ret;
+
 	switch (command) {
 	case BQ2415X_TIMER_RESET:
 		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS,
@@ -407,10 +406,8 @@ static int bq2415x_exec_command(struct bq2415x_device *bq,
 	case BQ2415X_REVISION:
 		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
 			BQ2415X_MASK_REVISION, BQ2415X_SHIFT_REVISION);
-
-	default:
-		return -EINVAL;
 	}
+	return -EINVAL;
 }
 
 /* detect chip type */
@@ -470,6 +467,7 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 {
 	int ret = bq2415x_exec_command(bq, BQ2415X_REVISION);
 	int chip = bq2415x_detect_chip(bq);
+
 	if (ret < 0 || chip < 0)
 		return -1;
 
@@ -483,7 +481,6 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 			return ret;
 		else
 			return -1;
-
 	case BQ24153:
 	case BQ24153A:
 	case BQ24156:
@@ -495,13 +492,11 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 			return 1;
 		else
 			return -1;
-
 	case BQ24155:
 		if (ret == 3)
 			return 3;
 		else
 			return -1;
-
 	case BQUNKNOWN:
 		return -1;
 	}
@@ -512,13 +507,16 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 /* return chip vender code */
 static int bq2415x_get_vender_code(struct bq2415x_device *bq)
 {
-	int ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE);
+	int ret;
+
+	ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE);
 	if (ret < 0)
 		return 0;
-	else /* convert to binary */
-		return (ret & 0x1) +
-			((ret >> 1) & 0x1) * 10 +
-			((ret >> 2) & 0x1) * 100;
+
+	/* convert to binary */
+	return (ret & 0x1) +
+	       ((ret >> 1) & 0x1) * 10 +
+	       ((ret >> 2) & 0x1) * 100;
 }
 
 /* reset all chip registers to default state */
@@ -537,6 +535,7 @@ static void bq2415x_reset_chip(struct bq2415x_device *bq)
 static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
 {
 	int val;
+
 	if (mA <= 100)
 		val = 0;
 	else if (mA <= 500)
@@ -545,6 +544,7 @@ static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
 		val = 2;
 	else
 		val = 3;
+
 	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
 			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
 }
@@ -552,7 +552,9 @@ static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
 /* get current limit in mA */
 static int bq2415x_get_current_limit(struct bq2415x_device *bq)
 {
-	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+	int ret;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
 			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
 	if (ret < 0)
 		return ret;
@@ -564,15 +566,15 @@ static int bq2415x_get_current_limit(struct bq2415x_device *bq)
 		return 800;
 	else if (ret == 3)
 		return 1800;
-	else
-		return -EINVAL;
+	return -EINVAL;
 }
 
 /* set weak battery voltage in mV */
 static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
 {
-	/* round to 100mV */
 	int val;
+
+	/* round to 100mV */
 	if (mV <= 3400 + 50)
 		val = 0;
 	else if (mV <= 3500 + 50)
@@ -581,6 +583,7 @@ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
 		val = 2;
 	else
 		val = 3;
+
 	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
 			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
 }
@@ -588,17 +591,18 @@ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
 /* get weak battery voltage in mV */
 static int bq2415x_get_weak_battery_voltage(struct bq2415x_device *bq)
 {
-	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+	int ret;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
 			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
 	if (ret < 0)
 		return ret;
-	else
-		return 100 * (34 + ret);
+	return 100 * (34 + ret);
 }
 
 /* set battery regulation voltage in mV */
 static int bq2415x_set_battery_regulation_voltage(struct bq2415x_device *bq,
-						int mV)
+						  int mV)
 {
 	int val = (mV/10 - 350) / 2;
 
@@ -616,21 +620,21 @@ static int bq2415x_get_battery_regulation_voltage(struct bq2415x_device *bq)
 {
 	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_VOLTAGE,
 			BQ2415X_MASK_VO, BQ2415X_SHIFT_VO);
+
 	if (ret < 0)
 		return ret;
-	else
-		return 10 * (350 + 2*ret);
+	return 10 * (350 + 2*ret);
 }
 
 /* set charge current in mA (platform data must provide resistor sense) */
 static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA)
 {
 	int val;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
 	val = (mA * bq->init_data.resistor_sense - 37400) / 6800;
-
 	if (val < 0)
 		val = 0;
 	else if (val > 7)
@@ -645,6 +649,7 @@ static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA)
 static int bq2415x_get_charge_current(struct bq2415x_device *bq)
 {
 	int ret;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
@@ -652,19 +657,18 @@ static int bq2415x_get_charge_current(struct bq2415x_device *bq)
 			BQ2415X_MASK_VI_CHRG, BQ2415X_SHIFT_VI_CHRG);
 	if (ret < 0)
 		return ret;
-	else
-		return (37400 + 6800*ret) / bq->init_data.resistor_sense;
+	return (37400 + 6800*ret) / bq->init_data.resistor_sense;
 }
 
 /* set termination current in mA (platform data must provide resistor sense) */
 static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA)
 {
 	int val;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
 	val = (mA * bq->init_data.resistor_sense - 3400) / 3400;
-
 	if (val < 0)
 		val = 0;
 	else if (val > 7)
@@ -679,6 +683,7 @@ static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA)
 static int bq2415x_get_termination_current(struct bq2415x_device *bq)
 {
 	int ret;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
@@ -686,8 +691,7 @@ static int bq2415x_get_termination_current(struct bq2415x_device *bq)
 			BQ2415X_MASK_VI_TERM, BQ2415X_SHIFT_VI_TERM);
 	if (ret < 0)
 		return ret;
-	else
-		return (3400 + 3400*ret) / bq->init_data.resistor_sense;
+	return (3400 + 3400*ret) / bq->init_data.resistor_sense;
 }
 
 /* set default value of property */
@@ -706,14 +710,17 @@ static int bq2415x_set_defaults(struct bq2415x_device *bq)
 	bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE);
 	bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
 	bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_DISABLE);
+
 	bq2415x_set_default_value(bq, current_limit);
 	bq2415x_set_default_value(bq, weak_battery_voltage);
 	bq2415x_set_default_value(bq, battery_regulation_voltage);
+
 	if (bq->init_data.resistor_sense > 0) {
 		bq2415x_set_default_value(bq, charge_current);
 		bq2415x_set_default_value(bq, termination_current);
 		bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_ENABLE);
 	}
+
 	bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE);
 	return 0;
 }
@@ -844,8 +851,10 @@ static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
 static void bq2415x_timer_work(struct work_struct *work)
 {
 	struct bq2415x_device *bq = container_of(work, struct bq2415x_device,
-						work.work);
-	int ret, error, boost;
+						 work.work);
+	int ret;
+	int error;
+	int boost;
 
 	if (!bq->autotimer)
 		return;
@@ -946,10 +955,11 @@ static enum power_supply_property bq2415x_power_supply_props[] = {
 };
 
 static int bq2415x_power_supply_get_property(struct power_supply *psy,
-	enum power_supply_property psp, union power_supply_propval *val)
+					     enum power_supply_property psp,
+					     union power_supply_propval *val)
 {
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	int ret;
 
 	switch (psp) {
@@ -1031,7 +1041,8 @@ static void bq2415x_power_supply_exit(struct bq2415x_device *bq)
 
 /* show *_status entries */
 static ssize_t bq2415x_sysfs_show_status(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					 struct device_attribute *attr,
+					 char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
@@ -1053,17 +1064,19 @@ static ssize_t bq2415x_sysfs_show_status(struct device *dev,
 	ret = bq2415x_exec_command(bq, command);
 	if (ret < 0)
 		return ret;
-	else
-		return sprintf(buf, "%d\n", ret);
+	return sprintf(buf, "%d\n", ret);
 }
 
-/* set timer entry:
-     auto - enable auto mode
-     off - disable auto mode
-     (other values) - reset chip timer
-*/
+/*
+ * set timer entry:
+ *    auto - enable auto mode
+ *    off - disable auto mode
+ *    (other values) - reset chip timer
+ */
 static ssize_t bq2415x_sysfs_set_timer(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				       struct device_attribute *attr,
+				       const char *buf,
+				       size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
@@ -1079,40 +1092,42 @@ static ssize_t bq2415x_sysfs_set_timer(struct device *dev,
 
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show timer entry (auto or off) */
 static ssize_t bq2415x_sysfs_show_timer(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					struct device_attribute *attr,
+					char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 
 	if (bq->timer_error)
 		return sprintf(buf, "%s\n", bq->timer_error);
 
 	if (bq->autotimer)
 		return sprintf(buf, "auto\n");
-	else
-		return sprintf(buf, "off\n");
+	return sprintf(buf, "off\n");
 }
 
-/* set mode entry:
-     auto - if automode is supported, enable it and set mode to reported
-     none - disable charger and boost mode
-     host - charging mode for host/hub chargers (current limit 500mA)
-     dedicated - charging mode for dedicated chargers (unlimited current limit)
-     boost - disable charger and enable boost mode
-*/
+/*
+ * set mode entry:
+ *    auto - if automode is supported, enable it and set mode to reported
+ *    none - disable charger and boost mode
+ *    host - charging mode for host/hub chargers (current limit 500mA)
+ *    dedicated - charging mode for dedicated chargers (unlimited current limit)
+ *    boost - disable charger and enable boost mode
+ */
 static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				      struct device_attribute *attr,
+				      const char *buf,
+				      size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	enum bq2415x_mode mode;
 	int ret = 0;
 
@@ -1144,19 +1159,20 @@ static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
 			return count;
 		bq->automode = 1;
 		mode = bq->reported_mode;
-	} else
+	} else {
 		return -EINVAL;
+	}
 
 	ret = bq2415x_set_mode(bq, mode);
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show mode entry (auto, none, host, dedicated or boost) */
 static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
-		struct device_attribute *attr, char *buf)
+				       struct device_attribute *attr,
+				       char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
@@ -1190,11 +1206,12 @@ static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
 
 /* show reported_mode entry (none, host, dedicated or boost) */
 static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
-		struct device_attribute *attr, char *buf)
+						struct device_attribute *attr,
+						char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 
 	if (bq->automode < 0)
 		return -EINVAL;
@@ -1215,11 +1232,13 @@ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
 
 /* directly set raw value to chip register, format: 'register value' */
 static ssize_t bq2415x_sysfs_set_registers(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+					   struct device_attribute *attr,
+					   const char *buf,
+					   size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	ssize_t ret = 0;
 	unsigned int reg;
 	unsigned int val;
@@ -1233,28 +1252,29 @@ static ssize_t bq2415x_sysfs_set_registers(struct device *dev,
 	ret = bq2415x_i2c_write(bq, reg, val);
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* print value of chip register, format: 'register=value' */
 static ssize_t bq2415x_sysfs_print_reg(struct bq2415x_device *bq,
-					u8 reg, char *buf)
+				       u8 reg,
+				       char *buf)
 {
 	int ret = bq2415x_i2c_read(bq, reg);
+
 	if (ret < 0)
 		return sprintf(buf, "%#.2x=error %d\n", reg, ret);
-	else
-		return sprintf(buf, "%#.2x=%#.2x\n", reg, ret);
+	return sprintf(buf, "%#.2x=%#.2x\n", reg, ret);
 }
 
 /* show all raw values of chip register, format per line: 'register=value' */
 static ssize_t bq2415x_sysfs_show_registers(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					    struct device_attribute *attr,
+					    char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	ssize_t ret = 0;
 
 	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret);
@@ -1267,11 +1287,13 @@ static ssize_t bq2415x_sysfs_show_registers(struct device *dev,
 
 /* set current and voltage limit entries (in mA or mV) */
 static ssize_t bq2415x_sysfs_set_limit(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				       struct device_attribute *attr,
+				       const char *buf,
+				       size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	long val;
 	int ret;
 
@@ -1293,17 +1315,17 @@ static ssize_t bq2415x_sysfs_set_limit(struct device *dev,
 
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show current and voltage limit entries (in mA or mV) */
 static ssize_t bq2415x_sysfs_show_limit(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					struct device_attribute *attr,
+					char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	int ret;
 
 	if (strcmp(attr->attr.name, "current_limit") == 0)
@@ -1321,17 +1343,18 @@ static ssize_t bq2415x_sysfs_show_limit(struct device *dev,
 
 	if (ret < 0)
 		return ret;
-	else
-		return sprintf(buf, "%d\n", ret);
+	return sprintf(buf, "%d\n", ret);
 }
 
 /* set *_enable entries */
 static ssize_t bq2415x_sysfs_set_enable(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+					struct device_attribute *attr,
+					const char *buf,
+					size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	enum bq2415x_command command;
 	long val;
 	int ret;
@@ -1357,17 +1380,17 @@ static ssize_t bq2415x_sysfs_set_enable(struct device *dev,
 	ret = bq2415x_exec_command(bq, command);
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show *_enable entries */
 static ssize_t bq2415x_sysfs_show_enable(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					 struct device_attribute *attr,
+					 char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	enum bq2415x_command command;
 	int ret;
 
@@ -1385,8 +1408,7 @@ static ssize_t bq2415x_sysfs_show_enable(struct device *dev,
 	ret = bq2415x_exec_command(bq, command);
 	if (ret < 0)
 		return ret;
-	else
-		return sprintf(buf, "%d\n", ret);
+	return sprintf(buf, "%d\n", ret);
 }
 
 static DEVICE_ATTR(current_limit, S_IWUSR | S_IRUGO,
@@ -1425,6 +1447,10 @@ static DEVICE_ATTR(boost_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
 static DEVICE_ATTR(fault_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
 
 static struct attribute *bq2415x_sysfs_attributes[] = {
+	/*
+	 * TODO: some (appropriate) of these attrs should be switched to
+	 * use power supply class props.
+	 */
 	&dev_attr_current_limit.attr,
 	&dev_attr_weak_battery_voltage.attr,
 	&dev_attr_battery_regulation_voltage.attr,
@@ -1466,7 +1492,7 @@ static void bq2415x_sysfs_exit(struct bq2415x_device *bq)
 
 /* main bq2415x probe function */
 static int bq2415x_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+			 const struct i2c_device_id *id)
 {
 	int ret;
 	int num;
diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h
index fb6bf4d8aea8..97a1665eaeaf 100644
--- a/include/linux/power/bq2415x_charger.h
+++ b/include/linux/power/bq2415x_charger.h
@@ -1,72 +1,77 @@
 /*
-    bq2415x_charger.h - bq2415x charger driver
-    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
+ * bq2415x charger driver
+ *
+ * Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
 
 #ifndef BQ2415X_CHARGER_H
 #define BQ2415X_CHARGER_H
 
 /*
-  This is platform data for bq2415x chip. It contains default board voltages
-  and currents which can be also later configured via sysfs. If value is -1
-  then default chip value (specified in datasheet) will be used.
-
-  Value resistor_sense is needed for for configuring charge and termination
-  current. It it is less or equal to zero, configuring charge and termination
-  current will not be possible.
-
-  Function set_mode_hook is needed for automode (setting correct current limit
-  when charger is connected/disconnected or setting boost mode). When is NULL,
-  automode function is disabled. When is not NULL, it must have this prototype:
-
-    int (*set_mode_hook)(
-      void (*hook)(enum bq2415x_mode mode, void *data),
-      void *data)
-
-  hook is hook function (see below) and data is pointer to driver private data
-
-  bq2415x driver will call it as:
-
-    platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device);
-
-  Board/platform function set_mode_hook return non zero value when hook
-  function was successful registered. Platform code should call that hook
-  function (which get from pointer, with data) every time when charger was
-  connected/disconnected or require to enable boost mode. bq2415x driver then
-  will set correct current limit, enable/disable charger or boost mode.
-
-  Hook function has this prototype:
-
-    void hook(enum bq2415x_mode mode, void *data);
-
-  mode is bq2415x mode (charger or boost)
-  data is pointer to driver private data (which get from set_charger_type_hook)
-
-  When bq driver is being unloaded, it call function:
-
-    platform_data->set_mode_hook(NULL, NULL);
-
-  (hook function and driver private data are NULL)
-
-  After that board/platform code must not call driver hook function! It is
-  possible that pointer to hook function will not be valid and calling will
-  cause undefined result.
-
-*/
+ * This is platform data for bq2415x chip. It contains default board
+ * voltages and currents which can be also later configured via sysfs. If
+ * value is -1 then default chip value (specified in datasheet) will be
+ * used.
+ *
+ * Value resistor_sense is needed for for configuring charge and
+ * termination current. It it is less or equal to zero, configuring charge
+ * and termination current will not be possible.
+ *
+ * Function set_mode_hook is needed for automode (setting correct current
+ * limit when charger is connected/disconnected or setting boost mode).
+ * When is NULL, automode function is disabled. When is not NULL, it must
+ * have this prototype:
+ *
+ *    int (*set_mode_hook)(
+ *      void (*hook)(enum bq2415x_mode mode, void *data),
+ *      void *data)
+ *
+ * hook is hook function (see below) and data is pointer to driver private
+ * data
+ *
+ * bq2415x driver will call it as:
+ *
+ *    platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device);
+ *
+ * Board/platform function set_mode_hook return non zero value when hook
+ * function was successful registered. Platform code should call that hook
+ * function (which get from pointer, with data) every time when charger
+ * was connected/disconnected or require to enable boost mode. bq2415x
+ * driver then will set correct current limit, enable/disable charger or
+ * boost mode.
+ *
+ * Hook function has this prototype:
+ *
+ *    void hook(enum bq2415x_mode mode, void *data);
+ *
+ * mode is bq2415x mode (charger or boost)
+ * data is pointer to driver private data (which get from
+ * set_charger_type_hook)
+ *
+ * When bq driver is being unloaded, it call function:
+ *
+ *    platform_data->set_mode_hook(NULL, NULL);
+ *
+ * (hook function and driver private data are NULL)
+ *
+ * After that board/platform code must not call driver hook function! It
+ * is possible that pointer to hook function will not be valid and calling
+ * will cause undefined result.
+ */
 
 /* Supported modes with maximal current limit */
 enum bq2415x_mode {
-- 
cgit v1.2.3-55-g7522


From 73f7ef435934e952c1d70d83d69921ea5d1f6bd4 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Fri, 16 Nov 2012 03:02:58 +0000
Subject: sysctl: Pass useful parameters to sysctl permissions

- Current is implicitly avaiable so passing current->nsproxy isn't useful.
- The ctl_table_header is needed to find how the sysctl table is connected
  to the rest of sysctl.
- ctl_table_root is avaiable in the ctl_table_header so no need to it.

With these changes it becomes possible to write a version of
net_sysctl_permission that takes into account the network namespace of
the sysctl table, an important feature in extending the user namespace.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_sysctl.c  | 9 +++++----
 include/linux/sysctl.h | 3 +--
 net/sysctl_net.c       | 3 +--
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index a781bdf06694..701580ddfcc3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -378,12 +378,13 @@ static int test_perm(int mode, int op)
 	return -EACCES;
 }
 
-static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
+static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op)
 {
+	struct ctl_table_root *root = head->root;
 	int mode;
 
 	if (root->permissions)
-		mode = root->permissions(root, current->nsproxy, table);
+		mode = root->permissions(head, table);
 	else
 		mode = table->mode;
 
@@ -491,7 +492,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	 * and won't be until we finish.
 	 */
 	error = -EPERM;
-	if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
+	if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
 		goto out;
 
 	/* if that can happen at all, it should be -EINVAL, not -EISDIR */
@@ -717,7 +718,7 @@ static int proc_sys_permission(struct inode *inode, int mask)
 	if (!table) /* global root - r-xr-xr-x */
 		error = mask & MAY_WRITE ? -EACCES : 0;
 	else /* Use the permissions on the sysctl table entry */
-		error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
+		error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK);
 
 	sysctl_head_finish(head);
 	return error;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index cd844a6a8d5f..14a8ff2de11e 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -158,8 +158,7 @@ struct ctl_table_root {
 	struct ctl_table_set default_set;
 	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
 					   struct nsproxy *namespaces);
-	int (*permissions)(struct ctl_table_root *root,
-			struct nsproxy *namespaces, struct ctl_table *table);
+	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
 /* struct ctl_path describes where in the hierarchy a table is added */
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e3a6e37cd1c5..e98f3932bdfb 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -38,8 +38,7 @@ static int is_seen(struct ctl_table_set *set)
 }
 
 /* Return standard mode bits for table entry. */
-static int net_ctl_permissions(struct ctl_table_root *root,
-			       struct nsproxy *nsproxy,
+static int net_ctl_permissions(struct ctl_table_header *head,
 			       struct ctl_table *table)
 {
 	/* Allow network administrator to have same access as root. */
-- 
cgit v1.2.3-55-g7522


From e0f1abeba5c2d8a2183566717d99294fd1a29c2e Mon Sep 17 00:00:00 2001
From: Rajanikanth H.V
Date: Sun, 18 Nov 2012 18:45:41 -0800
Subject: ab8500: Add devicetree support for fuelgauge

- This patch adds device tree support for fuelgauge driver
- optimize bm devices platform_data usage and of_probe(...)
  Note: of_probe() routine for battery managed devices is made
  common across all bm drivers.
- test status:
  - interrupt numbers assigned differs between legacy and FDT mode.

Signed-off-by: Rajanikanth H.V <rajanikanth.hv@stericsson.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/devicetree/bindings/mfd/ab8500.txt   |   7 +-
 .../devicetree/bindings/power_supply/ab8500/fg.txt |  58 +++
 arch/arm/boot/dts/dbx5x0.dtsi                      |  12 +-
 drivers/mfd/ab8500-core.c                          |   5 +
 drivers/power/Makefile                             |   2 +-
 drivers/power/ab8500_bmdata.c                      | 521 +++++++++++++++++++++
 drivers/power/ab8500_btemp.c                       |  16 +-
 drivers/power/ab8500_charger.c                     |  16 +-
 drivers/power/ab8500_fg.c                          |  82 ++--
 drivers/power/abx500_chargalg.c                    |   8 +-
 include/linux/mfd/abx500.h                         |  36 +-
 11 files changed, 667 insertions(+), 96 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
 create mode 100644 drivers/power/ab8500_bmdata.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt
index ce83c8d3c00e..6ca8d817ef92 100644
--- a/Documentation/devicetree/bindings/mfd/ab8500.txt
+++ b/Documentation/devicetree/bindings/mfd/ab8500.txt
@@ -24,7 +24,12 @@ ab8500-bm                :                      :              : Battery Manager
 ab8500-btemp             :                      :              : Battery Temperature
 ab8500-charger           :                      :              : Battery Charger
 ab8500-codec             :                      :              : Audio Codec
-ab8500-fg                :                      :              : Fuel Gauge
+ab8500-fg                : 			: vddadc       : Fuel Gauge
+			 : NCONV_ACCU           :	       : Accumulate N Sample Conversion
+			 : BATT_OVV		:	       : Battery Over Voltage
+			 : LOW_BAT_F		:	       : LOW threshold battery voltage
+			 : CC_INT_CALIB		:	       : Coulomb Counter Internal Calibration
+			 : CCEOC		:	       : Coulomb Counter End of Conversion
 ab8500-gpadc             : HW_CONV_END          : vddadc       : Analogue to Digital Converter
                            SW_CONV_END          :              :
 ab8500-gpio              :                      :              : GPIO Controller
diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
new file mode 100644
index 000000000000..ccafcb9112fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
@@ -0,0 +1,58 @@
+=== AB8500 Fuel Gauge Driver ===
+
+AB8500 is a mixed signal multimedia and power management
+device comprising: power and energy-management-module,
+wall-charger, usb-charger, audio codec, general purpose adc,
+tvout, clock management and sim card interface.
+
+Fuelgauge support is part of energy-management-modules, other
+components of this module are:
+main-charger, usb-combo-charger and battery-temperature-monitoring.
+
+The properties below describes the node for fuelgauge driver.
+
+Required Properties:
+- compatible = This shall be: "stericsson,ab8500-fg"
+- battery = Shall be battery specific information
+	Example:
+	ab8500_fg {
+		compatible = "stericsson,ab8500-fg";
+		battery	   = <&ab8500_battery>;
+	};
+
+dependent node:
+	ab8500_battery: ab8500_battery {
+	};
+	This node will provide information on 'thermistor interface' and
+	'battery technology type' used.
+
+Properties of this node are:
+thermistor-on-batctrl:
+	A boolean value indicating thermistor interface	to battery
+
+	Note:
+	'btemp' and 'batctrl' are the pins interfaced for battery temperature
+	measurement, 'btemp' signal is used when NTC(negative temperature
+	coefficient) resister is interfaced external to battery whereas
+	'batctrl' pin is used when NTC resister is internal to battery.
+
+	Example:
+	ab8500_battery: ab8500_battery {
+		thermistor-on-batctrl;
+	};
+	indicates: NTC resister is internal to battery, 'batctrl' is used
+		for thermal measurement.
+
+	The absence of property 'thermal-on-batctrl' indicates
+	NTC resister is external to battery and  'btemp' signal is used
+	for thermal measurement.
+
+battery-type:
+	This shall be the battery manufacturing technology type,
+	allowed types are:
+		"UNKNOWN" "NiMH" "LION" "LIPO" "LiFe" "NiCd" "LiMn"
+	Example:
+	ab8500_battery: ab8500_battery {
+		stericsson,battery-type = "LIPO";
+	}
+
diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi
index 4b0e0ca08f40..0c81986904c5 100644
--- a/arch/arm/boot/dts/dbx5x0.dtsi
+++ b/arch/arm/boot/dts/dbx5x0.dtsi
@@ -352,7 +352,17 @@
 					vddadc-supply = <&ab8500_ldo_tvout_reg>;
 				};
 
-				ab8500-usb {
+				ab8500_battery: ab8500_battery {
+					stericsson,battery-type = "LIPO";
+					thermistor-on-batctrl;
+				};
+
+				ab8500_fg {
+					compatible = "stericsson,ab8500-fg";
+					battery	   = <&ab8500_battery>;
+				};
+
+				ab8500_usb {
 					compatible = "stericsson,ab8500-usb";
 					interrupts = < 90 0x4
 						       96 0x4
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 1667c77b5cde..7c3017ba73e4 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1051,8 +1051,13 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
 	},
 	{
 		.name = "ab8500-fg",
+		.of_compatible = "stericsson,ab8500-fg",
 		.num_resources = ARRAY_SIZE(ab8500_fg_resources),
 		.resources = ab8500_fg_resources,
+#ifndef CONFIG_OF
+		.platform_data = &ab8500_bm_data,
+		.pdata_size = sizeof(ab8500_bm_data),
+#endif
 	},
 	{
 		.name = "ab8500-chargalg",
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 74dfd9570154..696e3a960b3e 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
 obj-$(CONFIG_BATTERY_JZ4740)	+= jz4740-battery.o
 obj-$(CONFIG_BATTERY_INTEL_MID)	+= intel_mid_battery.o
 obj-$(CONFIG_BATTERY_RX51)	+= rx51_battery.o
-obj-$(CONFIG_AB8500_BM)		+= ab8500_charger.o ab8500_btemp.o ab8500_fg.o abx500_chargalg.o
+obj-$(CONFIG_AB8500_BM)		+= ab8500_bmdata.o ab8500_charger.o ab8500_btemp.o ab8500_fg.o abx500_chargalg.o
 obj-$(CONFIG_CHARGER_ISP1704)	+= isp1704_charger.o
 obj-$(CONFIG_CHARGER_MAX8903)	+= max8903_charger.o
 obj-$(CONFIG_CHARGER_TWL4030)	+= twl4030_charger.o
diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
new file mode 100644
index 000000000000..e7639b6659f7
--- /dev/null
+++ b/drivers/power/ab8500_bmdata.c
@@ -0,0 +1,521 @@
+#include <linux/export.h>
+#include <linux/power_supply.h>
+#include <linux/of.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
+
+/*
+ * These are the defined batteries that uses a NTC and ID resistor placed
+ * inside of the battery pack.
+ * Note that the res_to_temp table must be strictly sorted by falling resistance
+ * values to work.
+ */
+static struct abx500_res_to_temp temp_tbl_A_thermistor[] = {
+	{-5, 53407},
+	{ 0, 48594},
+	{ 5, 43804},
+	{10, 39188},
+	{15, 34870},
+	{20, 30933},
+	{25, 27422},
+	{30, 24347},
+	{35, 21694},
+	{40, 19431},
+	{45, 17517},
+	{50, 15908},
+	{55, 14561},
+	{60, 13437},
+	{65, 12500},
+};
+
+static struct abx500_res_to_temp temp_tbl_B_thermistor[] = {
+	{-5, 165418},
+	{ 0, 159024},
+	{ 5, 151921},
+	{10, 144300},
+	{15, 136424},
+	{20, 128565},
+	{25, 120978},
+	{30, 113875},
+	{35, 107397},
+	{40, 101629},
+	{45,  96592},
+	{50,  92253},
+	{55,  88569},
+	{60,  85461},
+	{65,  82869},
+};
+
+static struct abx500_v_to_cap cap_tbl_A_thermistor[] = {
+	{4171,	100},
+	{4114,	 95},
+	{4009,	 83},
+	{3947,	 74},
+	{3907,	 67},
+	{3863,	 59},
+	{3830,	 56},
+	{3813,	 53},
+	{3791,	 46},
+	{3771,	 33},
+	{3754,	 25},
+	{3735,	 20},
+	{3717,	 17},
+	{3681,	 13},
+	{3664,	  8},
+	{3651,	  6},
+	{3635,	  5},
+	{3560,	  3},
+	{3408,    1},
+	{3247,	  0},
+};
+
+static struct abx500_v_to_cap cap_tbl_B_thermistor[] = {
+	{4161,	100},
+	{4124,	 98},
+	{4044,	 90},
+	{4003,	 85},
+	{3966,	 80},
+	{3933,	 75},
+	{3888,	 67},
+	{3849,	 60},
+	{3813,	 55},
+	{3787,	 47},
+	{3772,	 30},
+	{3751,	 25},
+	{3718,	 20},
+	{3681,	 16},
+	{3660,	 14},
+	{3589,	 10},
+	{3546,	  7},
+	{3495,	  4},
+	{3404,	  2},
+	{3250,	  0},
+};
+
+static struct abx500_v_to_cap cap_tbl[] = {
+	{4186,	100},
+	{4163,	 99},
+	{4114,	 95},
+	{4068,	 90},
+	{3990,	 80},
+	{3926,	 70},
+	{3898,	 65},
+	{3866,	 60},
+	{3833,	 55},
+	{3812,	 50},
+	{3787,	 40},
+	{3768,	 30},
+	{3747,	 25},
+	{3730,	 20},
+	{3705,	 15},
+	{3699,	 14},
+	{3684,	 12},
+	{3672,	  9},
+	{3657,	  7},
+	{3638,	  6},
+	{3556,	  4},
+	{3424,	  2},
+	{3317,	  1},
+	{3094,	  0},
+};
+
+/*
+ * Note that the res_to_temp table must be strictly sorted by falling
+ * resistance values to work.
+ */
+static struct abx500_res_to_temp temp_tbl[] = {
+	{-5, 214834},
+	{ 0, 162943},
+	{ 5, 124820},
+	{10,  96520},
+	{15,  75306},
+	{20,  59254},
+	{25,  47000},
+	{30,  37566},
+	{35,  30245},
+	{40,  24520},
+	{45,  20010},
+	{50,  16432},
+	{55,  13576},
+	{60,  11280},
+	{65,   9425},
+};
+
+/*
+ * Note that the batres_vs_temp table must be strictly sorted by falling
+ * temperature values to work.
+ */
+static struct batres_vs_temp temp_to_batres_tbl_thermistor[] = {
+	{ 40, 120},
+	{ 30, 135},
+	{ 20, 165},
+	{ 10, 230},
+	{ 00, 325},
+	{-10, 445},
+	{-20, 595},
+};
+
+/*
+ * Note that the batres_vs_temp table must be strictly sorted by falling
+ * temperature values to work.
+ */
+static struct batres_vs_temp temp_to_batres_tbl_ext_thermistor[] = {
+	{ 60, 300},
+	{ 30, 300},
+	{ 20, 300},
+	{ 10, 300},
+	{ 00, 300},
+	{-10, 300},
+	{-20, 300},
+};
+
+/* battery resistance table for LI ION 9100 battery */
+static struct batres_vs_temp temp_to_batres_tbl_9100[] = {
+	{ 60, 180},
+	{ 30, 180},
+	{ 20, 180},
+	{ 10, 180},
+	{ 00, 180},
+	{-10, 180},
+	{-20, 180},
+};
+
+static struct abx500_battery_type bat_type_thermistor[] = {
+[BATTERY_UNKNOWN] = {
+	/* First element always represent the UNKNOWN battery */
+	.name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN,
+	.resis_high = 0,
+	.resis_low = 0,
+	.battery_resistance = 300,
+	.charge_full_design = 612,
+	.nominal_voltage = 3700,
+	.termination_vol = 4050,
+	.termination_curr = 200,
+	.recharge_vol = 3990,
+	.normal_cur_lvl = 400,
+	.normal_vol_lvl = 4100,
+	.maint_a_cur_lvl = 400,
+	.maint_a_vol_lvl = 4050,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 400,
+	.maint_b_vol_lvl = 4000,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LIPO,
+	.resis_high = 53407,
+	.resis_low = 12500,
+	.battery_resistance = 300,
+	.charge_full_design = 900,
+	.nominal_voltage = 3600,
+	.termination_vol = 4150,
+	.termination_curr = 80,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl_A_thermistor),
+	.r_to_t_tbl = temp_tbl_A_thermistor,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl_A_thermistor),
+	.v_to_cap_tbl = cap_tbl_A_thermistor,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LIPO,
+	.resis_high = 165418,
+	.resis_low = 82869,
+	.battery_resistance = 300,
+	.charge_full_design = 900,
+	.nominal_voltage = 3600,
+	.termination_vol = 4150,
+	.termination_curr = 80,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl_B_thermistor),
+	.r_to_t_tbl = temp_tbl_B_thermistor,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl_B_thermistor),
+	.v_to_cap_tbl = cap_tbl_B_thermistor,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+};
+
+static struct abx500_battery_type bat_type_ext_thermistor[] = {
+[BATTERY_UNKNOWN] = {
+	/* First element always represent the UNKNOWN battery */
+	.name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN,
+	.resis_high = 0,
+	.resis_low = 0,
+	.battery_resistance = 300,
+	.charge_full_design = 612,
+	.nominal_voltage = 3700,
+	.termination_vol = 4050,
+	.termination_curr = 200,
+	.recharge_vol = 3990,
+	.normal_cur_lvl = 400,
+	.normal_vol_lvl = 4100,
+	.maint_a_cur_lvl = 400,
+	.maint_a_vol_lvl = 4050,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 400,
+	.maint_b_vol_lvl = 4000,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+/*
+ * These are the batteries that doesn't have an internal NTC resistor to measure
+ * its temperature. The temperature in this case is measure with a NTC placed
+ * near the battery but on the PCB.
+ */
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LIPO,
+	.resis_high = 76000,
+	.resis_low = 53000,
+	.battery_resistance = 300,
+	.charge_full_design = 900,
+	.nominal_voltage = 3700,
+	.termination_vol = 4150,
+	.termination_curr = 100,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LION,
+	.resis_high = 30000,
+	.resis_low = 10000,
+	.battery_resistance = 300,
+	.charge_full_design = 950,
+	.nominal_voltage = 3700,
+	.termination_vol = 4150,
+	.termination_curr = 100,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LION,
+	.resis_high = 95000,
+	.resis_low = 76001,
+	.battery_resistance = 300,
+	.charge_full_design = 950,
+	.nominal_voltage = 3700,
+	.termination_vol = 4150,
+	.termination_curr = 100,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+};
+
+static const struct abx500_bm_capacity_levels cap_levels = {
+	.critical	= 2,
+	.low		= 10,
+	.normal		= 70,
+	.high		= 95,
+	.full		= 100,
+};
+
+static const struct abx500_fg_parameters fg = {
+	.recovery_sleep_timer = 10,
+	.recovery_total_time = 100,
+	.init_timer = 1,
+	.init_discard_time = 5,
+	.init_total_time = 40,
+	.high_curr_time = 60,
+	.accu_charging = 30,
+	.accu_high_curr = 30,
+	.high_curr_threshold = 50,
+	.lowbat_threshold = 3100,
+	.battok_falling_th_sel0 = 2860,
+	.battok_raising_th_sel1 = 2860,
+	.user_cap_limit = 15,
+	.maint_thres = 97,
+};
+
+static const struct abx500_maxim_parameters maxi_params = {
+	.ena_maxi = true,
+	.chg_curr = 910,
+	.wait_cycles = 10,
+	.charger_curr_step = 100,
+};
+
+static const struct abx500_bm_charger_parameters chg = {
+	.usb_volt_max		= 5500,
+	.usb_curr_max		= 1500,
+	.ac_volt_max		= 7500,
+	.ac_curr_max		= 1500,
+};
+
+struct abx500_bm_data ab8500_bm_data = {
+	.temp_under		= 3,
+	.temp_low		= 8,
+	.temp_high		= 43,
+	.temp_over		= 48,
+	.main_safety_tmr_h	= 4,
+	.temp_interval_chg	= 20,
+	.temp_interval_nochg	= 120,
+	.usb_safety_tmr_h	= 4,
+	.bkup_bat_v		= BUP_VCH_SEL_2P6V,
+	.bkup_bat_i		= BUP_ICH_SEL_150UA,
+	.no_maintenance		= false,
+	.adc_therm		= ABx500_ADC_THERM_BATCTRL,
+	.chg_unknown_bat	= false,
+	.enable_overshoot	= false,
+	.fg_res			= 100,
+	.cap_levels		= &cap_levels,
+	.bat_type		= bat_type_thermistor,
+	.n_btypes		= 3,
+	.batt_id		= 0,
+	.interval_charging	= 5,
+	.interval_not_charging	= 120,
+	.temp_hysteresis	= 3,
+	.gnd_lift_resistance	= 34,
+	.maxi			= &maxi_params,
+	.chg_params		= &chg,
+	.fg_params		= &fg,
+};
+
+int __devinit
+bmdevs_of_probe(struct device *dev,
+		struct device_node *np,
+		struct abx500_bm_data **battery)
+{
+	struct	abx500_battery_type *btype;
+	struct  device_node *np_bat_supply;
+	struct	abx500_bm_data *bat;
+	const char *btech;
+	char bat_tech[8];
+	int i, thermistor;
+
+	*battery = &ab8500_bm_data;
+
+	/* get phandle to 'battery-info' node */
+	np_bat_supply = of_parse_phandle(np, "battery", 0);
+	if (!np_bat_supply) {
+		dev_err(dev, "missing property battery\n");
+		return -EINVAL;
+	}
+	if (of_property_read_bool(np_bat_supply,
+			"thermistor-on-batctrl"))
+		thermistor = NTC_INTERNAL;
+	else
+		thermistor = NTC_EXTERNAL;
+
+	bat = *battery;
+	if (thermistor == NTC_EXTERNAL) {
+		bat->n_btypes  = 4;
+		bat->bat_type  = bat_type_ext_thermistor;
+		bat->adc_therm = ABx500_ADC_THERM_BATTEMP;
+	}
+	btech = of_get_property(np_bat_supply,
+		"stericsson,battery-type", NULL);
+	if (!btech) {
+		dev_warn(dev, "missing property battery-name/type\n");
+		strcpy(bat_tech, "UNKNOWN");
+	} else {
+		strcpy(bat_tech, btech);
+	}
+
+	if (strncmp(bat_tech, "LION", 4) == 0) {
+		bat->no_maintenance  = true;
+		bat->chg_unknown_bat = true;
+		bat->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
+		bat->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
+		bat->bat_type[BATTERY_UNKNOWN].recharge_vol	  = 4130;
+		bat->bat_type[BATTERY_UNKNOWN].normal_cur_lvl	  = 520;
+		bat->bat_type[BATTERY_UNKNOWN].normal_vol_lvl	  = 4200;
+	}
+	/* select the battery resolution table */
+	for (i = 0; i < bat->n_btypes; ++i) {
+		btype = (bat->bat_type + i);
+		if (thermistor == NTC_EXTERNAL) {
+			btype->batres_tbl =
+				temp_to_batres_tbl_ext_thermistor;
+		} else if (strncmp(bat_tech, "LION", 4) == 0) {
+			btype->batres_tbl =
+				temp_to_batres_tbl_9100;
+		} else {
+			btype->batres_tbl =
+				temp_to_batres_tbl_thermistor;
+		}
+	}
+	of_node_put(np_bat_supply);
+	return 0;
+}
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index e3b6395b20dd..abc2abc16a5d 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -93,7 +93,7 @@ struct ab8500_btemp {
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
 	struct ab8500_fg *fg;
-	struct abx500_btemp_platform_data *pdata;
+	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply btemp_psy;
 	struct ab8500_btemp_events events;
@@ -962,10 +962,10 @@ static int __devexit ab8500_btemp_remove(struct platform_device *pdev)
 
 static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 {
+	struct abx500_bmdevs_plat_data *plat_data = pdev->dev.platform_data;
+	struct ab8500_btemp *di;
 	int irq, i, ret = 0;
 	u8 val;
-	struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data;
-	struct ab8500_btemp *di;
 
 	if (!plat_data) {
 		dev_err(&pdev->dev, "No platform data\n");
@@ -982,21 +982,13 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 	di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
 
 	/* get btemp specific platform data */
-	di->pdata = plat_data->btemp;
+	di->pdata = plat_data;
 	if (!di->pdata) {
 		dev_err(di->dev, "no btemp platform data supplied\n");
 		ret = -EINVAL;
 		goto free_device_info;
 	}
 
-	/* get battery specific platform data */
-	di->bat = plat_data->battery;
-	if (!di->bat) {
-		dev_err(di->dev, "no battery platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
 	/* BTEMP supply */
 	di->btemp_psy.name = "ab8500_btemp";
 	di->btemp_psy.type = POWER_SUPPLY_TYPE_BATTERY;
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 26ff759e2220..723edb47b1d8 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -220,7 +220,7 @@ struct ab8500_charger {
 	bool autopower;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_charger_platform_data *pdata;
+	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct ab8500_charger_event_flags flags;
 	struct ab8500_charger_usb_state usb_state;
@@ -2533,9 +2533,9 @@ static int __devexit ab8500_charger_remove(struct platform_device *pdev)
 
 static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 {
-	int irq, i, charger_status, ret = 0;
-	struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data;
+	struct abx500_bmdevs_plat_data *plat_data = pdev->dev.platform_data;
 	struct ab8500_charger *di;
+	int irq, i, charger_status, ret = 0;
 
 	if (!plat_data) {
 		dev_err(&pdev->dev, "No platform data\n");
@@ -2555,21 +2555,13 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 	spin_lock_init(&di->usb_state.usb_lock);
 
 	/* get charger specific platform data */
-	di->pdata = plat_data->charger;
+	di->pdata = plat_data;
 	if (!di->pdata) {
 		dev_err(di->dev, "no charger platform data supplied\n");
 		ret = -EINVAL;
 		goto free_device_info;
 	}
 
-	/* get battery specific platform data */
-	di->bat = plat_data->battery;
-	if (!di->bat) {
-		dev_err(di->dev, "no battery platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
 	di->autopower = false;
 
 	/* AC supply */
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 2db8cc254399..ed62ef788eb5 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -22,15 +22,16 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/kobject.h>
-#include <linux/mfd/abx500/ab8500.h>
-#include <linux/mfd/abx500.h>
 #include <linux/slab.h>
-#include <linux/mfd/abx500/ab8500-bm.h>
 #include <linux/delay.h>
-#include <linux/mfd/abx500/ab8500-gpadc.h>
-#include <linux/mfd/abx500.h>
 #include <linux/time.h>
+#include <linux/of.h>
 #include <linux/completion.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
 
 #define MILLI_TO_MICRO			1000
 #define FG_LSB_IN_MA			1627
@@ -172,7 +173,6 @@ struct inst_curr_result_list {
  * @avg_cap:		Average capacity filter
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
- * @pdata:		Pointer to the abx500_fg platform data
  * @bat:		Pointer to the abx500_bm platform data
  * @fg_psy:		Structure that holds the FG specific battery properties
  * @fg_wq:		Work queue for running the FG algorithm
@@ -212,7 +212,6 @@ struct ab8500_fg {
 	struct ab8500_fg_avg_cap avg_cap;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_fg_platform_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply fg_psy;
 	struct workqueue_struct *fg_wq;
@@ -2429,7 +2428,6 @@ static int __devexit ab8500_fg_remove(struct platform_device *pdev)
 	flush_scheduled_work();
 	power_supply_unregister(&di->fg_psy);
 	platform_set_drvdata(pdev, NULL);
-	kfree(di);
 	return ret;
 }
 
@@ -2442,21 +2440,39 @@ static struct ab8500_fg_interrupts ab8500_fg_irq[] = {
 	{"CCEOC", ab8500_fg_cc_data_end_handler},
 };
 
+static char *supply_interface[] = {
+	"ab8500_chargalg",
+	"ab8500_usb",
+};
+
 static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
+	struct ab8500_fg *di;
 	int i, irq;
 	int ret = 0;
-	struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data;
-	struct ab8500_fg *di;
-
-	if (!plat_data) {
-		dev_err(&pdev->dev, "No platform data\n");
-		return -EINVAL;
-	}
 
-	di = kzalloc(sizeof(*di), GFP_KERNEL);
-	if (!di)
+	di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__);
 		return -ENOMEM;
+	}
+	di->bat = pdev->mfd_cell->platform_data;
+	if (!di->bat) {
+		if (np) {
+			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			if (ret) {
+				dev_err(&pdev->dev,
+					"failed to get battery information\n");
+				return ret;
+			}
+		} else {
+			dev_err(&pdev->dev, "missing dt node for ab8500_fg\n");
+			return -EINVAL;
+		}
+	} else {
+		dev_info(&pdev->dev, "falling back to legacy platform data\n");
+	}
 
 	mutex_init(&di->cc_lock);
 
@@ -2465,29 +2481,13 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->parent = dev_get_drvdata(pdev->dev.parent);
 	di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
 
-	/* get fg specific platform data */
-	di->pdata = plat_data->fg;
-	if (!di->pdata) {
-		dev_err(di->dev, "no fg platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
-	/* get battery specific platform data */
-	di->bat = plat_data->battery;
-	if (!di->bat) {
-		dev_err(di->dev, "no battery platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
 	di->fg_psy.name = "ab8500_fg";
 	di->fg_psy.type = POWER_SUPPLY_TYPE_BATTERY;
 	di->fg_psy.properties = ab8500_fg_props;
 	di->fg_psy.num_properties = ARRAY_SIZE(ab8500_fg_props);
 	di->fg_psy.get_property = ab8500_fg_get_property;
-	di->fg_psy.supplied_to = di->pdata->supplied_to;
-	di->fg_psy.num_supplicants = di->pdata->num_supplicants;
+	di->fg_psy.supplied_to = supply_interface;
+	di->fg_psy.num_supplicants = ARRAY_SIZE(supply_interface),
 	di->fg_psy.external_power_changed = ab8500_fg_external_power_changed;
 
 	di->bat_cap.max_mah_design = MILLI_TO_MICRO *
@@ -2506,8 +2506,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq");
 	if (di->fg_wq == NULL) {
 		dev_err(di->dev, "failed to create work queue\n");
-		ret = -ENOMEM;
-		goto free_device_info;
+		return -ENOMEM;
 	}
 
 	/* Init work for running the fg algorithm instantly */
@@ -2606,12 +2605,14 @@ free_irq:
 	}
 free_inst_curr_wq:
 	destroy_workqueue(di->fg_wq);
-free_device_info:
-	kfree(di);
-
 	return ret;
 }
 
+static const struct of_device_id ab8500_fg_match[] = {
+	{ .compatible = "stericsson,ab8500-fg", },
+	{ },
+};
+
 static struct platform_driver ab8500_fg_driver = {
 	.probe = ab8500_fg_probe,
 	.remove = __devexit_p(ab8500_fg_remove),
@@ -2620,6 +2621,7 @@ static struct platform_driver ab8500_fg_driver = {
 	.driver = {
 		.name = "ab8500-fg",
 		.owner = THIS_MODULE,
+		.of_match_table = ab8500_fg_match,
 	},
 };
 
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 4d302803ffcc..758ea76de2f8 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -231,7 +231,7 @@ struct abx500_chargalg {
 	struct abx500_chargalg_charger_info chg_info;
 	struct abx500_chargalg_battery_data batt_data;
 	struct abx500_chargalg_suspension_status susp_status;
-	struct abx500_chargalg_platform_data *pdata;
+	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply chargalg_psy;
 	struct ux500_charger *ac_chg;
@@ -1802,7 +1802,7 @@ static int __devexit abx500_chargalg_remove(struct platform_device *pdev)
 
 static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 {
-	struct abx500_bm_plat_data *plat_data;
+	struct abx500_bmdevs_plat_data *plat_data;
 	int ret = 0;
 
 	struct abx500_chargalg *di =
@@ -1812,10 +1812,8 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 
 	/* get device struct */
 	di->dev = &pdev->dev;
-
 	plat_data = pdev->dev.platform_data;
-	di->pdata = plat_data->chargalg;
-	di->bat = plat_data->battery;
+	di->pdata = plat_data;
 
 	/* chargalg supply */
 	di->chargalg_psy.name = "abx500_chargalg";
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 5d5298d56026..33f2c58554f4 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -267,39 +267,27 @@ struct abx500_bm_data {
 	int gnd_lift_resistance;
 	const struct abx500_maxim_parameters *maxi;
 	const struct abx500_bm_capacity_levels *cap_levels;
-	const struct abx500_battery_type *bat_type;
+	struct abx500_battery_type *bat_type;
 	const struct abx500_bm_charger_parameters *chg_params;
 	const struct abx500_fg_parameters *fg_params;
 };
 
-struct abx500_chargalg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
-
-struct abx500_charger_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-	bool autopower_cfg;
-};
+extern struct abx500_bm_data ab8500_bm_data;
 
-struct abx500_btemp_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
+struct abx500_bmdevs_plat_data {
+	char	**supplied_to;
+	size_t	num_supplicants;
+	bool	autopower_cfg;
 };
 
-struct abx500_fg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
+enum {
+	NTC_EXTERNAL = 0,
+	NTC_INTERNAL,
 };
 
-struct abx500_bm_plat_data {
-	struct abx500_bm_data *battery;
-	struct abx500_charger_platform_data *charger;
-	struct abx500_btemp_platform_data *btemp;
-	struct abx500_fg_platform_data *fg;
-	struct abx500_chargalg_platform_data *chargalg;
-};
+int bmdevs_of_probe(struct device *dev,
+		struct device_node *np,
+		struct abx500_bm_data **battery);
 
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
 	u8 value);
-- 
cgit v1.2.3-55-g7522


From a12810ab9fcf0c9fd5e50b5e350a3ffbeaa571be Mon Sep 17 00:00:00 2001
From: Rajanikanth H.V
Date: Wed, 31 Oct 2012 15:40:33 +0000
Subject: ab8500: Add devicetree support for chargalg

This patch adds device tree support for charging algorithm driver

Signed-off-by: Rajanikanth H.V <rajanikanth.hv@stericsson.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 .../bindings/power_supply/ab8500/chargalg.txt      | 16 +++++++
 arch/arm/boot/dts/dbx5x0.dtsi                      |  5 ++
 drivers/mfd/ab8500-core.c                          |  5 ++
 drivers/power/abx500_chargalg.c                    | 54 +++++++++++++++-------
 include/linux/mfd/abx500.h                         |  6 ---
 5 files changed, 64 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt
new file mode 100644
index 000000000000..ef5328371122
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt
@@ -0,0 +1,16 @@
+=== AB8500 Charging Algorithm Driver ===
+
+The properties below describes the node for chargalg driver.
+
+Required Properties:
+- compatible = Shall be: "stericsson,ab8500-chargalg"
+- battery = Shall be battery specific information
+
+Example:
+ab8500_chargalg {
+	compatible = "stericsson,ab8500-chargalg";
+	battery	   = <&ab8500_battery>;
+};
+
+For information on battery specific node, Ref:
+Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi
index 1d4ad3098aeb..12a68af44903 100644
--- a/arch/arm/boot/dts/dbx5x0.dtsi
+++ b/arch/arm/boot/dts/dbx5x0.dtsi
@@ -373,6 +373,11 @@
 					vddadc-supply	= <&ab8500_ldo_tvout_reg>;
 				};
 
+				ab8500_chargalg {
+					compatible	= "stericsson,ab8500-chargalg";
+					battery		= <&ab8500_battery>;
+				};
+
 				ab8500_usb {
 					compatible = "stericsson,ab8500-usb";
 					interrupts = < 90 0x4
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index c7a120bfd50a..5ec70f26b9d5 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1071,8 +1071,13 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
 	},
 	{
 		.name = "ab8500-chargalg",
+		.of_compatible = "stericsson,ab8500-chargalg",
 		.num_resources = ARRAY_SIZE(ab8500_chargalg_resources),
 		.resources = ab8500_chargalg_resources,
+#ifndef CONFIG_OF
+		.platform_data = &ab8500_bm_data,
+		.pdata_size = sizeof(ab8500_bm_data),
+#endif
 	},
 };
 
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 758ea76de2f8..dcdc4393b9e7 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -21,6 +21,8 @@
 #include <linux/completion.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <linux/of.h>
+#include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
 #include <linux/mfd/abx500/ux500_chargalg.h>
 #include <linux/mfd/abx500/ab8500-bm.h>
@@ -205,7 +207,6 @@ enum maxim_ret {
  * @chg_info:		information about connected charger types
  * @batt_data:		data of the battery
  * @susp_status:	current charger suspension status
- * @pdata:		pointer to the abx500_chargalg platform data
  * @bat:		pointer to the abx500_bm platform data
  * @chargalg_psy:	structure that holds the battery properties exposed by
  *			the charging algorithm
@@ -231,7 +232,6 @@ struct abx500_chargalg {
 	struct abx500_chargalg_charger_info chg_info;
 	struct abx500_chargalg_battery_data batt_data;
 	struct abx500_chargalg_suspension_status susp_status;
-	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply chargalg_psy;
 	struct ux500_charger *ac_chg;
@@ -1795,25 +1795,44 @@ static int __devexit abx500_chargalg_remove(struct platform_device *pdev)
 	flush_scheduled_work();
 	power_supply_unregister(&di->chargalg_psy);
 	platform_set_drvdata(pdev, NULL);
-	kfree(di);
 
 	return 0;
 }
 
+static char *supply_interface[] = {
+	"ab8500_fg",
+};
+
 static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 {
-	struct abx500_bmdevs_plat_data *plat_data;
+	struct device_node *np = pdev->dev.of_node;
+	struct abx500_chargalg *di;
 	int ret = 0;
 
-	struct abx500_chargalg *di =
-		kzalloc(sizeof(struct abx500_chargalg), GFP_KERNEL);
-	if (!di)
+	di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&pdev->dev, "%s no mem for ab8500_chargalg\n", __func__);
 		return -ENOMEM;
+	}
+	di->bat = pdev->mfd_cell->platform_data;
+	if (!di->bat) {
+		if (np) {
+			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			if (ret) {
+				dev_err(&pdev->dev,
+					"failed to get battery information\n");
+				return ret;
+			}
+		} else {
+			dev_err(&pdev->dev, "missing dt node for ab8500_chargalg\n");
+			return -EINVAL;
+		}
+	} else {
+		dev_info(&pdev->dev, "falling back to legacy platform data\n");
+	}
 
 	/* get device struct */
 	di->dev = &pdev->dev;
-	plat_data = pdev->dev.platform_data;
-	di->pdata = plat_data;
 
 	/* chargalg supply */
 	di->chargalg_psy.name = "abx500_chargalg";
@@ -1821,8 +1840,8 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 	di->chargalg_psy.properties = abx500_chargalg_props;
 	di->chargalg_psy.num_properties = ARRAY_SIZE(abx500_chargalg_props);
 	di->chargalg_psy.get_property = abx500_chargalg_get_property;
-	di->chargalg_psy.supplied_to = di->pdata->supplied_to;
-	di->chargalg_psy.num_supplicants = di->pdata->num_supplicants;
+	di->chargalg_psy.supplied_to = supply_interface;
+	di->chargalg_psy.num_supplicants = ARRAY_SIZE(supply_interface),
 	di->chargalg_psy.external_power_changed =
 		abx500_chargalg_external_power_changed;
 
@@ -1842,7 +1861,7 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 		create_singlethread_workqueue("abx500_chargalg_wq");
 	if (di->chargalg_wq == NULL) {
 		dev_err(di->dev, "failed to create work queue\n");
-		goto free_device_info;
+		return -ENOMEM;
 	}
 
 	/* Init work for chargalg */
@@ -1883,20 +1902,23 @@ free_psy:
 	power_supply_unregister(&di->chargalg_psy);
 free_chargalg_wq:
 	destroy_workqueue(di->chargalg_wq);
-free_device_info:
-	kfree(di);
-
 	return ret;
 }
 
+static const struct of_device_id ab8500_chargalg_match[] = {
+	{ .compatible = "stericsson,ab8500-chargalg", },
+	{ },
+};
+
 static struct platform_driver abx500_chargalg_driver = {
 	.probe = abx500_chargalg_probe,
 	.remove = __devexit_p(abx500_chargalg_remove),
 	.suspend = abx500_chargalg_suspend,
 	.resume = abx500_chargalg_resume,
 	.driver = {
-		.name = "abx500-chargalg",
+		.name = "ab8500-chargalg",
 		.owner = THIS_MODULE,
+		.of_match_table = ab8500_chargalg_match,
 	},
 };
 
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 33f2c58554f4..2138bd33021a 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -274,12 +274,6 @@ struct abx500_bm_data {
 
 extern struct abx500_bm_data ab8500_bm_data;
 
-struct abx500_bmdevs_plat_data {
-	char	**supplied_to;
-	size_t	num_supplicants;
-	bool	autopower_cfg;
-};
-
 enum {
 	NTC_EXTERNAL = 0,
 	NTC_INTERNAL,
-- 
cgit v1.2.3-55-g7522


From 5928f5389664fffa295c39bbe23de73069124451 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Mon, 19 Nov 2012 06:58:29 +0530
Subject: regulator: max8973: add regulator driver support

The MAXIM MAX8973 high-efficiency, three phase, DC-DC step-down
switching regulator delievers up to 9A of output current. Each
phase operates at a 2MHz fixed frequency with a 120 deg shift
from the adjacent phase, allowing the use of small magnetic
component.

Add regulator driver for this device.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/Kconfig                   |  10 +
 drivers/regulator/Makefile                  |   1 +
 drivers/regulator/max8973-regulator.c       | 505 ++++++++++++++++++++++++++++
 include/linux/regulator/max8973-regulator.h |  72 ++++
 4 files changed, 588 insertions(+)
 create mode 100644 drivers/regulator/max8973-regulator.c
 create mode 100644 include/linux/regulator/max8973-regulator.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 67d47b59a66d..2b8031235125 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -204,6 +204,16 @@ config REGULATOR_MAX8952
 	  via I2C bus. Maxim 8952 has one voltage output and supports 4 DVS
 	  modes ranging from 0.77V to 1.40V by 0.01V steps.
 
+config REGULATOR_MAX8973
+	tristate "Maxim MAX8973 voltage regulator "
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  The MAXIM MAX8973 high-efficiency. three phase, DC-DC step-down
+	  switching regulator delievers up to 9A of output current. Each
+	  phase operates at a 2MHz fixed frequency with a 120 deg shift
+	  from the adjacent phase, allowing the use of small magnetic component.
+
 config REGULATOR_MAX8997
 	tristate "Maxim 8997/8966 regulator"
 	depends on MFD_MAX8997
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index e431eed8a878..3b51e0de33da 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
 obj-$(CONFIG_REGULATOR_MAX8907) += max8907-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o
+obj-$(CONFIG_REGULATOR_MAX8973) += max8973-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8997) += max8997.o
 obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o
 obj-$(CONFIG_REGULATOR_MAX77686) += max77686.o
diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c
new file mode 100644
index 000000000000..e6328f935767
--- /dev/null
+++ b/drivers/regulator/max8973-regulator.c
@@ -0,0 +1,505 @@
+/*
+ * max8973-regulator.c -- Maxim max8973
+ *
+ * Regulator driver for MAXIM 8973 DC-DC step-down switching regulator.
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/max8973-regulator.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/regmap.h>
+
+/* Register definitions */
+#define MAX8973_VOUT					0x0
+#define MAX8973_VOUT_DVS				0x1
+#define MAX8973_CONTROL1				0x2
+#define MAX8973_CONTROL2				0x3
+#define MAX8973_CHIPID1					0x4
+#define MAX8973_CHIPID2					0x5
+
+#define MAX8973_MAX_VOUT_REG				2
+
+/* MAX8973_VOUT */
+#define MAX8973_VOUT_ENABLE				BIT(7)
+#define MAX8973_VOUT_MASK				0x7F
+
+/* MAX8973_VOUT_DVS */
+#define MAX8973_DVS_VOUT_MASK				0x7F
+
+/* MAX8973_CONTROL1 */
+#define MAX8973_SNS_ENABLE				BIT(7)
+#define MAX8973_FPWM_EN_M				BIT(6)
+#define MAX8973_NFSR_ENABLE				BIT(5)
+#define MAX8973_AD_ENABLE				BIT(4)
+#define MAX8973_BIAS_ENABLE				BIT(3)
+#define MAX8973_FREQSHIFT_9PER				BIT(2)
+
+#define MAX8973_RAMP_12mV_PER_US			0x0
+#define MAX8973_RAMP_25mV_PER_US			0x1
+#define MAX8973_RAMP_50mV_PER_US			0x2
+#define MAX8973_RAMP_200mV_PER_US			0x3
+
+/* MAX8973_CONTROL2 */
+#define MAX8973_WDTMR_ENABLE				BIT(6)
+#define MAX8973_DISCH_ENBABLE				BIT(5)
+#define MAX8973_FT_ENABLE				BIT(4)
+
+#define MAX8973_CKKADV_TRIP_DISABLE			0xC
+#define MAX8973_CKKADV_TRIP_75mV_PER_US			0x0
+#define MAX8973_CKKADV_TRIP_150mV_PER_US		0x4
+#define MAX8973_CKKADV_TRIP_75mV_PER_US_HIST_DIS	0x8
+#define MAX8973_CONTROL_CLKADV_TRIP_MASK		0x00030000
+
+#define MAX8973_INDUCTOR_MIN_30_PER			0x0
+#define MAX8973_INDUCTOR_NOMINAL			0x1
+#define MAX8973_INDUCTOR_PLUS_30_PER			0x2
+#define MAX8973_INDUCTOR_PLUS_60_PER			0x3
+#define MAX8973_CONTROL_INDUCTOR_VALUE_MASK		0x00300000
+
+#define MAX8973_MIN_VOLATGE				606250
+#define MAX8973_MAX_VOLATGE				1400000
+#define MAX8973_VOLATGE_STEP				6250
+#define MAX8973_BUCK_N_VOLTAGE				0x80
+
+/* Maxim 8973 chip information */
+struct max8973_chip {
+	struct device *dev;
+	struct regulator_desc desc;
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+	bool enable_external_control;
+	int dvs_gpio;
+	int lru_index[MAX8973_MAX_VOUT_REG];
+	int curr_vout_val[MAX8973_MAX_VOUT_REG];
+	int curr_vout_reg;
+	int curr_gpio_val;
+	bool valid_dvs_gpio;
+};
+
+/*
+ * find_voltage_set_register: Find new voltage configuration register (VOUT).
+ * The finding of the new VOUT register will be based on the LRU mechanism.
+ * Each VOUT register will have different voltage configured . This
+ * Function will look if any of the VOUT register have requested voltage set
+ * or not.
+ *     - If it is already there then it will make that register as most
+ *       recently used and return as found so that caller need not to set
+ *       the VOUT register but need to set the proper gpios to select this
+ *       VOUT register.
+ *     - If requested voltage is not found then it will use the least
+ *       recently mechanism to get new VOUT register for new configuration
+ *       and will return not_found so that caller need to set new VOUT
+ *       register and then gpios (both).
+ */
+static bool find_voltage_set_register(struct max8973_chip *tps,
+		int req_vsel, int *vout_reg, int *gpio_val)
+{
+	int i;
+	bool found = false;
+	int new_vout_reg = tps->lru_index[MAX8973_MAX_VOUT_REG - 1];
+	int found_index = MAX8973_MAX_VOUT_REG - 1;
+
+	for (i = 0; i < MAX8973_MAX_VOUT_REG; ++i) {
+		if (tps->curr_vout_val[tps->lru_index[i]] == req_vsel) {
+			new_vout_reg = tps->lru_index[i];
+			found_index = i;
+			found = true;
+			goto update_lru_index;
+		}
+	}
+
+update_lru_index:
+	for (i = found_index; i > 0; i--)
+		tps->lru_index[i] = tps->lru_index[i - 1];
+
+	tps->lru_index[0] = new_vout_reg;
+	*gpio_val = new_vout_reg;
+	*vout_reg = MAX8973_VOUT + new_vout_reg;
+	return found;
+}
+
+static int max8973_dcdc_get_voltage_sel(struct regulator_dev *rdev)
+{
+	struct max8973_chip *max = rdev_get_drvdata(rdev);
+	unsigned int data;
+	int ret;
+
+	ret = regmap_read(max->regmap, max->curr_vout_reg, &data);
+	if (ret < 0) {
+		dev_err(max->dev, "register %d read failed, err = %d\n",
+			max->curr_vout_reg, ret);
+		return ret;
+	}
+	return data & MAX8973_VOUT_MASK;
+}
+
+static int max8973_dcdc_set_voltage_sel(struct regulator_dev *rdev,
+	     unsigned vsel)
+{
+	struct max8973_chip *max = rdev_get_drvdata(rdev);
+	int ret;
+	bool found = false;
+	int vout_reg = max->curr_vout_reg;
+	int gpio_val = max->curr_gpio_val;
+
+	/*
+	 * If gpios are available to select the VOUT register then least
+	 * recently used register for new configuration.
+	 */
+	if (max->valid_dvs_gpio)
+		found = find_voltage_set_register(max, vsel,
+					&vout_reg, &gpio_val);
+
+	if (!found) {
+		ret = regmap_update_bits(max->regmap, vout_reg,
+					MAX8973_VOUT_MASK, vsel);
+		if (ret < 0) {
+			dev_err(max->dev, "register %d update failed, err %d\n",
+				 vout_reg, ret);
+			return ret;
+		}
+		max->curr_vout_reg = vout_reg;
+		max->curr_vout_val[gpio_val] = vsel;
+	}
+
+	/* Select proper VOUT register vio gpios */
+	if (max->valid_dvs_gpio) {
+		gpio_set_value_cansleep(max->dvs_gpio, gpio_val & 0x1);
+		max->curr_gpio_val = gpio_val;
+	}
+	return 0;
+}
+
+static int max8973_dcdc_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	struct max8973_chip *max = rdev_get_drvdata(rdev);
+	int ret;
+	int pwm;
+
+	/* Enable force PWM mode in FAST mode only. */
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		pwm = MAX8973_FPWM_EN_M;
+		break;
+
+	case REGULATOR_MODE_NORMAL:
+		pwm = 0;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	ret = regmap_update_bits(max->regmap, MAX8973_CONTROL1,
+				MAX8973_FPWM_EN_M, pwm);
+	if (ret < 0)
+		dev_err(max->dev, "register %d update failed, err %d\n",
+				MAX8973_CONTROL1, ret);
+	return ret;
+}
+
+static unsigned int max8973_dcdc_get_mode(struct regulator_dev *rdev)
+{
+	struct max8973_chip *max = rdev_get_drvdata(rdev);
+	unsigned int data;
+	int ret;
+
+	ret = regmap_read(max->regmap, MAX8973_CONTROL1, &data);
+	if (ret < 0) {
+		dev_err(max->dev, "register %d read failed, err %d\n",
+				MAX8973_CONTROL1, ret);
+		return ret;
+	}
+	return (data & MAX8973_FPWM_EN_M) ?
+		REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL;
+}
+
+static struct regulator_ops max8973_dcdc_ops = {
+	.get_voltage_sel	= max8973_dcdc_get_voltage_sel,
+	.set_voltage_sel	= max8973_dcdc_set_voltage_sel,
+	.list_voltage		= regulator_list_voltage_linear,
+	.set_mode		= max8973_dcdc_set_mode,
+	.get_mode		= max8973_dcdc_get_mode,
+};
+
+static int __devinit max8973_init_dcdc(struct max8973_chip *max,
+		struct max8973_regulator_platform_data *pdata)
+{
+	int ret;
+	uint8_t	control1 = 0;
+	uint8_t control2 = 0;
+
+	if (pdata->control_flags & MAX8973_CONTROL_REMOTE_SENSE_ENABLE)
+		control1 |= MAX8973_SNS_ENABLE;
+
+	if (!(pdata->control_flags & MAX8973_CONTROL_FALLING_SLEW_RATE_ENABLE))
+		control1 |= MAX8973_NFSR_ENABLE;
+
+	if (pdata->control_flags & MAX8973_CONTROL_OUTPUT_ACTIVE_DISCH_ENABLE)
+		control1 |= MAX8973_AD_ENABLE;
+
+	if (pdata->control_flags & MAX8973_CONTROL_BIAS_ENABLE)
+		control1 |= MAX8973_BIAS_ENABLE;
+
+	if (pdata->control_flags & MAX8973_CONTROL_FREQ_SHIFT_9PER_ENABLE)
+		control1 |= MAX8973_FREQSHIFT_9PER;
+
+	/* Set ramp delay */
+	if (pdata->reg_init_data &&
+			pdata->reg_init_data->constraints.ramp_delay) {
+		if (pdata->reg_init_data->constraints.ramp_delay < 25000)
+			control1 = MAX8973_RAMP_12mV_PER_US;
+		else if (pdata->reg_init_data->constraints.ramp_delay < 50000)
+			control1 = MAX8973_RAMP_25mV_PER_US;
+		else if (pdata->reg_init_data->constraints.ramp_delay < 200000)
+			control1 = MAX8973_RAMP_50mV_PER_US;
+		else
+			control1 = MAX8973_RAMP_200mV_PER_US;
+	} else {
+		control1 = MAX8973_RAMP_12mV_PER_US;
+		max->desc.ramp_delay = 12500;
+	}
+
+	if (!(pdata->control_flags & MAX8973_CONTROL_PULL_DOWN_ENABLE))
+		control2 |= MAX8973_DISCH_ENBABLE;
+
+	/*  Clock advance trip configuration */
+	switch (pdata->control_flags & MAX8973_CONTROL_CLKADV_TRIP_MASK) {
+	case MAX8973_CONTROL_CLKADV_TRIP_DISABLED:
+		control2 |= MAX8973_CKKADV_TRIP_DISABLE;
+		break;
+
+	case MAX8973_CONTROL_CLKADV_TRIP_75mV_PER_US:
+		control2 |= MAX8973_CKKADV_TRIP_75mV_PER_US;
+		break;
+
+	case MAX8973_CONTROL_CLKADV_TRIP_150mV_PER_US:
+		control2 |= MAX8973_CKKADV_TRIP_150mV_PER_US;
+		break;
+
+	case MAX8973_CONTROL_CLKADV_TRIP_75mV_PER_US_HIST_DIS:
+		control2 |= MAX8973_CKKADV_TRIP_75mV_PER_US_HIST_DIS;
+		break;
+	}
+
+	/* Configure inductor value */
+	switch (pdata->control_flags & MAX8973_CONTROL_INDUCTOR_VALUE_MASK) {
+	case MAX8973_CONTROL_INDUCTOR_VALUE_NOMINAL:
+		control2 |= MAX8973_INDUCTOR_NOMINAL;
+		break;
+
+	case MAX8973_CONTROL_INDUCTOR_VALUE_MINUS_30_PER:
+		control2 |= MAX8973_INDUCTOR_MIN_30_PER;
+		break;
+
+	case MAX8973_CONTROL_INDUCTOR_VALUE_PLUS_30_PER:
+		control2 |= MAX8973_INDUCTOR_PLUS_30_PER;
+		break;
+
+	case MAX8973_CONTROL_INDUCTOR_VALUE_PLUS_60_PER:
+		control2 |= MAX8973_INDUCTOR_PLUS_60_PER;
+		break;
+	}
+
+	ret = regmap_write(max->regmap, MAX8973_CONTROL1, control1);
+	if (ret < 0) {
+		dev_err(max->dev, "register %d write failed, err = %d",
+				MAX8973_CONTROL1, ret);
+		return ret;
+	}
+
+	ret = regmap_write(max->regmap, MAX8973_CONTROL2, control2);
+	if (ret < 0) {
+		dev_err(max->dev, "register %d write failed, err = %d",
+				MAX8973_CONTROL2, ret);
+		return ret;
+	}
+
+	/* If external control is enabled then disable EN bit */
+	if (max->enable_external_control) {
+		ret = regmap_update_bits(max->regmap, MAX8973_VOUT,
+						MAX8973_VOUT_ENABLE, 0);
+		if (ret < 0)
+			dev_err(max->dev, "register %d update failed, err = %d",
+				MAX8973_VOUT, ret);
+	}
+	return ret;
+}
+
+static const struct regmap_config max8973_regmap_config = {
+	.reg_bits		= 8,
+	.val_bits		= 8,
+	.max_register		= MAX8973_CHIPID2,
+	.cache_type		= REGCACHE_RBTREE,
+};
+
+static int __devinit max8973_probe(struct i2c_client *client,
+				     const struct i2c_device_id *id)
+{
+	struct max8973_regulator_platform_data *pdata;
+	struct regulator_config config = { };
+	struct regulator_dev *rdev;
+	struct max8973_chip *max;
+	int ret;
+
+	pdata = client->dev.platform_data;
+	if (!pdata) {
+		dev_err(&client->dev, "No Platform data");
+		return -EIO;
+	}
+
+	max = devm_kzalloc(&client->dev, sizeof(*max), GFP_KERNEL);
+	if (!max) {
+		dev_err(&client->dev, "Memory allocation for max failed\n");
+		return -ENOMEM;
+	}
+
+	max->regmap = devm_regmap_init_i2c(client, &max8973_regmap_config);
+	if (IS_ERR(max->regmap)) {
+		ret = PTR_ERR(max->regmap);
+		dev_err(&client->dev, "regmap init failed, err %d\n", ret);
+		return ret;
+	}
+
+	i2c_set_clientdata(client, max);
+	max->dev = &client->dev;
+	max->desc.name = id->name;
+	max->desc.id = 0;
+	max->desc.ops = &max8973_dcdc_ops;
+	max->desc.type = REGULATOR_VOLTAGE;
+	max->desc.owner = THIS_MODULE;
+	max->desc.min_uV = MAX8973_MIN_VOLATGE;
+	max->desc.uV_step = MAX8973_VOLATGE_STEP;
+	max->desc.n_voltages = MAX8973_BUCK_N_VOLTAGE;
+
+	if (pdata->enable_ext_control) {
+		max->desc.enable_reg = MAX8973_VOUT;
+		max->desc.enable_mask = MAX8973_VOUT_ENABLE;
+		max8973_dcdc_ops.enable = regulator_is_enabled_regmap;
+		max8973_dcdc_ops.disable = regulator_disable_regmap;
+		max8973_dcdc_ops.is_enabled = regulator_is_enabled_regmap;
+	}
+
+	max->enable_external_control = pdata->enable_ext_control;
+	max->dvs_gpio = pdata->dvs_gpio;
+	max->curr_gpio_val = pdata->dvs_def_state;
+	max->curr_vout_reg = MAX8973_VOUT + pdata->dvs_def_state;
+	max->lru_index[0] = max->curr_vout_reg;
+	max->valid_dvs_gpio = false;
+
+	if (gpio_is_valid(max->dvs_gpio)) {
+		int gpio_flags;
+		int i;
+
+		gpio_flags = (pdata->dvs_def_state) ?
+				GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
+		ret = devm_gpio_request_one(&client->dev, max->dvs_gpio,
+				gpio_flags, "max8973-dvs");
+		if (ret) {
+			dev_err(&client->dev,
+				"gpio_request for gpio %d failed, err = %d\n",
+				max->dvs_gpio, ret);
+			return ret;
+		}
+		max->valid_dvs_gpio = true;
+
+		/*
+		 * Initialize the lru index with vout_reg id
+		 * The index 0 will be most recently used and
+		 * set with the max->curr_vout_reg */
+		for (i = 0; i < MAX8973_MAX_VOUT_REG; ++i)
+			max->lru_index[i] = i;
+		max->lru_index[0] = max->curr_vout_reg;
+		max->lru_index[max->curr_vout_reg] = 0;
+	}
+
+	ret = max8973_init_dcdc(max, pdata);
+	if (ret < 0) {
+		dev_err(max->dev, "Max8973 Init failed, err = %d\n", ret);
+		return ret;
+	}
+
+	config.dev = &client->dev;
+	config.init_data = pdata->reg_init_data;
+	config.driver_data = max;
+	config.of_node = client->dev.of_node;
+	config.regmap = max->regmap;
+
+	/* Register the regulators */
+	rdev = regulator_register(&max->desc, &config);
+	if (IS_ERR(rdev)) {
+		ret = PTR_ERR(rdev);
+		dev_err(max->dev, "regulator register failed, err %d\n", ret);
+		return ret;
+	}
+
+	max->rdev = rdev;
+	return 0;
+}
+
+static int __devexit max8973_remove(struct i2c_client *client)
+{
+	struct max8973_chip *max = i2c_get_clientdata(client);
+
+	regulator_unregister(max->rdev);
+	return 0;
+}
+
+static const struct i2c_device_id max8973_id[] = {
+	{.name = "max8973",},
+	{},
+};
+
+MODULE_DEVICE_TABLE(i2c, max8973_id);
+
+static struct i2c_driver max8973_i2c_driver = {
+	.driver = {
+		.name = "max8973",
+		.owner = THIS_MODULE,
+	},
+	.probe = max8973_probe,
+	.remove = __devexit_p(max8973_remove),
+	.id_table = max8973_id,
+};
+
+static int __init max8973_init(void)
+{
+	return i2c_add_driver(&max8973_i2c_driver);
+}
+subsys_initcall(max8973_init);
+
+static void __exit max8973_cleanup(void)
+{
+	i2c_del_driver(&max8973_i2c_driver);
+}
+module_exit(max8973_cleanup);
+
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_DESCRIPTION("MAX8973 voltage regulator driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h
new file mode 100644
index 000000000000..f8acc052e353
--- /dev/null
+++ b/include/linux/regulator/max8973-regulator.h
@@ -0,0 +1,72 @@
+/*
+ * max8973-regulator.h -- MAXIM 8973 regulator
+ *
+ * Interface for regulator driver for MAXIM 8973 DC-DC step-down
+ * switching regulator.
+ *
+ * Copyright (C) 2012 NVIDIA Corporation
+
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA	02110-1301, USA.
+ *
+ */
+
+#ifndef __LINUX_REGULATOR_MAX8973_H
+#define __LINUX_REGULATOR_MAX8973_H
+
+/*
+ * Control flags for configuration of the device.
+ * Client need to pass this information with ORed
+ */
+#define MAX8973_CONTROL_REMOTE_SENSE_ENABLE			0x00000001
+#define MAX8973_CONTROL_FALLING_SLEW_RATE_ENABLE		0x00000002
+#define MAX8973_CONTROL_OUTPUT_ACTIVE_DISCH_ENABLE		0x00000004
+#define MAX8973_CONTROL_BIAS_ENABLE				0x00000008
+#define MAX8973_CONTROL_PULL_DOWN_ENABLE			0x00000010
+#define MAX8973_CONTROL_FREQ_SHIFT_9PER_ENABLE			0x00000020
+
+#define MAX8973_CONTROL_CLKADV_TRIP_DISABLED			0x00000000
+#define MAX8973_CONTROL_CLKADV_TRIP_75mV_PER_US			0x00010000
+#define MAX8973_CONTROL_CLKADV_TRIP_150mV_PER_US		0x00020000
+#define MAX8973_CONTROL_CLKADV_TRIP_75mV_PER_US_HIST_DIS	0x00030000
+
+#define MAX8973_CONTROL_INDUCTOR_VALUE_NOMINAL			0x00000000
+#define MAX8973_CONTROL_INDUCTOR_VALUE_MINUS_30_PER		0x00100000
+#define MAX8973_CONTROL_INDUCTOR_VALUE_PLUS_30_PER		0x00200000
+#define MAX8973_CONTROL_INDUCTOR_VALUE_PLUS_60_PER		0x00300000
+
+/*
+ * struct max8973_regulator_platform_data - max8973 regulator platform data.
+ *
+ * @reg_init_data: The regulator init data.
+ * @control_flags: Control flags which are ORed value of above flags to
+ *		configure device.
+ * @enable_ext_control: Enable the voltage enable/disable through external
+ *		control signal from EN input pin. If it is false then
+ *		voltage output will be enabled/disabled through EN bit of
+ *		device register.
+ * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic.
+ * @dvs_def_state: Default state of dvs. 1 if it is high else 0.
+ */
+struct max8973_regulator_platform_data {
+	struct regulator_init_data *reg_init_data;
+	unsigned long control_flags;
+	bool enable_ext_control;
+	int dvs_gpio;
+	unsigned dvs_def_state:1;
+};
+
+#endif /* __LINUX_REGULATOR_MAX8973_H */
-- 
cgit v1.2.3-55-g7522


From 0857ba3c24c308f42a242fe8a1894772750230ce Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Sun, 18 Nov 2012 18:36:19 +0200
Subject: i2c: i2c-cbus-gpio: introduce driver

Add i2c driver to enable access to devices behind CBUS on Nokia Internet
Tablets.

The patch also adds CBUS I2C configuration for N8x0 which is one of the
users of this driver.

Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 .../devicetree/bindings/i2c/i2c-cbus-gpio.txt      |  27 ++
 arch/arm/mach-omap2/board-n8x0.c                   |  42 +++
 drivers/i2c/busses/Kconfig                         |  10 +
 drivers/i2c/busses/Makefile                        |   1 +
 drivers/i2c/busses/i2c-cbus-gpio.c                 | 300 +++++++++++++++++++++
 include/linux/platform_data/i2c-cbus-gpio.h        |  27 ++
 6 files changed, 407 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
 create mode 100644 drivers/i2c/busses/i2c-cbus-gpio.c
 create mode 100644 include/linux/platform_data/i2c-cbus-gpio.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
new file mode 100644
index 000000000000..8ce9cd2855b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
@@ -0,0 +1,27 @@
+Device tree bindings for i2c-cbus-gpio driver
+
+Required properties:
+	- compatible = "i2c-cbus-gpio";
+	- gpios: clk, dat, sel
+	- #address-cells = <1>;
+	- #size-cells = <0>;
+
+Optional properties:
+	- child nodes conforming to i2c bus binding
+
+Example:
+
+i2c@0 {
+	compatible = "i2c-cbus-gpio";
+	gpios = <&gpio 66 0 /* clk */
+		 &gpio 65 0 /* dat */
+		 &gpio 64 0 /* sel */
+		>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	retu-mfd: retu@1 {
+		compatible = "retu-mfd";
+		reg = <0x1>;
+	};
+};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index d95f727ca39a..bbfd74263c42 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -16,10 +16,12 @@
 #include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/stddef.h>
 #include <linux/i2c.h>
 #include <linux/spi/spi.h>
 #include <linux/usb/musb.h>
+#include <linux/platform_data/i2c-cbus-gpio.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/mtd-onenand-omap2.h>
 #include <sound/tlv320aic3x.h>
@@ -39,6 +41,45 @@
 #define TUSB6010_GPIO_ENABLE	0
 #define TUSB6010_DMACHAN	0x3f
 
+#if defined(CONFIG_I2C_CBUS_GPIO) || defined(CONFIG_I2C_CBUS_GPIO_MODULE)
+static struct i2c_cbus_platform_data n8x0_cbus_data = {
+	.clk_gpio = 66,
+	.dat_gpio = 65,
+	.sel_gpio = 64,
+};
+
+static struct platform_device n8x0_cbus_device = {
+	.name	= "i2c-cbus-gpio",
+	.id	= 3,
+	.dev	= {
+		.platform_data = &n8x0_cbus_data,
+	},
+};
+
+static struct i2c_board_info n8x0_i2c_board_info_3[] __initdata = {
+	{
+		I2C_BOARD_INFO("retu-mfd", 0x01),
+	},
+};
+
+static void __init n8x0_cbus_init(void)
+{
+	const int retu_irq_gpio = 108;
+
+	if (gpio_request_one(retu_irq_gpio, GPIOF_IN, "Retu IRQ"))
+		return;
+	irq_set_irq_type(gpio_to_irq(retu_irq_gpio), IRQ_TYPE_EDGE_RISING);
+	n8x0_i2c_board_info_3[0].irq = gpio_to_irq(retu_irq_gpio);
+	i2c_register_board_info(3, n8x0_i2c_board_info_3,
+				ARRAY_SIZE(n8x0_i2c_board_info_3));
+	platform_device_register(&n8x0_cbus_device);
+}
+#else /* CONFIG_I2C_CBUS_GPIO */
+static void __init n8x0_cbus_init(void)
+{
+}
+#endif /* CONFIG_I2C_CBUS_GPIO */
+
 #if defined(CONFIG_USB_MUSB_TUSB6010) || defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 /*
  * Enable or disable power to TUSB6010. When enabling, turn on 3.3 V and
@@ -677,6 +718,7 @@ static void __init n8x0_init_machine(void)
 	gpmc_onenand_init(board_onenand_data);
 	n8x0_mmc_init();
 	n8x0_usb_init();
+	n8x0_cbus_init();
 }
 
 MACHINE_START(NOKIA_N800, "Nokia N800")
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e9df4612b7eb..e949edf644d4 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -337,6 +337,16 @@ config I2C_BLACKFIN_TWI_CLK_KHZ
 	help
 	  The unit of the TWI clock is kHz.
 
+config I2C_CBUS_GPIO
+	tristate "CBUS I2C driver"
+	depends on GENERIC_GPIO
+	help
+	  Support for CBUS access using I2C API. Mostly relevant for Nokia
+	  Internet Tablets (770, N800 and N810).
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called i2c-cbus-gpio.
+
 config I2C_CPM
 	tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)"
 	depends on (CPM1 || CPM2) && OF_I2C
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 395b516ffa08..f9e3e0b5c827 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_I2C_POWERMAC)	+= i2c-powermac.o
 obj-$(CONFIG_I2C_AT91)		+= i2c-at91.o
 obj-$(CONFIG_I2C_AU1550)	+= i2c-au1550.o
 obj-$(CONFIG_I2C_BLACKFIN_TWI)	+= i2c-bfin-twi.o
+obj-$(CONFIG_I2C_CBUS_GPIO)	+= i2c-cbus-gpio.o
 obj-$(CONFIG_I2C_CPM)		+= i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)	+= i2c-davinci.o
 obj-$(CONFIG_I2C_DESIGNWARE_CORE)	+= i2c-designware-core.o
diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c
new file mode 100644
index 000000000000..98386d659318
--- /dev/null
+++ b/drivers/i2c/busses/i2c-cbus-gpio.c
@@ -0,0 +1,300 @@
+/*
+ * CBUS I2C driver for Nokia Internet Tablets.
+ *
+ * Copyright (C) 2004-2010 Nokia Corporation
+ *
+ * Based on code written by Juha Yrjölä, David Weinehall, Mikko Ylinen and
+ * Felipe Balbi. Converted to I2C driver by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/i2c-cbus-gpio.h>
+
+/*
+ * Bit counts are derived from Nokia implementation. These should be checked
+ * if other CBUS implementations appear.
+ */
+#define CBUS_ADDR_BITS	3
+#define CBUS_REG_BITS	5
+
+struct cbus_host {
+	spinlock_t	lock;		/* host lock */
+	struct device	*dev;
+	int		clk_gpio;
+	int		dat_gpio;
+	int		sel_gpio;
+};
+
+/**
+ * cbus_send_bit - sends one bit over the bus
+ * @host: the host we're using
+ * @bit: one bit of information to send
+ */
+static void cbus_send_bit(struct cbus_host *host, unsigned bit)
+{
+	gpio_set_value(host->dat_gpio, bit ? 1 : 0);
+	gpio_set_value(host->clk_gpio, 1);
+	gpio_set_value(host->clk_gpio, 0);
+}
+
+/**
+ * cbus_send_data - sends @len amount of data over the bus
+ * @host: the host we're using
+ * @data: the data to send
+ * @len: size of the transfer
+ */
+static void cbus_send_data(struct cbus_host *host, unsigned data, unsigned len)
+{
+	int i;
+
+	for (i = len; i > 0; i--)
+		cbus_send_bit(host, data & (1 << (i - 1)));
+}
+
+/**
+ * cbus_receive_bit - receives one bit from the bus
+ * @host: the host we're using
+ */
+static int cbus_receive_bit(struct cbus_host *host)
+{
+	int ret;
+
+	gpio_set_value(host->clk_gpio, 1);
+	ret = gpio_get_value(host->dat_gpio);
+	gpio_set_value(host->clk_gpio, 0);
+	return ret;
+}
+
+/**
+ * cbus_receive_word - receives 16-bit word from the bus
+ * @host: the host we're using
+ */
+static int cbus_receive_word(struct cbus_host *host)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 16; i > 0; i--) {
+		int bit = cbus_receive_bit(host);
+
+		if (bit < 0)
+			return bit;
+
+		if (bit)
+			ret |= 1 << (i - 1);
+	}
+	return ret;
+}
+
+/**
+ * cbus_transfer - transfers data over the bus
+ * @host: the host we're using
+ * @rw: read/write flag
+ * @dev: device address
+ * @reg: register address
+ * @data: if @rw == I2C_SBUS_WRITE data to send otherwise 0
+ */
+static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev,
+			 unsigned reg, unsigned data)
+{
+	unsigned long flags;
+	int ret;
+
+	/* We don't want interrupts disturbing our transfer */
+	spin_lock_irqsave(&host->lock, flags);
+
+	/* Reset state and start of transfer, SEL stays down during transfer */
+	gpio_set_value(host->sel_gpio, 0);
+
+	/* Set the DAT pin to output */
+	gpio_direction_output(host->dat_gpio, 1);
+
+	/* Send the device address */
+	cbus_send_data(host, dev, CBUS_ADDR_BITS);
+
+	/* Send the rw flag */
+	cbus_send_bit(host, rw == I2C_SMBUS_READ);
+
+	/* Send the register address */
+	cbus_send_data(host, reg, CBUS_REG_BITS);
+
+	if (rw == I2C_SMBUS_WRITE) {
+		cbus_send_data(host, data, 16);
+		ret = 0;
+	} else {
+		ret = gpio_direction_input(host->dat_gpio);
+		if (ret) {
+			dev_dbg(host->dev, "failed setting direction\n");
+			goto out;
+		}
+		gpio_set_value(host->clk_gpio, 1);
+
+		ret = cbus_receive_word(host);
+		if (ret < 0) {
+			dev_dbg(host->dev, "failed receiving data\n");
+			goto out;
+		}
+	}
+
+	/* Indicate end of transfer, SEL goes up until next transfer */
+	gpio_set_value(host->sel_gpio, 1);
+	gpio_set_value(host->clk_gpio, 1);
+	gpio_set_value(host->clk_gpio, 0);
+
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return ret;
+}
+
+static int cbus_i2c_smbus_xfer(struct i2c_adapter	*adapter,
+			       u16			addr,
+			       unsigned short		flags,
+			       char			read_write,
+			       u8			command,
+			       int			size,
+			       union i2c_smbus_data	*data)
+{
+	struct cbus_host *chost = i2c_get_adapdata(adapter);
+	int ret;
+
+	if (size != I2C_SMBUS_WORD_DATA)
+		return -EINVAL;
+
+	ret = cbus_transfer(chost, read_write == I2C_SMBUS_READ, addr,
+			    command, data->word);
+	if (ret < 0)
+		return ret;
+
+	if (read_write == I2C_SMBUS_READ)
+		data->word = ret;
+
+	return 0;
+}
+
+static u32 cbus_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_SMBUS_READ_WORD_DATA | I2C_FUNC_SMBUS_WRITE_WORD_DATA;
+}
+
+static const struct i2c_algorithm cbus_i2c_algo = {
+	.smbus_xfer	= cbus_i2c_smbus_xfer,
+	.functionality	= cbus_i2c_func,
+};
+
+static int cbus_i2c_remove(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+
+	return i2c_del_adapter(adapter);
+}
+
+static int cbus_i2c_probe(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter;
+	struct cbus_host *chost;
+	int ret;
+
+	adapter = devm_kzalloc(&pdev->dev, sizeof(struct i2c_adapter),
+			       GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	chost = devm_kzalloc(&pdev->dev, sizeof(*chost), GFP_KERNEL);
+	if (!chost)
+		return -ENOMEM;
+
+	if (pdev->dev.of_node) {
+		struct device_node *dnode = pdev->dev.of_node;
+		if (of_gpio_count(dnode) != 3)
+			return -ENODEV;
+		chost->clk_gpio = of_get_gpio(dnode, 0);
+		chost->dat_gpio = of_get_gpio(dnode, 1);
+		chost->sel_gpio = of_get_gpio(dnode, 2);
+	} else if (pdev->dev.platform_data) {
+		struct i2c_cbus_platform_data *pdata = pdev->dev.platform_data;
+		chost->clk_gpio = pdata->clk_gpio;
+		chost->dat_gpio = pdata->dat_gpio;
+		chost->sel_gpio = pdata->sel_gpio;
+	} else {
+		return -ENODEV;
+	}
+
+	adapter->owner		= THIS_MODULE;
+	adapter->class		= I2C_CLASS_HWMON;
+	adapter->dev.parent	= &pdev->dev;
+	adapter->nr		= pdev->id;
+	adapter->timeout	= HZ;
+	adapter->algo		= &cbus_i2c_algo;
+	strlcpy(adapter->name, "CBUS I2C adapter", sizeof(adapter->name));
+
+	spin_lock_init(&chost->lock);
+	chost->dev = &pdev->dev;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->clk_gpio,
+				    GPIOF_OUT_INIT_LOW, "CBUS clk");
+	if (ret)
+		return ret;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->dat_gpio, GPIOF_IN,
+				    "CBUS data");
+	if (ret)
+		return ret;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->sel_gpio,
+				    GPIOF_OUT_INIT_HIGH, "CBUS sel");
+	if (ret)
+		return ret;
+
+	i2c_set_adapdata(adapter, chost);
+	platform_set_drvdata(pdev, adapter);
+
+	return i2c_add_numbered_adapter(adapter);
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id i2c_cbus_dt_ids[] = {
+	{ .compatible = "i2c-cbus-gpio", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, i2c_cbus_dt_ids);
+#endif
+
+static struct platform_driver cbus_i2c_driver = {
+	.probe	= cbus_i2c_probe,
+	.remove	= cbus_i2c_remove,
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "i2c-cbus-gpio",
+	},
+};
+module_platform_driver(cbus_i2c_driver);
+
+MODULE_ALIAS("platform:i2c-cbus-gpio");
+MODULE_DESCRIPTION("CBUS I2C driver");
+MODULE_AUTHOR("Juha Yrjölä");
+MODULE_AUTHOR("David Weinehall");
+MODULE_AUTHOR("Mikko Ylinen");
+MODULE_AUTHOR("Felipe Balbi");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h
new file mode 100644
index 000000000000..6faa992a9502
--- /dev/null
+++ b/include/linux/platform_data/i2c-cbus-gpio.h
@@ -0,0 +1,27 @@
+/*
+ * i2c-cbus-gpio.h - CBUS I2C platform_data definition
+ *
+ * Copyright (C) 2004-2009 Nokia Corporation
+ *
+ * Written by Felipe Balbi and Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+
+struct i2c_cbus_platform_data {
+	int dat_gpio;
+	int clk_gpio;
+	int sel_gpio;
+};
+
+#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */
-- 
cgit v1.2.3-55-g7522


From 4a200c3b9a40242652b5734630bdd0bcf3aca75f Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires
Date: Mon, 12 Nov 2012 15:42:59 +0100
Subject: HID: i2c-hid: introduce HID over i2c specification implementation

Microsoft published the protocol specification of HID over i2c:
http://msdn.microsoft.com/en-us/library/windows/hardware/hh852380.aspx

This patch introduces an implementation of this protocol.

This implementation does not includes the ACPI part of the specification.
This will come when ACPI 5.0 devices enumeration will be available.

Once the ACPI part is done, OEM will not have to declare HID over I2C
devices in their platform specific driver.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig           |   2 +
 drivers/hid/Makefile          |   1 +
 drivers/hid/i2c-hid/Kconfig   |  21 +
 drivers/hid/i2c-hid/Makefile  |   5 +
 drivers/hid/i2c-hid/i2c-hid.c | 974 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/i2c-hid.h   |  35 ++
 6 files changed, 1038 insertions(+)
 create mode 100644 drivers/hid/i2c-hid/Kconfig
 create mode 100644 drivers/hid/i2c-hid/Makefile
 create mode 100644 drivers/hid/i2c-hid/i2c-hid.c
 create mode 100644 include/linux/i2c/i2c-hid.h

(limited to 'include/linux')

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 1630150ad2b1..95cfe9181bf2 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -728,4 +728,6 @@ endif # HID
 
 source "drivers/hid/usbhid/Kconfig"
 
+source "drivers/hid/i2c-hid/Kconfig"
+
 endmenu
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index cef68ca859d3..a1a28befac72 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -118,3 +118,4 @@ obj-$(CONFIG_USB_HID)		+= usbhid/
 obj-$(CONFIG_USB_MOUSE)		+= usbhid/
 obj-$(CONFIG_USB_KBD)		+= usbhid/
 
+obj-$(CONFIG_I2C_HID)		+= i2c-hid/
diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig
new file mode 100644
index 000000000000..5b7d4d87a143
--- /dev/null
+++ b/drivers/hid/i2c-hid/Kconfig
@@ -0,0 +1,21 @@
+menu "I2C HID support"
+	depends on I2C
+
+config I2C_HID
+	tristate "HID over I2C transport layer"
+	default n
+	depends on I2C && INPUT
+	select HID
+	---help---
+	  Say Y here if you want to use the HID over i2c protocol
+	  implementation.
+
+	  If unsure, say N.
+
+	  This support is also available as a module.  If so, the module
+	  will be called i2c-hid.
+
+comment "Input core support is needed for HID over I2C input layer"
+	depends on I2C_HID && INPUT=n
+
+endmenu
diff --git a/drivers/hid/i2c-hid/Makefile b/drivers/hid/i2c-hid/Makefile
new file mode 100644
index 000000000000..832d8f9aaba2
--- /dev/null
+++ b/drivers/hid/i2c-hid/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the I2C input drivers
+#
+
+obj-$(CONFIG_I2C_HID)				+= i2c-hid.o
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
new file mode 100644
index 000000000000..11140bdae660
--- /dev/null
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -0,0 +1,974 @@
+/*
+ * HID over I2C protocol implementation
+ *
+ * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
+ * Copyright (c) 2012 Red Hat, Inc
+ *
+ * This code is partly based on "USB HID support for Linux":
+ *
+ *  Copyright (c) 1999 Andreas Gal
+ *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ *  Copyright (c) 2007-2008 Oliver Neukum
+ *  Copyright (c) 2006-2010 Jiri Kosina
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm.h>
+#include <linux/device.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/hid.h>
+
+#include <linux/i2c/i2c-hid.h>
+
+/* flags */
+#define I2C_HID_STARTED		(1 << 0)
+#define I2C_HID_RESET_PENDING	(1 << 1)
+#define I2C_HID_READ_PENDING	(1 << 2)
+
+#define I2C_HID_PWR_ON		0x00
+#define I2C_HID_PWR_SLEEP	0x01
+
+/* debug option */
+static bool debug = false;
+module_param(debug, bool, 0444);
+MODULE_PARM_DESC(debug, "print a lot of debug information");
+
+#define i2c_hid_dbg(ihid, fmt, arg...)	\
+	if (debug)			\
+		dev_printk(KERN_DEBUG, &(ihid)->client->dev, fmt, ##arg)
+
+struct i2c_hid_desc {
+	__le16 wHIDDescLength;
+	__le16 bcdVersion;
+	__le16 wReportDescLength;
+	__le16 wReportDescRegister;
+	__le16 wInputRegister;
+	__le16 wMaxInputLength;
+	__le16 wOutputRegister;
+	__le16 wMaxOutputLength;
+	__le16 wCommandRegister;
+	__le16 wDataRegister;
+	__le16 wVendorID;
+	__le16 wProductID;
+	__le16 wVersionID;
+} __packed;
+
+struct i2c_hid_cmd {
+	unsigned int registerIndex;
+	__u8 opcode;
+	unsigned int length;
+	bool wait;
+};
+
+union command {
+	u8 data[0];
+	struct cmd {
+		__le16 reg;
+		__u8 reportTypeID;
+		__u8 opcode;
+	} __packed c;
+};
+
+#define I2C_HID_CMD(opcode_) \
+	.opcode = opcode_, .length = 4, \
+	.registerIndex = offsetof(struct i2c_hid_desc, wCommandRegister)
+
+/* fetch HID descriptor */
+static const struct i2c_hid_cmd hid_descr_cmd = { .length = 2 };
+/* fetch report descriptors */
+static const struct i2c_hid_cmd hid_report_descr_cmd = {
+		.registerIndex = offsetof(struct i2c_hid_desc,
+			wReportDescRegister),
+		.opcode = 0x00,
+		.length = 2 };
+/* commands */
+static const struct i2c_hid_cmd hid_reset_cmd =		{ I2C_HID_CMD(0x01),
+							  .wait = true };
+static const struct i2c_hid_cmd hid_get_report_cmd =	{ I2C_HID_CMD(0x02) };
+static const struct i2c_hid_cmd hid_set_report_cmd =	{ I2C_HID_CMD(0x03) };
+static const struct i2c_hid_cmd hid_get_idle_cmd =	{ I2C_HID_CMD(0x04) };
+static const struct i2c_hid_cmd hid_set_idle_cmd =	{ I2C_HID_CMD(0x05) };
+static const struct i2c_hid_cmd hid_get_protocol_cmd =	{ I2C_HID_CMD(0x06) };
+static const struct i2c_hid_cmd hid_set_protocol_cmd =	{ I2C_HID_CMD(0x07) };
+static const struct i2c_hid_cmd hid_set_power_cmd =	{ I2C_HID_CMD(0x08) };
+/* read/write data register */
+static const struct i2c_hid_cmd hid_data_cmd = {
+		.registerIndex = offsetof(struct i2c_hid_desc, wDataRegister),
+		.opcode = 0x00,
+		.length = 2 };
+/* write output reports */
+static const struct i2c_hid_cmd hid_out_cmd = {
+		.registerIndex = offsetof(struct i2c_hid_desc,
+			wOutputRegister),
+		.opcode = 0x00,
+		.length = 2 };
+
+/* The main device structure */
+struct i2c_hid {
+	struct i2c_client	*client;	/* i2c client */
+	struct hid_device	*hid;	/* pointer to corresponding HID dev */
+	union {
+		__u8 hdesc_buffer[sizeof(struct i2c_hid_desc)];
+		struct i2c_hid_desc hdesc;	/* the HID Descriptor */
+	};
+	__le16			wHIDDescRegister; /* location of the i2c
+						   * register of the HID
+						   * descriptor. */
+	unsigned int		bufsize;	/* i2c buffer size */
+	char			*inbuf;		/* Input buffer */
+	char			*cmdbuf;	/* Command buffer */
+	char			*argsbuf;	/* Command arguments buffer */
+
+	unsigned long		flags;		/* device flags */
+
+	int			irq;		/* the interrupt line irq */
+
+	wait_queue_head_t	wait;		/* For waiting the interrupt */
+};
+
+static int __i2c_hid_command(struct i2c_client *client,
+		const struct i2c_hid_cmd *command, u8 reportID,
+		u8 reportType, u8 *args, int args_len,
+		unsigned char *buf_recv, int data_len)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	union command *cmd = (union command *)ihid->cmdbuf;
+	int ret;
+	struct i2c_msg msg[2];
+	int msg_num = 1;
+
+	int length = command->length;
+	bool wait = command->wait;
+	unsigned int registerIndex = command->registerIndex;
+
+	/* special case for hid_descr_cmd */
+	if (command == &hid_descr_cmd) {
+		cmd->c.reg = ihid->wHIDDescRegister;
+	} else {
+		cmd->data[0] = ihid->hdesc_buffer[registerIndex];
+		cmd->data[1] = ihid->hdesc_buffer[registerIndex + 1];
+	}
+
+	if (length > 2) {
+		cmd->c.opcode = command->opcode;
+		cmd->c.reportTypeID = reportID | reportType << 4;
+	}
+
+	memcpy(cmd->data + length, args, args_len);
+	length += args_len;
+
+	i2c_hid_dbg(ihid, "%s: cmd=%*ph\n", __func__, length, cmd->data);
+
+	msg[0].addr = client->addr;
+	msg[0].flags = client->flags & I2C_M_TEN;
+	msg[0].len = length;
+	msg[0].buf = cmd->data;
+	if (data_len > 0) {
+		msg[1].addr = client->addr;
+		msg[1].flags = client->flags & I2C_M_TEN;
+		msg[1].flags |= I2C_M_RD;
+		msg[1].len = data_len;
+		msg[1].buf = buf_recv;
+		msg_num = 2;
+		set_bit(I2C_HID_READ_PENDING, &ihid->flags);
+	}
+
+	if (wait)
+		set_bit(I2C_HID_RESET_PENDING, &ihid->flags);
+
+	ret = i2c_transfer(client->adapter, msg, msg_num);
+
+	if (data_len > 0)
+		clear_bit(I2C_HID_READ_PENDING, &ihid->flags);
+
+	if (ret != msg_num)
+		return ret < 0 ? ret : -EIO;
+
+	ret = 0;
+
+	if (wait) {
+		i2c_hid_dbg(ihid, "%s: waiting...\n", __func__);
+		if (!wait_event_timeout(ihid->wait,
+				!test_bit(I2C_HID_RESET_PENDING, &ihid->flags),
+				msecs_to_jiffies(5000)))
+			ret = -ENODATA;
+		i2c_hid_dbg(ihid, "%s: finished.\n", __func__);
+	}
+
+	return ret;
+}
+
+static int i2c_hid_command(struct i2c_client *client,
+		const struct i2c_hid_cmd *command,
+		unsigned char *buf_recv, int data_len)
+{
+	return __i2c_hid_command(client, command, 0, 0, NULL, 0,
+				buf_recv, data_len);
+}
+
+static int i2c_hid_get_report(struct i2c_client *client, u8 reportType,
+		u8 reportID, unsigned char *buf_recv, int data_len)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	u8 args[3];
+	int ret;
+	int args_len = 0;
+	u16 readRegister = le16_to_cpu(ihid->hdesc.wDataRegister);
+
+	i2c_hid_dbg(ihid, "%s\n", __func__);
+
+	if (reportID >= 0x0F) {
+		args[args_len++] = reportID;
+		reportID = 0x0F;
+	}
+
+	args[args_len++] = readRegister & 0xFF;
+	args[args_len++] = readRegister >> 8;
+
+	ret = __i2c_hid_command(client, &hid_get_report_cmd, reportID,
+		reportType, args, args_len, buf_recv, data_len);
+	if (ret) {
+		dev_err(&client->dev,
+			"failed to retrieve report from device.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int i2c_hid_set_report(struct i2c_client *client, u8 reportType,
+		u8 reportID, unsigned char *buf, size_t data_len)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	u8 *args = ihid->argsbuf;
+	int ret;
+	u16 dataRegister = le16_to_cpu(ihid->hdesc.wDataRegister);
+
+	/* hidraw already checked that data_len < HID_MAX_BUFFER_SIZE */
+	u16 size =	2			/* size */ +
+			(reportID ? 1 : 0)	/* reportID */ +
+			data_len		/* buf */;
+	int args_len =	(reportID >= 0x0F ? 1 : 0) /* optional third byte */ +
+			2			/* dataRegister */ +
+			size			/* args */;
+	int index = 0;
+
+	i2c_hid_dbg(ihid, "%s\n", __func__);
+
+	if (reportID >= 0x0F) {
+		args[index++] = reportID;
+		reportID = 0x0F;
+	}
+
+	args[index++] = dataRegister & 0xFF;
+	args[index++] = dataRegister >> 8;
+
+	args[index++] = size & 0xFF;
+	args[index++] = size >> 8;
+
+	if (reportID)
+		args[index++] = reportID;
+
+	memcpy(&args[index], buf, data_len);
+
+	ret = __i2c_hid_command(client, &hid_set_report_cmd, reportID,
+		reportType, args, args_len, NULL, 0);
+	if (ret) {
+		dev_err(&client->dev, "failed to set a report to device.\n");
+		return -EINVAL;
+	}
+
+	return data_len;
+}
+
+static int i2c_hid_set_power(struct i2c_client *client, int power_state)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret;
+
+	i2c_hid_dbg(ihid, "%s\n", __func__);
+
+	ret = __i2c_hid_command(client, &hid_set_power_cmd, power_state,
+		0, NULL, 0, NULL, 0);
+	if (ret)
+		dev_err(&client->dev, "failed to change power setting.\n");
+
+	return ret;
+}
+
+static int i2c_hid_hwreset(struct i2c_client *client)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret;
+
+	i2c_hid_dbg(ihid, "%s\n", __func__);
+
+	ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+	if (ret)
+		return ret;
+
+	i2c_hid_dbg(ihid, "resetting...\n");
+
+	ret = i2c_hid_command(client, &hid_reset_cmd, NULL, 0);
+	if (ret) {
+		dev_err(&client->dev, "failed to reset device.\n");
+		i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int i2c_hid_get_input(struct i2c_hid *ihid)
+{
+	int ret, ret_size;
+	int size = le16_to_cpu(ihid->hdesc.wMaxInputLength);
+
+	ret = i2c_master_recv(ihid->client, ihid->inbuf, size);
+	if (ret != size) {
+		if (ret < 0)
+			return ret;
+
+		dev_err(&ihid->client->dev, "%s: got %d data instead of %d\n",
+			__func__, ret, size);
+		return ret;
+	}
+
+	ret_size = ihid->inbuf[0] | ihid->inbuf[1] << 8;
+
+	if (!ret_size) {
+		/* host or device initiated RESET completed */
+		if (test_and_clear_bit(I2C_HID_RESET_PENDING, &ihid->flags))
+			wake_up(&ihid->wait);
+		return 0;
+	}
+
+	if (ret_size > size) {
+		dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n",
+			__func__, size, ret_size);
+		return -EIO;
+	}
+
+	i2c_hid_dbg(ihid, "input: %*ph\n", ret_size, ihid->inbuf);
+
+	if (test_bit(I2C_HID_STARTED, &ihid->flags))
+		hid_input_report(ihid->hid, HID_INPUT_REPORT, ihid->inbuf + 2,
+				ret_size - 2, 1);
+
+	return 0;
+}
+
+static irqreturn_t i2c_hid_irq(int irq, void *dev_id)
+{
+	struct i2c_hid *ihid = dev_id;
+
+	if (test_bit(I2C_HID_READ_PENDING, &ihid->flags))
+		return IRQ_HANDLED;
+
+	i2c_hid_get_input(ihid);
+
+	return IRQ_HANDLED;
+}
+
+static int i2c_hid_get_report_length(struct hid_report *report)
+{
+	return ((report->size - 1) >> 3) + 1 +
+		report->device->report_enum[report->type].numbered + 2;
+}
+
+static void i2c_hid_init_report(struct hid_report *report, u8 *buffer,
+	size_t bufsize)
+{
+	struct hid_device *hid = report->device;
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	unsigned int size, ret_size;
+
+	size = i2c_hid_get_report_length(report);
+	i2c_hid_get_report(client,
+			report->type == HID_FEATURE_REPORT ? 0x03 : 0x01,
+			report->id, buffer, size);
+
+	i2c_hid_dbg(ihid, "report (len=%d): %*ph\n", size, size, ihid->inbuf);
+
+	ret_size = buffer[0] | (buffer[1] << 8);
+
+	if (ret_size != size) {
+		dev_err(&client->dev, "error in %s size:%d / ret_size:%d\n",
+			__func__, size, ret_size);
+		return;
+	}
+
+	/* hid->driver_lock is held as we are in probe function,
+	 * we just need to setup the input fields, so using
+	 * hid_report_raw_event is safe. */
+	hid_report_raw_event(hid, report->type, buffer + 2, size - 2, 1);
+}
+
+/*
+ * Initialize all reports
+ */
+static void i2c_hid_init_reports(struct hid_device *hid)
+{
+	struct hid_report *report;
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	u8 *inbuf = kzalloc(ihid->bufsize, GFP_KERNEL);
+
+	if (!inbuf)
+		return;
+
+	list_for_each_entry(report,
+		&hid->report_enum[HID_INPUT_REPORT].report_list, list)
+		i2c_hid_init_report(report, inbuf, ihid->bufsize);
+
+	list_for_each_entry(report,
+		&hid->report_enum[HID_FEATURE_REPORT].report_list, list)
+		i2c_hid_init_report(report, inbuf, ihid->bufsize);
+
+	kfree(inbuf);
+}
+
+/*
+ * Traverse the supplied list of reports and find the longest
+ */
+static void i2c_hid_find_max_report(struct hid_device *hid, unsigned int type,
+		unsigned int *max)
+{
+	struct hid_report *report;
+	unsigned int size;
+
+	/* We should not rely on wMaxInputLength, as some devices may set it to
+	 * a wrong length. */
+	list_for_each_entry(report, &hid->report_enum[type].report_list, list) {
+		size = i2c_hid_get_report_length(report);
+		if (*max < size)
+			*max = size;
+	}
+}
+
+static int i2c_hid_alloc_buffers(struct i2c_hid *ihid)
+{
+	/* the worst case is computed from the set_report command with a
+	 * reportID > 15 and the maximum report length */
+	int args_len = sizeof(__u8) + /* optional ReportID byte */
+		       sizeof(__u16) + /* data register */
+		       sizeof(__u16) + /* size of the report */
+		       ihid->bufsize; /* report */
+
+	ihid->inbuf = kzalloc(ihid->bufsize, GFP_KERNEL);
+
+	if (!ihid->inbuf)
+		return -ENOMEM;
+
+	ihid->argsbuf = kzalloc(args_len, GFP_KERNEL);
+
+	if (!ihid->argsbuf) {
+		kfree(ihid->inbuf);
+		return -ENOMEM;
+	}
+
+	ihid->cmdbuf = kzalloc(sizeof(union command) + args_len, GFP_KERNEL);
+
+	if (!ihid->cmdbuf) {
+		kfree(ihid->inbuf);
+		kfree(ihid->argsbuf);
+		ihid->inbuf = NULL;
+		ihid->argsbuf = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void i2c_hid_free_buffers(struct i2c_hid *ihid)
+{
+	kfree(ihid->inbuf);
+	kfree(ihid->argsbuf);
+	kfree(ihid->cmdbuf);
+	ihid->inbuf = NULL;
+	ihid->cmdbuf = NULL;
+	ihid->argsbuf = NULL;
+}
+
+static int i2c_hid_get_raw_report(struct hid_device *hid,
+		unsigned char report_number, __u8 *buf, size_t count,
+		unsigned char report_type)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret;
+
+	if (report_type == HID_OUTPUT_REPORT)
+		return -EINVAL;
+
+	if (count > ihid->bufsize)
+		count = ihid->bufsize;
+
+	ret = i2c_hid_get_report(client,
+			report_type == HID_FEATURE_REPORT ? 0x03 : 0x01,
+			report_number, ihid->inbuf, count);
+
+	if (ret < 0)
+		return ret;
+
+	count = ihid->inbuf[0] | (ihid->inbuf[1] << 8);
+
+	memcpy(buf, ihid->inbuf + 2, count);
+
+	return count;
+}
+
+static int i2c_hid_output_raw_report(struct hid_device *hid, __u8 *buf,
+		size_t count, unsigned char report_type)
+{
+	struct i2c_client *client = hid->driver_data;
+	int report_id = buf[0];
+
+	if (report_type == HID_INPUT_REPORT)
+		return -EINVAL;
+
+	return i2c_hid_set_report(client,
+				report_type == HID_FEATURE_REPORT ? 0x03 : 0x02,
+				report_id, buf, count);
+}
+
+static int i2c_hid_parse(struct hid_device *hid)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	struct i2c_hid_desc *hdesc = &ihid->hdesc;
+	unsigned int rsize;
+	char *rdesc;
+	int ret;
+	int tries = 3;
+
+	i2c_hid_dbg(ihid, "entering %s\n", __func__);
+
+	rsize = le16_to_cpu(hdesc->wReportDescLength);
+	if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) {
+		dbg_hid("weird size of report descriptor (%u)\n", rsize);
+		return -EINVAL;
+	}
+
+	do {
+		ret = i2c_hid_hwreset(client);
+		if (ret)
+			msleep(1000);
+	} while (tries-- > 0 && ret);
+
+	if (ret)
+		return ret;
+
+	rdesc = kzalloc(rsize, GFP_KERNEL);
+
+	if (!rdesc) {
+		dbg_hid("couldn't allocate rdesc memory\n");
+		return -ENOMEM;
+	}
+
+	i2c_hid_dbg(ihid, "asking HID report descriptor\n");
+
+	ret = i2c_hid_command(client, &hid_report_descr_cmd, rdesc, rsize);
+	if (ret) {
+		hid_err(hid, "reading report descriptor failed\n");
+		kfree(rdesc);
+		return -EIO;
+	}
+
+	i2c_hid_dbg(ihid, "Report Descriptor: %*ph\n", rsize, rdesc);
+
+	ret = hid_parse_report(hid, rdesc, rsize);
+	kfree(rdesc);
+	if (ret) {
+		dbg_hid("parsing report descriptor failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int i2c_hid_start(struct hid_device *hid)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret;
+	int old_bufsize = ihid->bufsize;
+
+	ihid->bufsize = HID_MIN_BUFFER_SIZE;
+	i2c_hid_find_max_report(hid, HID_INPUT_REPORT, &ihid->bufsize);
+	i2c_hid_find_max_report(hid, HID_OUTPUT_REPORT, &ihid->bufsize);
+	i2c_hid_find_max_report(hid, HID_FEATURE_REPORT, &ihid->bufsize);
+
+	if (ihid->bufsize > old_bufsize || !ihid->inbuf || !ihid->cmdbuf) {
+		i2c_hid_free_buffers(ihid);
+
+		ret = i2c_hid_alloc_buffers(ihid);
+
+		if (ret) {
+			ihid->bufsize = old_bufsize;
+			return ret;
+		}
+	}
+
+	if (!(hid->quirks & HID_QUIRK_NO_INIT_REPORTS))
+		i2c_hid_init_reports(hid);
+
+	return 0;
+}
+
+static void i2c_hid_stop(struct hid_device *hid)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+
+	hid->claimed = 0;
+
+	i2c_hid_free_buffers(ihid);
+}
+
+static int i2c_hid_open(struct hid_device *hid)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret;
+
+	if (!hid->open++) {
+		ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+		if (ret) {
+			hid->open--;
+			return -EIO;
+		}
+		set_bit(I2C_HID_STARTED, &ihid->flags);
+	}
+	return 0;
+}
+
+static void i2c_hid_close(struct hid_device *hid)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+
+	/* protecting hid->open to make sure we don't restart
+	 * data acquistion due to a resumption we no longer
+	 * care about
+	 */
+	if (!--hid->open) {
+		clear_bit(I2C_HID_STARTED, &ihid->flags);
+
+		/* Save some power */
+		i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+	}
+}
+
+static int i2c_hid_power(struct hid_device *hid, int lvl)
+{
+	struct i2c_client *client = hid->driver_data;
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret = 0;
+
+	i2c_hid_dbg(ihid, "%s lvl:%d\n", __func__, lvl);
+
+	switch (lvl) {
+	case PM_HINT_FULLON:
+		ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+		break;
+	case PM_HINT_NORMAL:
+		ret = i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+		break;
+	}
+	return ret;
+}
+
+static int i2c_hid_hidinput_input_event(struct input_dev *dev,
+		unsigned int type, unsigned int code, int value)
+{
+	struct hid_device *hid = input_get_drvdata(dev);
+	struct hid_field *field;
+	int offset;
+
+	if (type == EV_FF)
+		return input_ff_event(dev, type, code, value);
+
+	if (type != EV_LED)
+		return -1;
+
+	offset = hidinput_find_field(hid, type, code, &field);
+
+	if (offset == -1) {
+		hid_warn(dev, "event field not found\n");
+		return -1;
+	}
+
+	hid_set_field(field, offset, value);
+
+	return 0;
+}
+
+static struct hid_ll_driver i2c_hid_ll_driver = {
+	.parse = i2c_hid_parse,
+	.start = i2c_hid_start,
+	.stop = i2c_hid_stop,
+	.open = i2c_hid_open,
+	.close = i2c_hid_close,
+	.power = i2c_hid_power,
+	.hidinput_input_event = i2c_hid_hidinput_input_event,
+};
+
+static int __devinit i2c_hid_init_irq(struct i2c_client *client)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	int ret;
+
+	dev_dbg(&client->dev, "Requesting IRQ: %d\n", client->irq);
+
+	ret = request_threaded_irq(client->irq, NULL, i2c_hid_irq,
+			IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+			client->name, ihid);
+	if (ret < 0) {
+		dev_dbg(&client->dev,
+			"Could not register for %s interrupt, irq = %d,"
+			" ret = %d\n",
+		client->name, client->irq, ret);
+
+		return ret;
+	}
+
+	ihid->irq = client->irq;
+
+	return 0;
+}
+
+static int __devinit i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid)
+{
+	struct i2c_client *client = ihid->client;
+	struct i2c_hid_desc *hdesc = &ihid->hdesc;
+	unsigned int dsize;
+	int ret;
+
+	/* Fetch the length of HID description, retrieve the 4 first bytes:
+	 * bytes 0-1 -> length
+	 * bytes 2-3 -> bcdVersion (has to be 1.00) */
+	ret = i2c_hid_command(client, &hid_descr_cmd, ihid->hdesc_buffer, 4);
+
+	i2c_hid_dbg(ihid, "%s, ihid->hdesc_buffer: %*ph\n",
+			__func__, 4, ihid->hdesc_buffer);
+
+	if (ret) {
+		dev_err(&client->dev, "HID_DESCR_LENGTH_CMD Fail (ret=%d)\n",
+			ret);
+		return -ENODEV;
+	}
+
+	dsize = le16_to_cpu(hdesc->wHIDDescLength);
+	if (!dsize || dsize > HID_MAX_DESCRIPTOR_SIZE) {
+		dev_err(&client->dev, "weird size of HID descriptor (%u)\n",
+			dsize);
+		return -ENODEV;
+	}
+
+	/* check bcdVersion == 1.0 */
+	if (le16_to_cpu(hdesc->bcdVersion) != 0x0100) {
+		dev_err(&client->dev,
+			"unexpected HID descriptor bcdVersion (0x%04x)\n",
+			le16_to_cpu(hdesc->bcdVersion));
+		return -ENODEV;
+	}
+
+	i2c_hid_dbg(ihid, "Fetching the HID descriptor\n");
+
+	ret = i2c_hid_command(client, &hid_descr_cmd, ihid->hdesc_buffer,
+				dsize);
+	if (ret) {
+		dev_err(&client->dev, "hid_descr_cmd Fail\n");
+		return -ENODEV;
+	}
+
+	i2c_hid_dbg(ihid, "HID Descriptor: %*ph\n", dsize, ihid->hdesc_buffer);
+
+	return 0;
+}
+
+static int __devinit i2c_hid_probe(struct i2c_client *client,
+		const struct i2c_device_id *dev_id)
+{
+	int ret;
+	struct i2c_hid *ihid;
+	struct hid_device *hid;
+	__u16 hidRegister;
+	struct i2c_hid_platform_data *platform_data = client->dev.platform_data;
+
+	dbg_hid("HID probe called for i2c 0x%02x\n", client->addr);
+
+	if (!platform_data) {
+		dev_err(&client->dev, "HID register address not provided\n");
+		return -EINVAL;
+	}
+
+	if (!client->irq) {
+		dev_err(&client->dev,
+			"HID over i2c has not been provided an Int IRQ\n");
+		return -EINVAL;
+	}
+
+	ihid = kzalloc(sizeof(struct i2c_hid), GFP_KERNEL);
+	if (!ihid)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, ihid);
+
+	ihid->client = client;
+
+	hidRegister = platform_data->hid_descriptor_address;
+	ihid->wHIDDescRegister = cpu_to_le16(hidRegister);
+
+	init_waitqueue_head(&ihid->wait);
+
+	/* we need to allocate the command buffer without knowing the maximum
+	 * size of the reports. Let's use HID_MIN_BUFFER_SIZE, then we do the
+	 * real computation later. */
+	ihid->bufsize = HID_MIN_BUFFER_SIZE;
+	i2c_hid_alloc_buffers(ihid);
+
+	ret = i2c_hid_fetch_hid_descriptor(ihid);
+	if (ret < 0)
+		goto err;
+
+	ret = i2c_hid_init_irq(client);
+	if (ret < 0)
+		goto err;
+
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		ret = PTR_ERR(hid);
+		goto err;
+	}
+
+	ihid->hid = hid;
+
+	hid->driver_data = client;
+	hid->ll_driver = &i2c_hid_ll_driver;
+	hid->hid_get_raw_report = i2c_hid_get_raw_report;
+	hid->hid_output_raw_report = i2c_hid_output_raw_report;
+	hid->dev.parent = &client->dev;
+	hid->bus = BUS_I2C;
+	hid->version = le16_to_cpu(ihid->hdesc.bcdVersion);
+	hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
+	hid->product = le16_to_cpu(ihid->hdesc.wProductID);
+
+	snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X",
+		 client->name, hid->vendor, hid->product);
+
+	ret = hid_add_device(hid);
+	if (ret) {
+		if (ret != -ENODEV)
+			hid_err(client, "can't add hid device: %d\n", ret);
+		goto err_mem_free;
+	}
+
+	return 0;
+
+err_mem_free:
+	hid_destroy_device(hid);
+
+err:
+	if (ihid->irq)
+		free_irq(ihid->irq, ihid);
+
+	kfree(ihid);
+	return ret;
+}
+
+static int __devexit i2c_hid_remove(struct i2c_client *client)
+{
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+	struct hid_device *hid;
+
+	if (WARN_ON(!ihid))
+		return -1;
+
+	hid = ihid->hid;
+	hid_destroy_device(hid);
+
+	free_irq(client->irq, ihid);
+
+	kfree(ihid);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int i2c_hid_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(&client->dev))
+		enable_irq_wake(ihid->irq);
+
+	/* Save some power */
+	i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+
+	return 0;
+}
+
+static int i2c_hid_resume(struct device *dev)
+{
+	int ret;
+	struct i2c_client *client = to_i2c_client(dev);
+
+	ret = i2c_hid_hwreset(client);
+	if (ret)
+		return ret;
+
+	if (device_may_wakeup(&client->dev))
+		disable_irq_wake(client->irq);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_hid_pm, i2c_hid_suspend, i2c_hid_resume);
+
+static const struct i2c_device_id i2c_hid_id_table[] = {
+	{ "i2c_hid", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, i2c_hid_id_table);
+
+
+static struct i2c_driver i2c_hid_driver = {
+	.driver = {
+		.name	= "i2c_hid",
+		.owner	= THIS_MODULE,
+		.pm	= &i2c_hid_pm,
+	},
+
+	.probe		= i2c_hid_probe,
+	.remove		= __devexit_p(i2c_hid_remove),
+
+	.id_table	= i2c_hid_id_table,
+};
+
+module_i2c_driver(i2c_hid_driver);
+
+MODULE_DESCRIPTION("HID over I2C core driver");
+MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/i2c/i2c-hid.h
new file mode 100644
index 000000000000..60e411d764d4
--- /dev/null
+++ b/include/linux/i2c/i2c-hid.h
@@ -0,0 +1,35 @@
+/*
+ * HID over I2C protocol implementation
+ *
+ * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#ifndef __LINUX_I2C_HID_H
+#define __LINUX_I2C_HID_H
+
+#include <linux/types.h>
+
+/**
+ * struct i2chid_platform_data - used by hid over i2c implementation.
+ * @hid_descriptor_address: i2c register where the HID descriptor is stored.
+ *
+ * Note that it is the responsibility of the platform driver (or the acpi 5.0
+ * driver) to setup the irq related to the gpio in the struct i2c_board_info.
+ * The platform driver should also setup the gpio according to the device:
+ *
+ * A typical example is the following:
+ *	irq = gpio_to_irq(intr_gpio);
+ *	hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info
+ *	gpio_request(intr_gpio, "elan-irq");
+ *	s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP);
+ */
+struct i2c_hid_platform_data {
+	u16 hid_descriptor_address;
+};
+
+#endif /* __LINUX_I2C_HID_H */
-- 
cgit v1.2.3-55-g7522


From 02582e9bcc36ed503ffede46e104a885dea222fb Mon Sep 17 00:00:00 2001
From: Masanari Iida
Date: Wed, 22 Aug 2012 19:11:26 +0900
Subject: treewide: fix typo of "suport" in various comments and Kconfig

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/hwmon/pmbus                      | 2 +-
 Documentation/video4linux/bttv/Sound-FAQ       | 2 +-
 arch/mips/txx9/generic/pci.c                   | 2 +-
 arch/powerpc/include/asm/oprofile_impl.h       | 2 +-
 arch/sh/boards/board-espt.c                    | 2 +-
 drivers/acpi/acpica/dsopcode.c                 | 2 +-
 drivers/net/bonding/bonding.h                  | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 2 +-
 drivers/net/ethernet/micrel/ksz884x.c          | 2 +-
 drivers/net/wireless/atmel.c                   | 2 +-
 drivers/net/wireless/ipw2x00/ipw2100.h         | 2 +-
 drivers/regulator/palmas-regulator.c           | 2 +-
 drivers/scsi/libsas/sas_expander.c             | 2 +-
 drivers/staging/comedi/drivers/usbdux.c        | 2 +-
 drivers/staging/csr/netdev.c                   | 2 +-
 drivers/staging/tidspbridge/rmgr/node.c        | 2 +-
 drivers/usb/core/driver.c                      | 2 +-
 fs/cifs/README                                 | 2 +-
 fs/notify/fanotify/Kconfig                     | 2 +-
 include/linux/stmmac.h                         | 2 +-
 net/ipv4/tcp_cong.c                            | 2 +-
 net/mac80211/driver-ops.h                      | 2 +-
 net/sctp/endpointola.c                         | 2 +-
 net/sctp/sm_statefuns.c                        | 2 +-
 sound/pci/ice1712/delta.c                      | 2 +-
 25 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus
index f90f99920cc5..3d3a0f97f966 100644
--- a/Documentation/hwmon/pmbus
+++ b/Documentation/hwmon/pmbus
@@ -138,7 +138,7 @@ Sysfs entries
 
 When probing the chip, the driver identifies which PMBus registers are
 supported, and determines available sensors from this information.
-Attribute files only exist if respective sensors are suported by the chip.
+Attribute files only exist if respective sensors are supported by the chip.
 Labels are provided to inform the user about the sensor associated with
 a given sysfs entry.
 
diff --git a/Documentation/video4linux/bttv/Sound-FAQ b/Documentation/video4linux/bttv/Sound-FAQ
index 395f6c6fdd98..d3f1d7783d1c 100644
--- a/Documentation/video4linux/bttv/Sound-FAQ
+++ b/Documentation/video4linux/bttv/Sound-FAQ
@@ -82,7 +82,7 @@ card installed, you might to check out if you can read these registers
 values used by the windows driver.  A tool to do this is available
 from ftp://telepresence.dmem.strath.ac.uk/pub/bt848/winutil, but it
 does'nt work with bt878 boards according to some reports I received.
-Another one with bt878 suport is available from
+Another one with bt878 support is available from
 http://btwincap.sourceforge.net/Files/btspy2.00.zip
 
 You might also dig around in the *.ini files of the Windows applications.
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 4efd9185f294..b14ee53581a9 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -341,7 +341,7 @@ static void __devinit quirk_slc90e66_ide(struct pci_dev *dev)
 
 static void __devinit tc35815_fixup(struct pci_dev *dev)
 {
-	/* This device may have PM registers but not they are not suported. */
+	/* This device may have PM registers but not they are not supported. */
 	if (dev->pm_cap) {
 		dev_info(&dev->dev, "PM disabled\n");
 		dev->pm_cap = 0;
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index 639dc96077ab..d697b08994c9 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -34,7 +34,7 @@ struct op_system_config {
 	unsigned long mmcra;
 #ifdef CONFIG_OPROFILE_CELL
 	/* Register for oprofile user tool to check cell kernel profiling
-	 * suport.
+	 * support.
 	 */
 	unsigned long cell_support;
 #endif
diff --git a/arch/sh/boards/board-espt.c b/arch/sh/boards/board-espt.c
index 6cba0a7068bc..d71a0bcf8145 100644
--- a/arch/sh/boards/board-espt.c
+++ b/arch/sh/boards/board-espt.c
@@ -1,5 +1,5 @@
 /*
- * Data Technology Inc. ESPT-GIGA board suport
+ * Data Technology Inc. ESPT-GIGA board support
  *
  * Copyright (C) 2008, 2009 Renesas Solutions Corp.
  * Copyright (C) 2008, 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index aa34d8984d34..4fbb45fadb1e 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -1,6 +1,6 @@
 /******************************************************************************
  *
- * Module Name: dsopcode - Dispatcher suport for regions and fields
+ * Module Name: dsopcode - Dispatcher support for regions and fields
  *
  *****************************************************************************/
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index f8af2fcd3d16..486b1cbd8da8 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -244,7 +244,7 @@ struct bonding {
 	struct   delayed_work ad_work;
 	struct   delayed_work mcast_work;
 #ifdef CONFIG_DEBUG_FS
-	/* debugging suport via debugfs */
+	/* debugging support via debugfs */
 	struct	 dentry *debug_dir;
 #endif /* CONFIG_DEBUG_FS */
 };
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 614981c02264..a4fb5c0207be 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -4318,7 +4318,7 @@ static int bnx2x_queue_comp_cmd(struct bnx2x *bp,
 
 	if (o->next_tx_only >= o->max_cos)
 		/* >= becuase tx only must always be smaller than cos since the
-		 * primary connection suports COS 0
+		 * primary connection supports COS 0
 		 */
 		BNX2X_ERR("illegal value for next tx_only: %d. max cos was %d",
 			   o->next_tx_only, o->max_cos);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 318fee91c79d..693a88dddb48 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -6767,7 +6767,7 @@ static int stp;
 /*
  * This enables fast aging in the KSZ8842 switch.  Not sure what situation
  * needs that.  However, fast aging is used to flush the dynamic MAC table when
- * STP suport is enabled.
+ * STP support is enabled.
  */
 static int fast_aging;
 
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 4a4e98f71807..4374079dfc2a 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -2963,7 +2963,7 @@ static void send_association_request(struct atmel_private *priv, int is_reassoc)
 	ssid_el_p[1] = priv->SSID_size;
 	memcpy(ssid_el_p + 2, priv->SSID, priv->SSID_size);
 	ssid_el_p[2 + priv->SSID_size] = WLAN_EID_SUPP_RATES;
-	ssid_el_p[3 + priv->SSID_size] = 4; /* len of suported rates */
+	ssid_el_p[3 + priv->SSID_size] = 4; /* len of supported rates */
 	memcpy(ssid_el_p + 4 + priv->SSID_size, atmel_basic_rates, 4);
 
 	atmel_transmit_management_frame(priv, &header, (void *)&body, bodysize);
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.h b/drivers/net/wireless/ipw2x00/ipw2100.h
index 973125242490..5fe17cbab1f3 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.h
+++ b/drivers/net/wireless/ipw2x00/ipw2100.h
@@ -1045,7 +1045,7 @@ typedef enum _ORDINAL_TABLE_1 {	// NS - means Not Supported by FW
 	IPW_ORD_POWER_MGMT_MODE,	// Power mode - 0=CAM, 1=PSP
 	IPW_ORD_POWER_MGMT_INDEX,	//NS //
 	IPW_ORD_COUNTRY_CODE,	// IEEE country code as recv'd from beacon
-	IPW_ORD_COUNTRY_CHANNELS,	// channels suported by country
+	IPW_ORD_COUNTRY_CHANNELS,	// channels supported by country
 // IPW_ORD_COUNTRY_CHANNELS:
 // For 11b the lower 2-byte are used for channels from 1-14
 //   and the higher 2-byte are not used.
diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 07aee694ba92..08fec06dba50 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -773,7 +773,7 @@ static __devinit int palmas_probe(struct platform_device *pdev)
 		/*
 		 * read and store the RANGE bit for later use
 		 * This must be done before regulator is probed otherwise
-		 * we error in probe with unsuportable ranges.
+		 * we error in probe with unsupportable ranges.
 		 */
 		if (id != PALMAS_REG_SMPS10) {
 			addr = palmas_regs_info[id].vsel_addr;
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index efc6e72f09f3..aec2e0da5016 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1800,7 +1800,7 @@ out:
  * @dev:domain device to be detect.
  * @src_dev: the device which originated BROADCAST(CHANGE).
  *
- * Add self-configuration expander suport. Suppose two expander cascading,
+ * Add self-configuration expander support. Suppose two expander cascading,
  * when the first level expander is self-configuring, hotplug the disks in
  * second level expander, BROADCAST(CHANGE) will not only be originated
  * in the second level expander, but also be originated in the first level
diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c
index b536bba74351..c484a7352940 100644
--- a/drivers/staging/comedi/drivers/usbdux.c
+++ b/drivers/staging/comedi/drivers/usbdux.c
@@ -73,7 +73,7 @@ sampling rate. If you sample two channels you get 4kHz and so on.
  *       And loads of cleaning up, in particular streamlining the
  *       bulk transfers.
  * 1.1:  moved EP4 transfers to EP1 to make space for a PWM output on EP4
- * 1.2:  added PWM suport via EP4
+ * 1.2:  added PWM support via EP4
  * 2.0:  PWM seems to be stable and is not interfering with the other functions
  * 2.1:  changed PWM API
  * 2.2:  added firmware kernel request to fix an udev problem
diff --git a/drivers/staging/csr/netdev.c b/drivers/staging/csr/netdev.c
index 9a52ab408e1a..fc43cc22629c 100644
--- a/drivers/staging/csr/netdev.c
+++ b/drivers/staging/csr/netdev.c
@@ -55,7 +55,7 @@
 #include <net/pkt_sched.h>
 
 
-/* Wext handler is suported only if CSR_SUPPORT_WEXT is defined */
+/* Wext handler is supported only if CSR_SUPPORT_WEXT is defined */
 #ifdef CSR_SUPPORT_WEXT
 extern struct iw_handler_def unifi_iw_handler_def;
 #endif /* CSR_SUPPORT_WEXT */
diff --git a/drivers/staging/tidspbridge/rmgr/node.c b/drivers/staging/tidspbridge/rmgr/node.c
index 294e9b40f516..737f4a9d86a3 100644
--- a/drivers/staging/tidspbridge/rmgr/node.c
+++ b/drivers/staging/tidspbridge/rmgr/node.c
@@ -736,7 +736,7 @@ DBAPI node_alloc_msg_buf(struct node_object *hnode, u32 usize,
 		case 4:
 			break;
 		default:
-			/* alignment value not suportted */
+			/* alignment value not supportted */
 			status = -EPERM;
 			break;
 		}
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 6056db7af410..263c5035eabf 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -238,7 +238,7 @@ static int usb_probe_device(struct device *dev)
 	/* TODO: Add real matching code */
 
 	/* The device should always appear to be in use
-	 * unless the driver suports autosuspend.
+	 * unless the driver supports autosuspend.
 	 */
 	if (!udriver->supports_autosuspend)
 		error = usb_autoresume_device(udev);
diff --git a/fs/cifs/README b/fs/cifs/README
index 22ab7b5b8da7..2d5622f60e11 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -480,7 +480,7 @@ A partial list of the supported mount options follows:
 		Unicode on the wire.
  nomapchars     Do not translate any of these seven characters (default).
  nocase         Request case insensitive path name matching (case
-		sensitive is the default if the server suports it).
+		sensitive is the default if the server supports it).
 		(mount option "ignorecase" is identical to "nocase")
  posixpaths     If CIFS Unix extensions are supported, attempt to
 		negotiate posix path name support which allows certain
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 7dceff005a67..e5f911bd80d2 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -4,7 +4,7 @@ config FANOTIFY
 	select ANON_INODES
 	default n
 	---help---
-	   Say Y here to enable fanotify suport.  fanotify is a file access
+	   Say Y here to enable fanotify support.  fanotify is a file access
 	   notification system which differs from inotify in that it sends
 	   an open file descriptor to the userspace listener along with
 	   the event.
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a1547ea3920d..a619f7025cc9 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -61,7 +61,7 @@
 #define STMMAC_CSR_I_16		0xE	/* clk_csr_i/16 */
 #define STMMAC_CSR_I_18		0xF	/* clk_csr_i/18 */
 
-/* AXI DMA Burst length suported */
+/* AXI DMA Burst length supported */
 #define DMA_AXI_BLEN_4		(1 << 1)
 #define DMA_AXI_BLEN_8		(1 << 2)
 #define DMA_AXI_BLEN_16		(1 << 3)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1432cdb0644c..701a7f6fb9b1 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -1,7 +1,7 @@
 /*
  * Plugable TCP congestion control support and newReno
  * congestion control.
- * Based on ideas from I/O scheduler suport and Web100.
+ * Based on ideas from I/O scheduler support and Web100.
  *
  * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
  */
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index da9003b20004..f28772443bd3 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -602,7 +602,7 @@ static inline void drv_reset_tsf(struct ieee80211_local *local,
 
 static inline int drv_tx_last_beacon(struct ieee80211_local *local)
 {
-	int ret = 0; /* default unsuported op for less congestion */
+	int ret = 0; /* default unsupported op for less congestion */
 
 	might_sleep();
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1859e2bc83d1..bd7e6a6a815a 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -163,7 +163,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 
 	list_add(&null_key->key_list, &ep->endpoint_shared_keys);
 
-	/* Allocate and initialize transorms arrays for suported HMACs. */
+	/* Allocate and initialize transorms arrays for supported HMACs. */
 	err = sctp_auth_init_hmacs(ep, gfp);
 	if (err)
 		goto nomem_hmacs;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b6adef8a1e93..49493f335861 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3994,7 +3994,7 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
 	chunk->subh.auth_hdr = auth_hdr;
 	skb_pull(chunk->skb, sizeof(struct sctp_authhdr));
 
-	/* Make sure that we suport the HMAC algorithm from the auth
+	/* Make sure that we support the HMAC algorithm from the auth
 	 * chunk.
 	 */
 	if (!sctp_auth_asoc_verify_hmac_id(asoc, auth_hdr->hmac_id))
diff --git a/sound/pci/ice1712/delta.c b/sound/pci/ice1712/delta.c
index 20c6b079d0df..d09239cf7961 100644
--- a/sound/pci/ice1712/delta.c
+++ b/sound/pci/ice1712/delta.c
@@ -617,7 +617,7 @@ static int __devinit snd_ice1712_delta_init(struct snd_ice1712 *ice)
 		ice->num_total_dacs = 4;	/* two AK4324 codecs */
 		break;
 	case ICE1712_SUBDEVICE_VX442:
-	case ICE1712_SUBDEVICE_DELTA66E:	/* omni not suported yet */
+	case ICE1712_SUBDEVICE_DELTA66E:	/* omni not supported yet */
 		ice->num_total_dacs = 4;
 		ice->num_total_adcs = 4;
 		break;
-- 
cgit v1.2.3-55-g7522


From d93cf0687c9853bf91b1b9a5124ab97ebc47e00c Mon Sep 17 00:00:00 2001
From: Adam Buchbinder
Date: Wed, 19 Sep 2012 21:47:58 -0400
Subject: various: Fix spelling of "registered" in comments.

Some comments misspell "registered"; this fixes them. No code changes.

Signed-off-by: Adam Buchbinder <adam.buchbinder@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/cris/include/arch-v10/arch/irq.h | 2 +-
 arch/cris/include/arch-v32/arch/irq.h | 2 +-
 include/linux/netdevice.h             | 2 +-
 include/net/irda/irlmp.h              | 2 +-
 net/netfilter/nf_log.c                | 2 +-
 sound/core/seq/seq_device.c           | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/cris/include/arch-v10/arch/irq.h b/arch/cris/include/arch-v10/arch/irq.h
index 7d345947b3ee..ca2675ae08ed 100644
--- a/arch/cris/include/arch-v10/arch/irq.h
+++ b/arch/cris/include/arch-v10/arch/irq.h
@@ -142,7 +142,7 @@ __asm__ ( \
  * it here, we would not get the multiple_irq at all.
  *
  * The non-blocking here is based on the knowledge that the timer interrupt is 
- * registred as a fast interrupt (IRQF_DISABLED) so that we _know_ there will not
+ * registered as a fast interrupt (IRQF_DISABLED) so that we _know_ there will not
  * be an sti() before the timer irq handler is run to acknowledge the interrupt.
  */
 
diff --git a/arch/cris/include/arch-v32/arch/irq.h b/arch/cris/include/arch-v32/arch/irq.h
index b31e9984f849..fe3cdd22bed4 100644
--- a/arch/cris/include/arch-v32/arch/irq.h
+++ b/arch/cris/include/arch-v32/arch/irq.h
@@ -103,7 +103,7 @@ __asm__ (				\
  * if we had BLOCK'edit here, we would not get the multiple_irq at all.
  *
  * The non-blocking here is based on the knowledge that the timer interrupt is
- * registred as a fast interrupt (IRQF_DISABLED) so that we _know_ there will not
+ * registered as a fast interrupt (IRQF_DISABLED) so that we _know_ there will not
  * be an sti() before the timer irq handler is run to acknowledge the interrupt.
  */
 #define BUILD_TIMER_IRQ(nr, mask) 	\
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8eda0276f03..b2056e6e2acc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -369,7 +369,7 @@ typedef enum gro_result gro_result_t;
  *
  * If the rx_handler consider the skb should be ignored, it should return
  * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that
- * are registred on exact device (ptype->dev == skb->dev).
+ * are registered on exact device (ptype->dev == skb->dev).
  *
  * If the rx_handler didn't changed skb->dev, but want the skb to be normally
  * delivered, it should return RX_HANDLER_PASS.
diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h
index fff11b7fe8a4..591f78631f13 100644
--- a/include/net/irda/irlmp.h
+++ b/include/net/irda/irlmp.h
@@ -134,7 +134,7 @@ typedef struct {
 } CACHE_ENTRY;
 
 /*
- *  Information about each registred IrLAP layer
+ *  Information about each registered IrLAP layer
  */
 struct lap_cb {
 	irda_queue_t queue; /* Must be first */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 703fb26aa48d..9e312695c818 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -32,7 +32,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
 	return NULL;
 }
 
-/* return EEXIST if the same logger is registred, 0 on success. */
+/* return EEXIST if the same logger is registered, 0 on success. */
 int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 {
 	const struct nf_logger *llog;
diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index 60e8fc1b3440..040c60e1da28 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -66,7 +66,7 @@ struct ops_list {
 	/* operators */
 	struct snd_seq_dev_ops ops;
 
-	/* registred devices */
+	/* registered devices */
 	struct list_head dev_list;	/* list of devices */
 	int num_devices;	/* number of associated devices */
 	int num_init_devices;	/* number of initialized devices */
-- 
cgit v1.2.3-55-g7522


From 48fc7f7e787dd65ffe88521bce31f4062ba273eb Mon Sep 17 00:00:00 2001
From: Adam Buchbinder
Date: Wed, 19 Sep 2012 21:48:00 -0400
Subject: Fix misspellings of "whether" in comments.

"Whether" is misspelled in various comments across the tree; this
fixes them. No code changes.

Signed-off-by: Adam Buchbinder <adam.buchbinder@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/arm/mach-s3c24xx/include/mach/bast-map.h                   | 2 +-
 arch/arm/mach-s3c24xx/include/mach/dma.h                        | 2 +-
 arch/arm/mach-s3c24xx/include/mach/vr1000-map.h                 | 2 +-
 arch/arm/mach-s3c24xx/pm.c                                      | 2 +-
 arch/arm/plat-s3c24xx/dma.c                                     | 2 +-
 arch/m68k/math-emu/fp_log.c                                     | 2 +-
 arch/powerpc/include/asm/pte-hash64-64k.h                       | 2 +-
 arch/powerpc/include/asm/smu.h                                  | 4 ++--
 arch/powerpc/kernel/legacy_serial.c                             | 2 +-
 arch/powerpc/kernel/of_platform.c                               | 2 +-
 arch/powerpc/kernel/signal_64.c                                 | 2 +-
 arch/powerpc/mm/slice.c                                         | 2 +-
 arch/powerpc/platforms/52xx/mpc52xx_gpt.c                       | 2 +-
 arch/powerpc/platforms/cell/iommu.c                             | 2 +-
 arch/powerpc/platforms/cell/spider-pic.c                        | 6 +++---
 arch/powerpc/platforms/powermac/pic.c                           | 2 +-
 drivers/gpu/drm/radeon/evergreen.c                              | 2 +-
 drivers/gpu/drm/radeon/ni.c                                     | 4 ++--
 drivers/gpu/drm/radeon/si.c                                     | 6 +++---
 drivers/i2c/busses/i2c-nuc900.c                                 | 2 +-
 drivers/i2c/busses/i2c-s3c2410.c                                | 4 ++--
 drivers/mtd/maps/plat-ram.c                                     | 2 +-
 drivers/mtd/nand/s3c2410.c                                      | 2 +-
 drivers/net/ethernet/8390/ax88796.c                             | 2 +-
 drivers/net/ethernet/myricom/myri10ge/myri10ge_mcp_gen_header.h | 2 +-
 drivers/net/sungem_phy.c                                        | 8 ++++----
 drivers/rtc/rtc-isl1208.c                                       | 2 +-
 drivers/rtc/rtc-s3c.c                                           | 2 +-
 drivers/s390/block/dasd_devmap.c                                | 2 +-
 drivers/usb/storage/realtek_cr.c                                | 2 +-
 drivers/watchdog/booke_wdt.c                                    | 2 +-
 fs/btrfs/extent-tree.c                                          | 2 +-
 fs/btrfs/ordered-data.h                                         | 2 +-
 fs/ext4/ext4.h                                                  | 2 +-
 fs/fhandle.c                                                    | 4 ++--
 fs/jbd/transaction.c                                            | 2 +-
 fs/jbd2/transaction.c                                           | 2 +-
 fs/logfs/inode.c                                                | 2 +-
 include/linux/lru_cache.h                                       | 4 ++--
 include/linux/vgaarb.h                                          | 4 ++--
 include/linux/watchdog.h                                        | 2 +-
 include/net/sock.h                                              | 2 +-
 net/can/proc.c                                                  | 2 +-
 sound/pci/es1968.c                                              | 2 +-
 sound/usb/quirks.c                                              | 2 +-
 45 files changed, 58 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c24xx/include/mach/bast-map.h b/arch/arm/mach-s3c24xx/include/mach/bast-map.h
index 6e7dc9d0cf0e..eecea2a50f8f 100644
--- a/arch/arm/mach-s3c24xx/include/mach/bast-map.h
+++ b/arch/arm/mach-s3c24xx/include/mach/bast-map.h
@@ -74,7 +74,7 @@
 
 
 /* 0xE0000000 contains the IO space that is split by speed and
- * wether the access is for 8 or 16bit IO... this ensures that
+ * whether the access is for 8 or 16bit IO... this ensures that
  * the correct access is made
  *
  * 0x10000000 of space, partitioned as so:
diff --git a/arch/arm/mach-s3c24xx/include/mach/dma.h b/arch/arm/mach-s3c24xx/include/mach/dma.h
index ee99fd56c043..6b72d5a4b377 100644
--- a/arch/arm/mach-s3c24xx/include/mach/dma.h
+++ b/arch/arm/mach-s3c24xx/include/mach/dma.h
@@ -88,7 +88,7 @@ enum s3c2410_dma_state {
  *
  * This represents the state of the DMA engine, wrt to the loaded / running
  * transfers. Since we don't have any way of knowing exactly the state of
- * the DMA transfers, we need to know the state to make decisions on wether
+ * the DMA transfers, we need to know the state to make decisions on whether
  * we can
  *
  * S3C2410_DMA_NONE
diff --git a/arch/arm/mach-s3c24xx/include/mach/vr1000-map.h b/arch/arm/mach-s3c24xx/include/mach/vr1000-map.h
index 99612fcc4eb2..28376e56dd3b 100644
--- a/arch/arm/mach-s3c24xx/include/mach/vr1000-map.h
+++ b/arch/arm/mach-s3c24xx/include/mach/vr1000-map.h
@@ -51,7 +51,7 @@
 #define VR1000_VA_PC104_IRQMASK VR1000_IOADDR(0x00600000)
 
 /* 0xE0000000 contains the IO space that is split by speed and
- * wether the access is for 8 or 16bit IO... this ensures that
+ * whether the access is for 8 or 16bit IO... this ensures that
  * the correct access is made
  *
  * 0x10000000 of space, partitioned as so:
diff --git a/arch/arm/mach-s3c24xx/pm.c b/arch/arm/mach-s3c24xx/pm.c
index 60627e63a254..724755f0b0f5 100644
--- a/arch/arm/mach-s3c24xx/pm.c
+++ b/arch/arm/mach-s3c24xx/pm.c
@@ -121,7 +121,7 @@ void s3c_pm_configure_extint(void)
 	int pin;
 
 	/* for each of the external interrupts (EINT0..EINT15) we
-	 * need to check wether it is an external interrupt source,
+	 * need to check whether it is an external interrupt source,
 	 * and then configure it as an input if it is not
 	*/
 
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index db98e7021f0d..3eb5139656af 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -325,7 +325,7 @@ static int s3c2410_dma_start(struct s3c2410_dma_chan *chan)
 
 	chan->state = S3C2410_DMA_RUNNING;
 
-	/* check wether there is anything to load, and if not, see
+	/* check whether there is anything to load, and if not, see
 	 * if we can find anything to load
 	 */
 
diff --git a/arch/m68k/math-emu/fp_log.c b/arch/m68k/math-emu/fp_log.c
index 3384a5244fbd..0663067870f2 100644
--- a/arch/m68k/math-emu/fp_log.c
+++ b/arch/m68k/math-emu/fp_log.c
@@ -50,7 +50,7 @@ fp_fsqrt(struct fp_ext *dest, struct fp_ext *src)
 	 * sqrt(m*2^e) =
 	 *		 sqrt(2*m) * 2^(p)	, if e = 2*p + 1
 	 *
-	 * So we use the last bit of the exponent to decide wether to
+	 * So we use the last bit of the exponent to decide whether to
 	 * use the m or 2*m.
 	 *
 	 * Since only the fractional part of the mantissa is stored and
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index eedf427c9124..3e13e23e4fdf 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -23,7 +23,7 @@
 
 /* Note the full page bits must be in the same location as for normal
  * 4k pages as the same assembly will be used to insert 64K pages
- * wether the kernel has CONFIG_PPC_64K_PAGES or not
+ * whether the kernel has CONFIG_PPC_64K_PAGES or not
  */
 #define _PAGE_F_SECOND  0x00008000 /* full page: hidx bits */
 #define _PAGE_F_GIX     0x00007000 /* full page: hidx bits */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index ae20ce1af4c7..6e909f3e6a46 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -132,7 +132,7 @@
   *
   * At this point, the OF driver seems to have a limitation on transfer
   * sizes of 0xd bytes on reads and 0x5 bytes on writes. I do not know
-  * wether this is just an OF limit due to some temporary buffer size
+  * whether this is just an OF limit due to some temporary buffer size
   * or if this is an SMU imposed limit. This driver has the same limitation
   * for now as I use a 0x10 bytes temporary buffer as well
   *
@@ -236,7 +236,7 @@
  *   3 (optional): enable nmi? [0x00 or 0x01]
  *
  * Returns:
- *   If parameter 2 is 0x00 and parameter 3 is not specified, returns wether
+ *   If parameter 2 is 0x00 and parameter 3 is not specified, returns whether
  *   NMI is enabled. Otherwise unknown.
  */
 #define   SMU_CMD_MISC_df_NMI_OPTION		0x04
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index bedd12e1cfbc..0733b05eb856 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -387,7 +387,7 @@ void __init find_legacy_serial_ports(void)
 			of_node_put(parent);
 			continue;
 		}
-		/* Check for known pciclass, and also check wether we have
+		/* Check for known pciclass, and also check whether we have
 		 * a device with child nodes for ports or not
 		 */
 		if (of_device_is_compatible(np, "pciclass,0700") ||
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 2049f2d00ffe..9db8ec07ec94 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -82,7 +82,7 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev)
 		return -ENXIO;
 
 	/* Claim resources. This might need some rework as well depending
-	 * wether we are doing probe-only or not, like assigning unassigned
+	 * whether we are doing probe-only or not, like assigning unassigned
 	 * resources etc...
 	 */
 	pcibios_claim_one_bus(phb->bus);
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d183f8719a50..1ca045d44324 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -83,7 +83,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	 * the context). This is very important because we must ensure we
 	 * don't lose the VRSAVE content that may have been set prior to
 	 * the process doing its first vector operation
-	 * Userland shall check AT_HWCAP to know wether it can rely on the
+	 * Userland shall check AT_HWCAP to know whether it can rely on the
 	 * v_regs pointer or not
 	 */
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 5829d2a950d4..cf9dada734b6 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -722,7 +722,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 }
 
 /*
- * is_hugepage_only_range() is used by generic code to verify wether
+ * is_hugepage_only_range() is used by generic code to verify whether
  * a normal mmap mapping (non hugetlbfs) is valid on a given area.
  *
  * until the generic code provides a more generic hook and/or starts
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 028470b95886..a51cb07bd663 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -526,7 +526,7 @@ EXPORT_SYMBOL(mpc52xx_gpt_timer_period);
 
 #define WDT_IDENTITY	    "mpc52xx watchdog on GPT0"
 
-/* wdt_is_active stores wether or not the /dev/watchdog device is opened */
+/* wdt_is_active stores whether or not the /dev/watchdog device is opened */
 static unsigned long wdt_is_active;
 
 /* wdt-capable gpt */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index dca213666747..e56bb651da1a 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -728,7 +728,7 @@ static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
 		 nid, np->full_name);
 
 	/* XXX todo: If we can have multiple windows on the same IOMMU, which
-	 * isn't the case today, we probably want here to check wether the
+	 * isn't the case today, we probably want here to check whether the
 	 * iommu for that node is already setup.
 	 * However, there might be issue with getting the size right so let's
 	 * ignore that for now. We might want to completely get rid of the
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index d8b7cc8a66ca..8e299447127e 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -148,7 +148,7 @@ static int spider_set_irq_type(struct irq_data *d, unsigned int type)
 
 	/* Configure the source. One gross hack that was there before and
 	 * that I've kept around is the priority to the BE which I set to
-	 * be the same as the interrupt source number. I don't know wether
+	 * be the same as the interrupt source number. I don't know whether
 	 * that's supposed to make any kind of sense however, we'll have to
 	 * decide that, but for now, I'm not changing the behaviour.
 	 */
@@ -220,7 +220,7 @@ static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
 /* For hooking up the cascace we have a problem. Our device-tree is
  * crap and we don't know on which BE iic interrupt we are hooked on at
  * least not the "standard" way. We can reconstitute it based on two
- * informations though: which BE node we are connected to and wether
+ * informations though: which BE node we are connected to and whether
  * we are connected to IOIF0 or IOIF1. Right now, we really only care
  * about the IBM cell blade and we know that its firmware gives us an
  * interrupt-map property which is pretty strange.
@@ -232,7 +232,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	int imaplen, intsize, unit;
 	struct device_node *iic;
 
-	/* First, we check wether we have a real "interrupts" in the device
+	/* First, we check whether we have a real "interrupts" in the device
 	 * tree in case the device-tree is ever fixed
 	 */
 	struct of_irq oirq;
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index c4e630576ff2..31036b56670e 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -529,7 +529,7 @@ static int __init pmac_pic_probe_mpic(void)
 void __init pmac_pic_init(void)
 {
 	/* We configure the OF parsing based on our oldworld vs. newworld
-	 * platform type and wether we were booted by BootX.
+	 * platform type and whether we were booted by BootX.
 	 */
 #ifdef CONFIG_PPC32
 	if (!pmac_newworld)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 14313ad43b76..42509492717e 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1648,7 +1648,7 @@ static int evergreen_cp_resume(struct radeon_device *rdev)
 	ring->wptr = 0;
 	WREG32(CP_RB_WPTR, ring->wptr);
 
-	/* set the wb address wether it's enabled or not */
+	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
 	       ((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC));
 	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 81e6a568c29d..cda01f808f12 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1059,7 +1059,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
 
 	WREG32(CP_DEBUG, (1 << 27));
 
-	/* set the wb address wether it's enabled or not */
+	/* set the wb address whether it's enabled or not */
 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
 	WREG32(SCRATCH_UMSK, 0xff);
 
@@ -1076,7 +1076,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
 #endif
 		WREG32(cp_rb_cntl[i], rb_cntl);
 
-		/* set the wb address wether it's enabled or not */
+		/* set the wb address whether it's enabled or not */
 		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
 		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
 		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index b0db712060fb..ea4691f79ccd 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2007,7 +2007,7 @@ static int si_cp_resume(struct radeon_device *rdev)
 	ring->wptr = 0;
 	WREG32(CP_RB0_WPTR, ring->wptr);
 
-	/* set the wb address wether it's enabled or not */
+	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
 
@@ -2040,7 +2040,7 @@ static int si_cp_resume(struct radeon_device *rdev)
 	ring->wptr = 0;
 	WREG32(CP_RB1_WPTR, ring->wptr);
 
-	/* set the wb address wether it's enabled or not */
+	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
 
@@ -2066,7 +2066,7 @@ static int si_cp_resume(struct radeon_device *rdev)
 	ring->wptr = 0;
 	WREG32(CP_RB2_WPTR, ring->wptr);
 
-	/* set the wb address wether it's enabled or not */
+	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
 
diff --git a/drivers/i2c/busses/i2c-nuc900.c b/drivers/i2c/busses/i2c-nuc900.c
index f41502ef3f55..a23b91b0b738 100644
--- a/drivers/i2c/busses/i2c-nuc900.c
+++ b/drivers/i2c/busses/i2c-nuc900.c
@@ -304,7 +304,7 @@ retry_write:
 
 	case STATE_READ:
 		/* we have a byte of data in the data register, do
-		 * something with it, and then work out wether we are
+		 * something with it, and then work out whether we are
 		 * going to do any more read/write
 		 */
 
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 3e0335f1fc60..de9e1fa0aa0e 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -208,7 +208,7 @@ static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c,
 	if (msg->flags & I2C_M_REV_DIR_ADDR)
 		addr ^= 1;
 
-	/* todo - check for wether ack wanted or not */
+	/* todo - check for whether ack wanted or not */
 	s3c24xx_i2c_enable_ack(i2c);
 
 	iiccon = readl(i2c->regs + S3C2410_IICCON);
@@ -397,7 +397,7 @@ static int i2c_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 
 	case STATE_READ:
 		/* we have a byte of data in the data register, do
-		 * something with it, and then work out wether we are
+		 * something with it, and then work out whether we are
 		 * going to do any more read/write
 		 */
 
diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c
index 891558de3ec1..2de66b062f0d 100644
--- a/drivers/mtd/maps/plat-ram.c
+++ b/drivers/mtd/maps/plat-ram.c
@@ -219,7 +219,7 @@ static int platram_probe(struct platform_device *pdev)
 
 	platram_setrw(info, PLATRAM_RW);
 
-	/* check to see if there are any available partitions, or wether
+	/* check to see if there are any available partitions, or whether
 	 * to add this device whole */
 
 	err = mtd_device_parse_register(info->mtd, pdata->probes, NULL,
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 295e4bedad96..79ded48e7427 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -879,7 +879,7 @@ static void s3c2410_nand_update_chip(struct s3c2410_nand_info *info,
 	if (chip->ecc.mode != NAND_ECC_HW)
 		return;
 
-		/* change the behaviour depending on wether we are using
+		/* change the behaviour depending on whether we are using
 		 * the large or small page nand device */
 
 	if (chip->page_shift > 10) {
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 203ff9dccadb..846381c31185 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -109,7 +109,7 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev)
 /*
  * ax_initial_check
  *
- * do an initial probe for the card to check wether it exists
+ * do an initial probe for the card to check whether it exists
  * and is functional
  */
 static int ax_initial_check(struct net_device *dev)
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge_mcp_gen_header.h b/drivers/net/ethernet/myricom/myri10ge/myri10ge_mcp_gen_header.h
index 7ec4b864a550..75ec5e7cf50d 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge_mcp_gen_header.h
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge_mcp_gen_header.h
@@ -27,7 +27,7 @@ struct mcp_gen_header {
 	 *
 	 * Fields below this comment are extensions added in later versions
 	 * of this struct, drivers should compare the header_length against
-	 * offsetof(field) to check wether a given MCP implements them.
+	 * offsetof(field) to check whether a given MCP implements them.
 	 *
 	 * Never remove any field.  Keep everything naturally align.
 	 */
diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
index 58f13adaa549..ae7cd7f3656d 100644
--- a/drivers/net/sungem_phy.c
+++ b/drivers/net/sungem_phy.c
@@ -608,7 +608,7 @@ static int bcm5421_poll_link(struct mii_phy* phy)
 	if ( mode == BCM54XX_COPPER)
 		return genmii_poll_link(phy);
 
-	/* try to find out wether we have a link */
+	/* try to find out whether we have a link */
 	phy_write(phy, MII_NCONFIG, 0x2000);
 	phy_reg = phy_read(phy, MII_NCONFIG);
 
@@ -634,7 +634,7 @@ static int bcm5421_read_link(struct mii_phy* phy)
 
 	phy->speed = SPEED_1000;
 
-	/* find out wether we are running half- or full duplex */
+	/* find out whether we are running half- or full duplex */
 	phy_write(phy, MII_NCONFIG, 0x2000);
 	phy_reg = phy_read(phy, MII_NCONFIG);
 
@@ -681,7 +681,7 @@ static int bcm5461_poll_link(struct mii_phy* phy)
 	if ( mode == BCM54XX_COPPER)
 		return genmii_poll_link(phy);
 
-	/* find out wether we have a link */
+	/* find out whether we have a link */
 	phy_write(phy, MII_NCONFIG, 0x7000);
 	phy_reg = phy_read(phy, MII_NCONFIG);
 
@@ -710,7 +710,7 @@ static int bcm5461_read_link(struct mii_phy* phy)
 
 	phy->speed = SPEED_1000;
 
-	/* find out wether we are running half- or full duplex */
+	/* find out whether we are running half- or full duplex */
 	phy_write(phy, MII_NCONFIG, 0x7000);
 	phy_reg = phy_read(phy, MII_NCONFIG);
 
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 26c81f233606..afb7cfa85ccc 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -118,7 +118,7 @@ isl1208_i2c_set_regs(struct i2c_client *client, u8 reg, u8 const buf[],
 	return ret;
 }
 
-/* simple check to see wether we have a isl1208 */
+/* simple check to see whether we have a isl1208 */
 static int
 isl1208_i2c_validate_client(struct i2c_client *client)
 {
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 77823d21d314..a7a2a998fa91 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -186,7 +186,7 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_year = readb(base + S3C2410_RTCYEAR);
 	rtc_tm->tm_sec  = readb(base + S3C2410_RTCSEC);
 
-	/* the only way to work out wether the system was mid-update
+	/* the only way to work out whether the system was mid-update
 	 * when we read it is to check the second counter, and if it
 	 * is zero, then we re-try the entire read
 	 */
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index b2b8c18eeced..20cfd028edcf 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -1344,7 +1344,7 @@ dasd_get_feature(struct ccw_device *cdev, int feature)
 
 /*
  * Set / reset given feature.
- * Flag indicates wether to set (!=0) or the reset (=0) the feature.
+ * Flag indicates whether to set (!=0) or the reset (=0) the feature.
  */
 int
 dasd_set_feature(struct ccw_device *cdev, int feature, int flag)
diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c
index d36446dd7ae8..73c93cf364c8 100644
--- a/drivers/usb/storage/realtek_cr.c
+++ b/drivers/usb/storage/realtek_cr.c
@@ -883,7 +883,7 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 		} else {
 			US_DEBUGP("%s: NOT working scsi, not SS\n", __func__);
 			chip->proto_handler_backup(srb, us);
-			/* Check wether card is plugged in */
+			/* Check whether card is plugged in */
 			if (srb->cmnd[0] == TEST_UNIT_READY) {
 				if (srb->result == SAM_STAT_GOOD) {
 					SET_LUN_READY(chip, srb->device->lun);
diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index 5b06d31ab6a9..c0bc92d8e438 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c
@@ -212,7 +212,7 @@ static long booke_wdt_ioctl(struct file *file,
 	return 0;
 }
 
-/* wdt_is_active stores wether or not the /dev/watchdog device is opened */
+/* wdt_is_active stores whether or not the /dev/watchdog device is opened */
 static unsigned long wdt_is_active;
 
 static int booke_wdt_open(struct inode *inode, struct file *file)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d3e2c17d8d1..06b2635073f3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3888,7 +3888,7 @@ static int flush_space(struct btrfs_root *root,
  * @root - the root we're allocating for
  * @block_rsv - the block_rsv we're allocating for
  * @orig_bytes - the number of bytes we want
- * @flush - wether or not we can flush to make our reservation
+ * @flush - whether or not we can flush to make our reservation
  *
  * This will reserve orgi_bytes number of bytes from the space info associated
  * with the block_rsv.  If there is not enough space it will make an attempt to
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index dd27a0b46a37..853fc7beedfa 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -76,7 +76,7 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
 
-#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
+#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
 				       * has done its due diligence in updating
 				       * the isize. */
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c20de1d59d0..df163da388c9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2455,7 +2455,7 @@ TAS_BUFFER_FNS(Uninit, uninit)
 BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
- * Add new method to test wether block and inode bitmaps are properly
+ * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
  * to mark the bitmap uptodate. We need to also zero-out the bitmap
  */
diff --git a/fs/fhandle.c b/fs/fhandle.c
index f775bfdd6e4a..cccdc874bb55 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -22,7 +22,7 @@ static long do_sys_name_to_handle(struct path *path,
 	struct file_handle *handle = NULL;
 
 	/*
-	 * We need t make sure wether the file system
+	 * We need to make sure whether the file system
 	 * support decoding of the file handle
 	 */
 	if (!path->dentry->d_sb->s_export_op ||
@@ -40,7 +40,7 @@ static long do_sys_name_to_handle(struct path *path,
 	if (!handle)
 		return -ENOMEM;
 
-	/* convert handle size to  multiple of sizeof(u32) */
+	/* convert handle size to multiple of sizeof(u32) */
 	handle_dwords = f_handle.handle_bytes >> 2;
 
 	/* we ask for a non connected handle */
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 78b7f84241d4..8b472c8bbf7a 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1259,7 +1259,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 		goto not_jbd;
 	}
 
-	/* keep track of wether or not this transaction modified us */
+	/* keep track of whether or not this transaction modified us */
 	was_modified = jh->b_modified;
 
 	/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a74ba4659549..d8da40e99d84 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1261,7 +1261,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 		goto not_jbd;
 	}
 
-	/* keep track of wether or not this transaction modified us */
+	/* keep track of whether or not this transaction modified us */
 	was_modified = jh->b_modified;
 
 	/*
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index adb90116d36b..af49e2d6941a 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -33,7 +33,7 @@
  * are being written out - and waiting for GC to make progress, naturally.
  *
  * So we cannot just call iget() or some variant of it, but first have to check
- * wether the inode in question might be in I_FREEING state.  Therefore we
+ * whether the inode in question might be in I_FREEING state.  Therefore we
  * maintain our own per-sb list of "almost deleted" inodes and check against
  * that list first.  Normally this should be at most 1-2 entries long.
  *
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 7a71ffad037c..cafc7f99e124 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -52,8 +52,8 @@ We replicate IO (more or less synchronously) to local and remote disk.
 
 For crash recovery after replication node failure,
   we need to resync all regions that have been target of in-flight WRITE IO
-  (in use, or "hot", regions), as we don't know wether or not those WRITEs have
-  made it to stable storage.
+  (in use, or "hot", regions), as we don't know whether or not those WRITEs
+  have made it to stable storage.
 
   To avoid a "full resync", we need to persistently track these regions.
 
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index 0ee42d9acdc0..2c02f3a8d2ba 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -78,7 +78,7 @@ extern void vga_set_legacy_decoding(struct pci_dev *pdev,
  *     This function acquires VGA resources for the given
  *     card and mark those resources locked. If the resource requested
  *     are "normal" (and not legacy) resources, the arbiter will first check
- *     wether the card is doing legacy decoding for that type of resource. If
+ *     whether the card is doing legacy decoding for that type of resource. If
  *     yes, the lock is "converted" into a legacy resource lock.
  *     The arbiter will first look for all VGA cards that might conflict
  *     and disable their IOs and/or Memory access, including VGA forwarding
@@ -89,7 +89,7 @@ extern void vga_set_legacy_decoding(struct pci_dev *pdev,
  *     This function will block if some conflicting card is already locking
  *     one of the required resources (or any resource on a different bus
  *     segment, since P2P bridges don't differenciate VGA memory and IO
- *     afaik). You can indicate wether this blocking should be interruptible
+ *     afaik). You can indicate whether this blocking should be interruptible
  *     by a signal (for userland interface) or not.
  *     Must not be called at interrupt time or in atomic context.
  *     If the card already owns the resources, the function succeeds.
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index b7f45d48b2de..87490ac4bd87 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -105,7 +105,7 @@ struct watchdog_device {
 #define WATCHDOG_NOWAYOUT_INIT_STATUS	0
 #endif
 
-/* Use the following function to check wether or not the watchdog is active */
+/* Use the following function to check whether or not the watchdog is active */
 static inline bool watchdog_active(struct watchdog_device *wdd)
 {
 	return test_bit(WDOG_ACTIVE, &wdd->status);
diff --git a/include/net/sock.h b/include/net/sock.h
index c945fba4f543..a95e0756e56e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -213,7 +213,7 @@ struct cg_proto;
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
   *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
-  *	@sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
+  *	@sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
   *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
diff --git a/net/can/proc.c b/net/can/proc.c
index 3b6dd3180492..ae566902d2bf 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -397,7 +397,7 @@ static inline void can_rcvlist_sff_proc_show_one(struct seq_file *m,
 	int i;
 	int all_empty = 1;
 
-	/* check wether at least one list is non-empty */
+	/* check whether at least one list is non-empty */
 	for (i = 0; i < 0x800; i++)
 		if (!hlist_empty(&d->rx_sff[i])) {
 			all_empty = 0;
diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c
index 5d0e568fdea1..dc14234fea29 100644
--- a/sound/pci/es1968.c
+++ b/sound/pci/es1968.c
@@ -2291,7 +2291,7 @@ static void snd_es1968_chip_init(struct es1968 *chip)
 	outb(0x88, iobase+0x1f);
 
 	/* it appears some maestros (dell 7500) only work if these are set,
-	   regardless of wether we use the assp or not. */
+	   regardless of whether we use the assp or not. */
 
 	outb(0, iobase + ASSP_CONTROL_B);
 	outb(3, iobase + ASSP_CONTROL_A);	/* M: Reserved bits... */
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 0f58b4b6d702..007fcecdf5cd 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -675,7 +675,7 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev,
  */
 int snd_usb_is_big_endian_format(struct snd_usb_audio *chip, struct audioformat *fp)
 {
-	/* it depends on altsetting wether the device is big-endian or not */
+	/* it depends on altsetting whether the device is big-endian or not */
 	switch (chip->usb_id) {
 	case USB_ID(0x0763, 0x2001): /* M-Audio Quattro: captured data only */
 		if (fp->altsetting == 2 || fp->altsetting == 3 ||
-- 
cgit v1.2.3-55-g7522


From b3834be5c42a5d2fd85ff4b819fa38983b1450e6 Mon Sep 17 00:00:00 2001
From: Adam Buchbinder
Date: Wed, 19 Sep 2012 21:48:02 -0400
Subject: various: Fix spelling of "asynchronous" in comments.

"Asynchronous" is misspelled in some comments. No code changes.

Signed-off-by: Adam Buchbinder <adam.buchbinder@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/char/ipmi/ipmi_msghandler.c     | 2 +-
 drivers/char/ipmi/ipmi_si_intf.c        | 2 +-
 drivers/firewire/init_ohci1394_dma.c    | 4 ++--
 drivers/firewire/ohci.c                 | 2 +-
 drivers/net/ethernet/mellanox/mlx4/eq.c | 2 +-
 drivers/tty/ipwireless/setup_protocol.h | 2 +-
 fs/notify/notification.c                | 2 +-
 include/linux/dlm.h                     | 2 +-
 include/linux/ipmi_smi.h                | 2 +-
 mm/memory_hotplug.c                     | 6 +++---
 sound/pci/sis7019.c                     | 2 +-
 11 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index a0c84bb30856..053201b062a4 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3789,7 +3789,7 @@ static int handle_one_recv_msg(ipmi_smi_t          intf,
 
 	} else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2))
 		   && (msg->rsp[1] == IPMI_READ_EVENT_MSG_BUFFER_CMD)) {
-		/* It's an asyncronous event. */
+		/* It's an asynchronous event. */
 		requeue = handle_read_event_rsp(intf, msg);
 	} else {
 		/* It's a response from the local BMC. */
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 32a6c7e256bd..c9c3501784e0 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -155,7 +155,7 @@ enum si_stat_indexes {
 	/* Number of watchdog pretimeouts. */
 	SI_STAT_watchdog_pretimeouts,
 
-	/* Number of asyncronous messages received. */
+	/* Number of asynchronous messages received. */
 	SI_STAT_incoming_messages,
 
 
diff --git a/drivers/firewire/init_ohci1394_dma.c b/drivers/firewire/init_ohci1394_dma.c
index a9a347adb353..2cc89ce745c9 100644
--- a/drivers/firewire/init_ohci1394_dma.c
+++ b/drivers/firewire/init_ohci1394_dma.c
@@ -149,10 +149,10 @@ static inline void __init init_ohci1394_initialize(struct ohci *ohci)
 	reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 0xffffffff);
 	reg_write(ohci, OHCI1394_IsoXmitIntEventClear, 0xffffffff);
 
-	/* Accept asyncronous transfer requests from all nodes for now */
+	/* Accept asynchronous transfer requests from all nodes for now */
 	reg_write(ohci, OHCI1394_AsReqFilterHiSet, 0x80000000);
 
-	/* Specify asyncronous transfer retries */
+	/* Specify asynchronous transfer retries */
 	reg_write(ohci, OHCI1394_ATRetries,
 		  OHCI1394_MAX_AT_REQ_RETRIES |
 		  (OHCI1394_MAX_AT_RESP_RETRIES<<4) |
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 834e71d2324d..961e4398664b 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1281,7 +1281,7 @@ static int at_context_queue_packet(struct context *ctx,
 	d[0].res_count = cpu_to_le16(packet->timestamp);
 
 	/*
-	 * The DMA format for asyncronous link packets is different
+	 * The DMA format for asynchronous link packets is different
 	 * from the IEEE1394 layout, so shift the fields around
 	 * accordingly.
 	 */
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index b84a88bc44dc..c48cf6f6529c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1267,7 +1267,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
 		/* Temporary use polling for command completions */
 		mlx4_cmd_use_polling(dev);
 
-		/* Map the new eq to handle all asyncronous events */
+		/* Map the new eq to handle all asynchronous events */
 		err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
 				  priv->eq_table.eq[i].eqn);
 		if (err) {
diff --git a/drivers/tty/ipwireless/setup_protocol.h b/drivers/tty/ipwireless/setup_protocol.h
index 9d6bcc77c73c..002c34e72521 100644
--- a/drivers/tty/ipwireless/setup_protocol.h
+++ b/drivers/tty/ipwireless/setup_protocol.h
@@ -59,7 +59,7 @@ struct tl_setup_config_done_msg {
 	unsigned char sig_no;		/* TL_SETUP_SIGNO_CONFIG_DONE_MSG */
 } __attribute__ ((__packed__));
 
-/* Asyncronous messages */
+/* Asynchronous messages */
 struct tl_setup_open_msg {
 	unsigned char sig_no;		/* TL_SETUP_SIGNO_OPEN_MSG */
 	unsigned char port_no;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index c887b1378f7e..48cb994e4922 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -18,7 +18,7 @@
 
 /*
  * Basic idea behind the notification queue: An fsnotify group (like inotify)
- * sends the userspace notification about events asyncronously some time after
+ * sends the userspace notification about events asynchronously some time after
  * the event happened.  When inotify gets an event it will need to add that
  * event to the group notify queue.  Since a single event might need to be on
  * multiple group's notification queues we can't add the event directly to each
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index 1d47dcce11e1..d02da2c6fc1a 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -98,7 +98,7 @@ int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force);
 /*
  * dlm_lock
  *
- * Make an asyncronous request to acquire or convert a lock on a named
+ * Make an asynchronous request to acquire or convert a lock on a named
  * resource.
  *
  * lockspace: context for the request
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index fcb5d44ea635..8ea3fe0b9759 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -216,7 +216,7 @@ int ipmi_unregister_smi(ipmi_smi_t intf);
 
 /*
  * The lower layer reports received messages through this interface.
- * The data_size should be zero if this is an asyncronous message.  If
+ * The data_size should be zero if this is an asynchronous message.  If
  * the lower layer gets an error sending a message, it should format
  * an error response in the message response.
  */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 56b758ae57d2..bb81b7f417a8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -950,10 +950,10 @@ repeat:
 			goto repeat;
 		}
 	}
-	/* drain all zone's lru pagevec, this is asyncronous... */
+	/* drain all zone's lru pagevec, this is asynchronous... */
 	lru_add_drain_all();
 	yield();
-	/* drain pcp pages , this is synchrouns. */
+	/* drain pcp pages, this is synchronous. */
 	drain_all_pages();
 	/* check again */
 	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
@@ -962,7 +962,7 @@ repeat:
 		goto failed_removal;
 	}
 	printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
-	/* Ok, all of our target is islaoted.
+	/* Ok, all of our target is isolated.
 	   We cannot do rollback at this point. */
 	offline_isolated_pages(start_pfn, end_pfn);
 	/* reset pagetype flags and makes migrate type to be MOVABLE */
diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c
index 51e43407ebc5..9024545ebb86 100644
--- a/sound/pci/sis7019.c
+++ b/sound/pci/sis7019.c
@@ -1171,7 +1171,7 @@ static int sis_chip_init(struct sis7019 *sis)
 	outl(SIS_DMA_CSR_PCI_SETTINGS, io + SIS_DMA_CSR);
 
 	/* Reset the synchronization groups for all of the channels
-	 * to be asyncronous. If we start doing SPDIF or 5.1 sound, etc.
+	 * to be asynchronous. If we start doing SPDIF or 5.1 sound, etc.
 	 * we'll need to change how we handle these. Until then, we just
 	 * assign sub-mixer 0 to all playback channels, and avoid any
 	 * attenuation on the audio.
-- 
cgit v1.2.3-55-g7522


From 49f4d8b93ccf9454284b6f524b96c66d8d7fbccc Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 2 Aug 2012 04:25:10 -0700
Subject: pidns: Capture the user namespace and filter ns_last_pid

- Capture the the user namespace that creates the pid namespace
- Use that user namespace to test if it is ok to write to
  /proc/sys/kernel/ns_last_pid.

Zhao Hongjiang <zhaohongjiang@huawei.com> noticed I was missing a put_user_ns
in when destroying a pid_ns.  I have foloded his patch into this one
so that bisects will work properly.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/pid_namespace.h |  8 +++++---
 kernel/nsproxy.c              |  2 +-
 kernel/pid.c                  |  1 +
 kernel/pid_namespace.c        | 17 ++++++++++++-----
 4 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 65e3e87eacc5..c89c9cfcd247 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -31,6 +31,7 @@ struct pid_namespace {
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
 #endif
+	struct user_namespace *user_ns;
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
@@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 	return ns;
 }
 
-extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *ns);
 extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
 extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
 extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 	return ns;
 }
 
-static inline struct pid_namespace *
-copy_pid_ns(unsigned long flags, struct pid_namespace *ns)
+static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *ns)
 {
 	if (flags & CLONE_NEWPID)
 		ns = ERR_PTR(-EINVAL);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 7e1c3de1ce45..ca27d2c5264d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -84,7 +84,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
+	new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), task_active_pid_ns(tsk));
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
diff --git a/kernel/pid.c b/kernel/pid.c
index aebd4f5aaf41..2a624f1486e1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = {
 	.last_pid = 0,
 	.level = 0,
 	.child_reaper = &init_task,
+	.user_ns = &init_user_ns,
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7b07cc0dfb75..b2604950aa50 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -10,6 +10,7 @@
 
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/err.h>
 #include <linux/acct.h>
@@ -74,7 +75,8 @@ err_alloc:
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
-static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
+static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
+	struct pid_namespace *parent_pid_ns)
 {
 	struct pid_namespace *ns;
 	unsigned int level = parent_pid_ns->level + 1;
@@ -102,6 +104,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
+	ns->user_ns = get_user_ns(user_ns);
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -117,6 +120,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 
 out_put_parent_pid_ns:
 	put_pid_ns(parent_pid_ns);
+	put_user_ns(user_ns);
 out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
@@ -131,16 +135,18 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
+	put_user_ns(ns->user_ns);
 	kmem_cache_free(pid_ns_cachep, ns);
 }
 
-struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *old_ns)
 {
 	if (!(flags & CLONE_NEWPID))
 		return get_pid_ns(old_ns);
 	if (flags & (CLONE_THREAD|CLONE_PARENT))
 		return ERR_PTR(-EINVAL);
-	return create_pid_namespace(old_ns);
+	return create_pid_namespace(user_ns, old_ns);
 }
 
 static void free_pid_ns(struct kref *kref)
@@ -239,9 +245,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
 	struct ctl_table tmp = *table;
 
-	if (write && !capable(CAP_SYS_ADMIN))
+	if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/*
@@ -250,7 +257,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 	 * it should synchronize its usage with external means.
 	 */
 
-	tmp.data = &current->nsproxy->pid_ns->last_pid;
+	tmp.data = &pid_ns->last_pid;
 	return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 }
 
-- 
cgit v1.2.3-55-g7522


From 0a01f2cc390e10633a54f72c608cc3fe19a50c3d Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Wed, 1 Aug 2012 10:33:47 -0700
Subject: pidns: Make the pidns proc mount/umount logic obvious.

Track the number of pids in the proc hash table.  When the number of
pids goes to 0 schedule work to unmount the kernel mount of proc.

Move the mount of proc into alloc_pid when we allocate the pid for
init.

Remove the surprising calls of pid_ns_release proc in fork and
proc_flush_task.  Those code paths really shouldn't know about proc
namespace implementation details and people have demonstrated several
times that finding and understanding those code paths is difficult and
non-obvious.

Because of the call path detach pid is alwasy called with the
rtnl_lock held free_pid is not allowed to sleep, so the work to
unmounting proc is moved to a work queue.  This has the side benefit
of not blocking the entire world waiting for the unnecessary
rcu_barrier in deactivate_locked_super.

In the process of making the code clear and obvious this fixes a bug
reported by Gao feng <gaofeng@cn.fujitsu.com> where we would leak a
mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns
succeeded and copy_net_ns failed.

Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/base.c                |  4 ----
 fs/proc/root.c                |  5 -----
 include/linux/pid_namespace.h |  2 ++
 kernel/fork.c                 |  2 --
 kernel/pid.c                  | 21 +++++++++++++++++----
 kernel/pid_namespace.c        | 14 +++++++-------
 6 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6177fc238fdb..7621dc51cff8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2590,10 +2590,6 @@ void proc_flush_task(struct task_struct *task)
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
 					tgid->numbers[i].nr);
 	}
-
-	upid = &pid->numbers[pid->level];
-	if (upid->nr == 1)
-		pid_ns_release_proc(upid->ns);
 }
 
 static struct dentry *proc_pid_instantiate(struct inode *dir,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index fc1609321a78..f2f251158d35 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -155,11 +155,6 @@ void __init proc_root_init(void)
 	err = register_filesystem(&proc_fs_type);
 	if (err)
 		return;
-	err = pid_ns_prepare_proc(&init_pid_ns);
-	if (err) {
-		unregister_filesystem(&proc_fs_type);
-		return;
-	}
 
 	proc_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c89c9cfcd247..4c96acdb2489 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
+	int nr_hashed;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
@@ -32,6 +33,7 @@ struct pid_namespace {
 	struct bsd_acct_struct *bacct;
 #endif
 	struct user_namespace *user_ns;
+	struct work_struct proc_work;
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7798c247f4b9..666dc8b06606 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1476,8 +1476,6 @@ bad_fork_cleanup_io:
 	if (p->io_context)
 		exit_io_context(p);
 bad_fork_cleanup_namespaces:
-	if (unlikely(clone_flags & CLONE_NEWPID))
-		pid_ns_release_proc(p->nsproxy->pid_ns);
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
diff --git a/kernel/pid.c b/kernel/pid.c
index 3a5f238c1ca0..e957f8b09136 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 #define pid_hashfn(nr, ns)	\
 	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -270,8 +271,12 @@ void free_pid(struct pid *pid)
 	unsigned long flags;
 
 	spin_lock_irqsave(&pidmap_lock, flags);
-	for (i = 0; i <= pid->level; i++)
-		hlist_del_rcu(&pid->numbers[i].pid_chain);
+	for (i = 0; i <= pid->level; i++) {
+		struct upid *upid = pid->numbers + i;
+		hlist_del_rcu(&upid->pid_chain);
+		if (--upid->ns->nr_hashed == 0)
+			schedule_work(&upid->ns->proc_work);
+	}
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
 	for (i = 0; i <= pid->level; i++)
@@ -293,6 +298,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 		goto out;
 
 	tmp = ns;
+	pid->level = ns->level;
 	for (i = ns->level; i >= 0; i--) {
 		nr = alloc_pidmap(tmp);
 		if (nr < 0)
@@ -303,17 +309,23 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 		tmp = tmp->parent;
 	}
 
+	if (unlikely(is_child_reaper(pid))) {
+		if (pid_ns_prepare_proc(ns))
+			goto out_free;
+	}
+
 	get_pid_ns(ns);
-	pid->level = ns->level;
 	atomic_set(&pid->count, 1);
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
 
 	upid = pid->numbers + ns->level;
 	spin_lock_irq(&pidmap_lock);
-	for ( ; upid >= pid->numbers; --upid)
+	for ( ; upid >= pid->numbers; --upid) {
 		hlist_add_head_rcu(&upid->pid_chain,
 				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+		upid->ns->nr_hashed++;
+	}
 	spin_unlock_irq(&pidmap_lock);
 
 out:
@@ -570,6 +582,7 @@ void __init pidmap_init(void)
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+	init_pid_ns.nr_hashed = 1;
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index b2604950aa50..84591cfeefc1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -72,6 +72,12 @@ err_alloc:
 	return NULL;
 }
 
+static void proc_cleanup_work(struct work_struct *work)
+{
+	struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
+	pid_ns_release_proc(ns);
+}
+
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
@@ -105,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
 	ns->user_ns = get_user_ns(user_ns);
+	INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -112,15 +119,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	for (i = 1; i < PIDMAP_ENTRIES; i++)
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
 
-	err = pid_ns_prepare_proc(ns);
-	if (err)
-		goto out_put_parent_pid_ns;
-
 	return ns;
 
-out_put_parent_pid_ns:
-	put_pid_ns(parent_pid_ns);
-	put_user_ns(user_ns);
 out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
-- 
cgit v1.2.3-55-g7522


From 57e8391d327609cbf12d843259c968b9e5c1838f Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Sun, 7 Mar 2010 18:17:03 -0800
Subject: pidns: Add setns support

- Pid namespaces are designed to be inescapable so verify that the
  passed in pid namespace is a child of the currently active
  pid namespace or the currently active pid namespace itself.

  Allowing the currently active pid namespace is important so
  the effects of an earlier setns can be cancelled.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c    |  3 +++
 include/linux/proc_fs.h |  1 +
 kernel/pid_namespace.c  | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c36..85ca047e35f1 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -24,6 +24,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_IPC_NS
 	&ipcns_operations,
 #endif
+#ifdef CONFIG_PID_NS
+	&pidns_operations,
+#endif
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e871ff1b..acaafcd40aa5 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -251,6 +251,7 @@ struct proc_ns_operations {
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 0dbbc66b6ec6..f78fc48c86bc 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -301,6 +301,60 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 	return 0;
 }
 
+static void *pidns_get(struct task_struct *task)
+{
+	struct pid_namespace *ns;
+
+	rcu_read_lock();
+	ns = get_pid_ns(task_active_pid_ns(task));
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void pidns_put(void *ns)
+{
+	put_pid_ns(ns);
+}
+
+static int pidns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct pid_namespace *active = task_active_pid_ns(current);
+	struct pid_namespace *ancestor, *new = ns;
+
+	if (!ns_capable(new->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/*
+	 * Only allow entering the current active pid namespace
+	 * or a child of the current active pid namespace.
+	 *
+	 * This is required for fork to return a usable pid value and
+	 * this maintains the property that processes and their
+	 * children can not escape their current pid namespace.
+	 */
+	if (new->level < active->level)
+		return -EINVAL;
+
+	ancestor = new;
+	while (ancestor->level > active->level)
+		ancestor = ancestor->parent;
+	if (ancestor != active)
+		return -EINVAL;
+
+	put_pid_ns(nsproxy->pid_ns);
+	nsproxy->pid_ns = get_pid_ns(new);
+	return 0;
+}
+
+const struct proc_ns_operations pidns_operations = {
+	.name		= "pid",
+	.type		= CLONE_NEWPID,
+	.get		= pidns_get,
+	.put		= pidns_put,
+	.install	= pidns_install,
+};
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
-- 
cgit v1.2.3-55-g7522


From 8823c079ba7136dc1948d6f6dcb5f8022bde438e Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Sun, 7 Mar 2010 18:49:36 -0800
Subject: vfs: Add setns support for the mount namespace

setns support for the mount namespace is a little tricky as an
arbitrary decision must be made about what to set fs->root and
fs->pwd to, as there is no expectation of a relationship between
the two mount namespaces.  Therefore I arbitrarily find the root
mount point, and follow every mount on top of it to find the top
of the mount stack.  Then I set fs->root and fs->pwd to that
location.  The topmost root of the mount stack seems like a
reasonable place to be.

Bind mount support for the mount namespace inodes has the
possibility of creating circular dependencies between mount
namespaces.  Circular dependencies can result in loops that
prevent mount namespaces from every being freed.  I avoid
creating those circular dependencies by adding a sequence number
to the mount namespace and require all bind mounts be of a
younger mount namespace into an older mount namespace.

Add a helper function proc_ns_inode so it is possible to
detect when we are attempting to bind mound a namespace inode.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/mount.h              |  1 +
 fs/namespace.c          | 95 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/namespaces.c    |  5 +++
 include/linux/proc_fs.h |  7 ++++
 4 files changed, 108 insertions(+)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9de641..e9c37dd3d00d 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -6,6 +6,7 @@ struct mnt_namespace {
 	atomic_t		count;
 	struct mount *	root;
 	struct list_head	list;
+	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	int event;
 };
diff --git a/fs/namespace.c b/fs/namespace.c
index 24960626bb6b..d287e7e74644 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
 #include <linux/fs_struct.h>	/* get_fs_root et.al. */
 #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
 #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -1308,6 +1309,26 @@ static int mount_is_safe(struct path *path)
 #endif
 }
 
+static bool mnt_ns_loop(struct path *path)
+{
+	/* Could bind mounting the mount namespace inode cause a
+	 * mount namespace loop?
+	 */
+	struct inode *inode = path->dentry->d_inode;
+	struct proc_inode *ei;
+	struct mnt_namespace *mnt_ns;
+
+	if (!proc_ns_inode(inode))
+		return false;
+
+	ei = PROC_I(inode);
+	if (ei->ns_ops != &mntns_operations)
+		return false;
+
+	mnt_ns = ei->ns;
+	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+}
+
 struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 					int flag)
 {
@@ -1655,6 +1676,10 @@ static int do_loopback(struct path *path, const char *old_name,
 	if (err)
 		return err;
 
+	err = -EINVAL;
+	if (mnt_ns_loop(&old_path))
+		goto out; 
+
 	err = lock_mount(path);
 	if (err)
 		goto out;
@@ -2261,6 +2286,15 @@ dput_out:
 	return retval;
 }
 
+/*
+ * Assign a sequence number so we can detect when we attempt to bind
+ * mount a reference to an older mount namespace into the current
+ * mount namespace, preventing reference counting loops.  A 64bit
+ * number incrementing at 10Ghz will take 12,427 years to wrap which
+ * is effectively never, so we can ignore the possibility.
+ */
+static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
+
 static struct mnt_namespace *alloc_mnt_ns(void)
 {
 	struct mnt_namespace *new_ns;
@@ -2268,6 +2302,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
+	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
 	new_ns->root = NULL;
 	INIT_LIST_HEAD(&new_ns->list);
@@ -2681,3 +2716,63 @@ bool our_mnt(struct vfsmount *mnt)
 {
 	return check_mnt(real_mount(mnt));
 }
+
+static void *mntns_get(struct task_struct *task)
+{
+	struct mnt_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy) {
+		ns = nsproxy->mnt_ns;
+		get_mnt_ns(ns);
+	}
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void mntns_put(void *ns)
+{
+	put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct fs_struct *fs = current->fs;
+	struct mnt_namespace *mnt_ns = ns;
+	struct path root;
+
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT))
+		return -EINVAL;
+
+	if (fs->users != 1)
+		return -EINVAL;
+
+	get_mnt_ns(mnt_ns);
+	put_mnt_ns(nsproxy->mnt_ns);
+	nsproxy->mnt_ns = mnt_ns;
+
+	/* Find the root */
+	root.mnt    = &mnt_ns->root->mnt;
+	root.dentry = mnt_ns->root->mnt.mnt_root;
+	path_get(&root);
+	while(d_mountpoint(root.dentry) && follow_down_one(&root))
+		;
+
+	/* Update the pwd and root */
+	set_fs_pwd(fs, &root);
+	set_fs_root(fs, &root);
+
+	path_put(&root);
+	return 0;
+}
+
+const struct proc_ns_operations mntns_operations = {
+	.name		= "mnt",
+	.type		= CLONE_NEWNS,
+	.get		= mntns_get,
+	.put		= mntns_put,
+	.install	= mntns_install,
+};
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 85ca047e35f1..2a17fd9ae6a9 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -27,6 +27,7 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
 #endif
+	&mntns_operations,
 };
 
 static const struct file_operations ns_file_operations = {
@@ -201,3 +202,7 @@ out_invalid:
 	return ERR_PTR(-EINVAL);
 }
 
+bool proc_ns_inode(struct inode *inode)
+{
+	return inode->i_fop == &ns_file_operations;
+}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index acaafcd40aa5..9014c041e752 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -174,6 +174,7 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
 
 extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
 
 #else
 
@@ -229,6 +230,11 @@ static inline struct file *proc_ns_fget(int fd)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline bool proc_ns_inode(struct inode *inode)
+{
+	return false;
+}
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
@@ -252,6 +258,7 @@ extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations mntns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
-- 
cgit v1.2.3-55-g7522


From 771b1371686e0a63e938ada28de020b9a0040f55 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 26 Jul 2012 21:08:32 -0700
Subject: vfs: Add a user namespace reference from struct mnt_namespace

This will allow for support for unprivileged mounts in a new user namespace.

Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/mount.h                    |  1 +
 fs/namespace.c                | 24 ++++++++++++++++--------
 include/linux/mnt_namespace.h |  3 ++-
 kernel/nsproxy.c              |  2 +-
 4 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index e9c37dd3d00d..630fafc616bb 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -6,6 +6,7 @@ struct mnt_namespace {
 	atomic_t		count;
 	struct mount *	root;
 	struct list_head	list;
+	struct user_namespace	*user_ns;
 	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	int event;
diff --git a/fs/namespace.c b/fs/namespace.c
index d287e7e74644..207c7ba84ad3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
 #include <linux/export.h>
 #include <linux/capability.h>
 #include <linux/mnt_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/idr.h>
@@ -2286,6 +2287,12 @@ dput_out:
 	return retval;
 }
 
+static void free_mnt_ns(struct mnt_namespace *ns)
+{
+	put_user_ns(ns->user_ns);
+	kfree(ns);
+}
+
 /*
  * Assign a sequence number so we can detect when we attempt to bind
  * mount a reference to an older mount namespace into the current
@@ -2295,7 +2302,7 @@ dput_out:
  */
 static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 
-static struct mnt_namespace *alloc_mnt_ns(void)
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2308,6 +2315,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
+	new_ns->user_ns = get_user_ns(user_ns);
 	return new_ns;
 }
 
@@ -2316,7 +2324,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
  * copied from the namespace of the passed in task structure.
  */
 static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-		struct fs_struct *fs)
+		struct user_namespace *user_ns, struct fs_struct *fs)
 {
 	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
@@ -2324,7 +2332,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	struct mount *old = mnt_ns->root;
 	struct mount *new;
 
-	new_ns = alloc_mnt_ns();
+	new_ns = alloc_mnt_ns(user_ns);
 	if (IS_ERR(new_ns))
 		return new_ns;
 
@@ -2333,7 +2341,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
 	if (IS_ERR(new)) {
 		up_write(&namespace_sem);
-		kfree(new_ns);
+		free_mnt_ns(new_ns);
 		return ERR_CAST(new);
 	}
 	new_ns->root = new;
@@ -2374,7 +2382,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 }
 
 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-		struct fs_struct *new_fs)
+		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2384,7 +2392,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	if (!(flags & CLONE_NEWNS))
 		return ns;
 
-	new_ns = dup_mnt_ns(ns, new_fs);
+	new_ns = dup_mnt_ns(ns, user_ns, new_fs);
 
 	put_mnt_ns(ns);
 	return new_ns;
@@ -2396,7 +2404,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  */
 static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 {
-	struct mnt_namespace *new_ns = alloc_mnt_ns();
+	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
 	if (!IS_ERR(new_ns)) {
 		struct mount *mnt = real_mount(m);
 		mnt->mnt_ns = new_ns;
@@ -2682,7 +2690,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	br_write_unlock(&vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
-	kfree(ns);
+	free_mnt_ns(ns);
 }
 
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e3903d770..12b2ab510323 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
 
 struct mnt_namespace;
 struct fs_struct;
+struct user_namespace;
 
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
-		struct fs_struct *);
+		struct user_namespace *, struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
 
 extern const struct file_operations proc_mounts_operations;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b8d4d8709d70..7f8b051fc19f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -66,7 +66,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	if (!new_nsp)
 		return ERR_PTR(-ENOMEM);
 
-	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
+	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs);
 	if (IS_ERR(new_nsp->mnt_ns)) {
 		err = PTR_ERR(new_nsp->mnt_ns);
 		goto out_ns;
-- 
cgit v1.2.3-55-g7522


From 0c55cfc4166d9a0f38de779bd4d75a90afbe7734 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 26 Jul 2012 21:42:03 -0700
Subject: vfs: Allow unprivileged manipulation of the mount namespace.

- Add a filesystem flag to mark filesystems that are safe to mount as
  an unprivileged user.

- Add a filesystem flag to mark filesystems that don't need MNT_NODEV
  when mounted by an unprivileged user.

- Relax the permission checks to allow unprivileged users that have
  CAP_SYS_ADMIN permissions in the user namespace referred to by the
  current mount namespace to be allowed to mount, unmount, and move
  filesystems.

Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c     | 69 ++++++++++++++++++++++++++++++++++--------------------
 include/linux/fs.h |  2 ++
 2 files changed, 45 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 4dfcaf05d17c..9ddc86f93221 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1269,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(mnt, flags);
@@ -1295,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
 
 static int mount_is_safe(struct path *path)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -1633,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
 	int type;
 	int err = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (path->dentry != path->mnt->mnt_root)
@@ -1797,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
 	struct mount *p;
 	struct mount *old;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -1884,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
 	return ERR_PTR(err);
 }
 
-static struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
-{
-	struct file_system_type *type = get_fs_type(fstype);
-	struct vfsmount *mnt;
-	if (!type)
-		return ERR_PTR(-ENODEV);
-	mnt = vfs_kern_mount(type, flags, name, data);
-	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
-	    !mnt->mnt_sb->s_subtype)
-		mnt = fs_set_subtype(mnt, fstype);
-	put_filesystem(type);
-	return mnt;
-}
-
 /*
  * add a mount into a namespace's mount tree
  */
@@ -1944,20 +1929,46 @@ unlock:
  * create a new mount for userspace and request it to be added into the
  * namespace's tree
  */
-static int do_new_mount(struct path *path, const char *type, int flags,
+static int do_new_mount(struct path *path, const char *fstype, int flags,
 			int mnt_flags, const char *name, void *data)
 {
+	struct file_system_type *type;
+	struct user_namespace *user_ns;
 	struct vfsmount *mnt;
 	int err;
 
-	if (!type)
+	if (!fstype)
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	mnt = do_kern_mount(type, flags, name, data);
+	type = get_fs_type(fstype);
+	if (!type)
+		return -ENODEV;
+
+	if (user_ns != &init_user_ns) {
+		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
+			put_filesystem(type);
+			return -EPERM;
+		}
+		/* Only in special cases allow devices from mounts
+		 * created outside the initial user namespace.
+		 */
+		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+			flags |= MS_NODEV;
+			mnt_flags |= MNT_NODEV;
+		}
+	}
+
+	mnt = vfs_kern_mount(type, flags, name, data);
+	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+	    !mnt->mnt_sb->s_subtype)
+		mnt = fs_set_subtype(mnt, fstype);
+
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -2549,7 +2560,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	struct mount *new_mnt, *root_mnt;
 	int error;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	error = user_path_dir(new_root, &new);
@@ -2631,8 +2642,13 @@ static void __init init_mount_tree(void)
 	struct vfsmount *mnt;
 	struct mnt_namespace *ns;
 	struct path root;
+	struct file_system_type *type;
 
-	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+	type = get_fs_type("rootfs");
+	if (!type)
+		panic("Can't find rootfs type");
+	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
 
@@ -2757,7 +2773,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
 	struct mnt_namespace *mnt_ns = ns;
 	struct path root;
 
-	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT))
+	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_CHROOT))
 		return -EINVAL;
 
 	if (fs->users != 1)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b33cfc97b9ca..5037aa6817fd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1812,6 +1812,8 @@ struct file_system_type {
 #define FS_REQUIRES_DEV		1 
 #define FS_BINARY_MOUNTDATA	2
 #define FS_HAS_SUBTYPE		4
+#define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
 #define FS_REVAL_DOT		16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
-- 
cgit v1.2.3-55-g7522


From 5a306f5887d5fd840beb8ea872897fa89e8fcdef Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Wed, 14 Nov 2012 23:22:21 +0100
Subject: mac80211: introduce IEEE80211_NUM_TIDS and use it

Introduce IEEE80211_NUM_TIDS in the generic 802.11
header file and use it in place of STA_TID_NUM and
NUM_RX_DATA_QUEUES which are both really the number
of TIDs.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  2 ++
 net/mac80211/agg-rx.c      |  2 +-
 net/mac80211/agg-tx.c      | 12 ++++++------
 net/mac80211/debugfs_key.c |  6 +++---
 net/mac80211/debugfs_sta.c | 10 +++++-----
 net/mac80211/ht.c          |  4 ++--
 net/mac80211/key.c         | 10 +++++-----
 net/mac80211/key.h         |  8 +++-----
 net/mac80211/rx.c          |  4 ++--
 net/mac80211/sta_info.c    | 10 +++++-----
 net/mac80211/sta_info.h    | 19 +++++++++----------
 11 files changed, 43 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 4530d4960953..d68790903b9e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -131,6 +131,8 @@
 
 #define IEEE80211_MAX_MESH_ID_LEN	32
 
+#define IEEE80211_NUM_TIDS		16
+
 #define IEEE80211_QOS_CTL_LEN		2
 /* 1d tag mask */
 #define IEEE80211_QOS_CTL_TAG1D_MASK		0x0007
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 186d9919b043..808338a1bce5 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -118,7 +118,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
 		return;
 	}
 
-	for (i = 0; i < STA_TID_NUM; i++)
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
 		if (ba_rx_bitmap & BIT(i))
 			set_bit(i, sta->ampdu_mlme.tid_rx_stop_requested);
 
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 3195a6307f50..4152ed1034b8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -448,7 +448,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 	if (WARN_ON(!local->ops->ampdu_action))
 		return -EINVAL;
 
-	if ((tid >= STA_TID_NUM) ||
+	if ((tid >= IEEE80211_NUM_TIDS) ||
 	    !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) ||
 	    (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW))
 		return -EINVAL;
@@ -605,9 +605,9 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
 
 	trace_api_start_tx_ba_cb(sdata, ra, tid);
 
-	if (tid >= STA_TID_NUM) {
+	if (tid >= IEEE80211_NUM_TIDS) {
 		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
-		       tid, STA_TID_NUM);
+		       tid, IEEE80211_NUM_TIDS);
 		return;
 	}
 
@@ -687,7 +687,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 	if (!local->ops->ampdu_action)
 		return -EINVAL;
 
-	if (tid >= STA_TID_NUM)
+	if (tid >= IEEE80211_NUM_TIDS)
 		return -EINVAL;
 
 	spin_lock_bh(&sta->lock);
@@ -722,9 +722,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
 
 	trace_api_stop_tx_ba_cb(sdata, ra, tid);
 
-	if (tid >= STA_TID_NUM) {
+	if (tid >= IEEE80211_NUM_TIDS) {
 		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
-		       tid, STA_TID_NUM);
+		       tid, IEEE80211_NUM_TIDS);
 		return;
 	}
 
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 090d08ff22c4..2d4235497f1b 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -116,7 +116,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 				size_t count, loff_t *ppos)
 {
 	struct ieee80211_key *key = file->private_data;
-	char buf[14*NUM_RX_DATA_QUEUES+1], *p = buf;
+	char buf[14*IEEE80211_NUM_TIDS+1], *p = buf;
 	int i, len;
 	const u8 *rpn;
 
@@ -126,7 +126,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 		len = scnprintf(buf, sizeof(buf), "\n");
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
-		for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
+		for (i = 0; i < IEEE80211_NUM_TIDS; i++)
 			p += scnprintf(p, sizeof(buf)+buf-p,
 				       "%08x %04x\n",
 				       key->u.tkip.rx[i].iv32,
@@ -134,7 +134,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 		len = p - buf;
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
-		for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
+		for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) {
 			rpn = key->u.ccmp.rx_pn[i];
 			p += scnprintf(p, sizeof(buf)+buf-p,
 				       "%02x%02x%02x%02x%02x%02x\n",
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 5ccec2c1e9f6..3d103929d41a 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -131,10 +131,10 @@ STA_OPS(connected_time);
 static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
 				      size_t count, loff_t *ppos)
 {
-	char buf[15*NUM_RX_DATA_QUEUES], *p = buf;
+	char buf[15*IEEE80211_NUM_TIDS], *p = buf;
 	int i;
 	struct sta_info *sta = file->private_data;
-	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
 		p += scnprintf(p, sizeof(buf)+buf-p, "%x ",
 			       le16_to_cpu(sta->last_seq_ctrl[i]));
 	p += scnprintf(p, sizeof(buf)+buf-p, "\n");
@@ -145,7 +145,7 @@ STA_OPS(last_seq_ctrl);
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
-	char buf[71 + STA_TID_NUM * 40], *p = buf;
+	char buf[71 + IEEE80211_NUM_TIDS * 40], *p = buf;
 	int i;
 	struct sta_info *sta = file->private_data;
 	struct tid_ampdu_rx *tid_rx;
@@ -158,7 +158,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	p += scnprintf(p, sizeof(buf) + buf - p,
 		       "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
 
-	for (i = 0; i < STA_TID_NUM; i++) {
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 		tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
 		tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
 
@@ -220,7 +220,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
 
 	tid = simple_strtoul(buf, NULL, 0);
 
-	if (tid >= STA_TID_NUM)
+	if (tid >= IEEE80211_NUM_TIDS)
 		return -EINVAL;
 
 	if (tx) {
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 4b4538d63925..a71d891794a4 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -185,7 +185,7 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx)
 
 	cancel_work_sync(&sta->ampdu_mlme.work);
 
-	for (i = 0; i <  STA_TID_NUM; i++) {
+	for (i = 0; i <  IEEE80211_NUM_TIDS; i++) {
 		__ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx);
 		__ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
 					       WLAN_REASON_QSTA_LEAVE_QBSS, tx);
@@ -209,7 +209,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
 		return;
 
 	mutex_lock(&sta->ampdu_mlme.mtx);
-	for (tid = 0; tid < STA_TID_NUM; tid++) {
+	for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
 		if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired))
 			___ieee80211_stop_rx_ba_session(
 				sta, tid, WLAN_BACK_RECIPIENT,
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 0f18ef59392d..619c5d697999 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -339,7 +339,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		key->conf.iv_len = TKIP_IV_LEN;
 		key->conf.icv_len = TKIP_ICV_LEN;
 		if (seq) {
-			for (i = 0; i < NUM_RX_DATA_QUEUES; i++) {
+			for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 				key->u.tkip.rx[i].iv32 =
 					get_unaligned_le32(&seq[2]);
 				key->u.tkip.rx[i].iv16 =
@@ -352,7 +352,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		key->conf.iv_len = CCMP_HDR_LEN;
 		key->conf.icv_len = CCMP_MIC_LEN;
 		if (seq) {
-			for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++)
+			for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
 				for (j = 0; j < CCMP_PN_LEN; j++)
 					key->u.ccmp.rx_pn[i][j] =
 						seq[CCMP_PN_LEN - j - 1];
@@ -655,16 +655,16 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
 
 	switch (key->conf.cipher) {
 	case WLAN_CIPHER_SUITE_TKIP:
-		if (WARN_ON(tid < 0 || tid >= NUM_RX_DATA_QUEUES))
+		if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS))
 			return;
 		seq->tkip.iv32 = key->u.tkip.rx[tid].iv32;
 		seq->tkip.iv16 = key->u.tkip.rx[tid].iv16;
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
-		if (WARN_ON(tid < -1 || tid >= NUM_RX_DATA_QUEUES))
+		if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS))
 			return;
 		if (tid < 0)
-			pn = key->u.ccmp.rx_pn[NUM_RX_DATA_QUEUES];
+			pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS];
 		else
 			pn = key->u.ccmp.rx_pn[tid];
 		memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 7d4e31f037d7..7cff0d3a519c 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -30,8 +30,6 @@
 #define TKIP_ICV_LEN		4
 #define CMAC_PN_LEN		6
 
-#define NUM_RX_DATA_QUEUES	16
-
 struct ieee80211_local;
 struct ieee80211_sub_if_data;
 struct sta_info;
@@ -82,17 +80,17 @@ struct ieee80211_key {
 			struct tkip_ctx tx;
 
 			/* last received RSC */
-			struct tkip_ctx rx[NUM_RX_DATA_QUEUES];
+			struct tkip_ctx rx[IEEE80211_NUM_TIDS];
 		} tkip;
 		struct {
 			atomic64_t tx_pn;
 			/*
 			 * Last received packet number. The first
-			 * NUM_RX_DATA_QUEUES counters are used with Data
+			 * IEEE80211_NUM_TIDS counters are used with Data
 			 * frames and the last counter is used with Robust
 			 * Management frames.
 			 */
-			u8 rx_pn[NUM_RX_DATA_QUEUES + 1][CCMP_PN_LEN];
+			u8 rx_pn[IEEE80211_NUM_TIDS + 1][CCMP_PN_LEN];
 			struct crypto_cipher *tfm;
 			u32 replays; /* dot11RSNAStatsCCMPReplays */
 		} ccmp;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e3daee8fdf7a..8480bbf1a707 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -408,10 +408,10 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
 		 *
 		 * We also use that counter for non-QoS STAs.
 		 */
-		seqno_idx = NUM_RX_DATA_QUEUES;
+		seqno_idx = IEEE80211_NUM_TIDS;
 		security_idx = 0;
 		if (ieee80211_is_mgmt(hdr->frame_control))
-			security_idx = NUM_RX_DATA_QUEUES;
+			security_idx = IEEE80211_NUM_TIDS;
 		tid = 0;
 	}
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f7bb54f9ab72..a0836d7187c1 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -142,7 +142,7 @@ static void free_sta_work(struct work_struct *wk)
 	 * drivers have to handle aggregation stop being requested, followed
 	 * directly by station destruction.
 	 */
-	for (i = 0; i < STA_TID_NUM; i++) {
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 		tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
 		if (!tid_tx)
 			continue;
@@ -330,7 +330,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 	}
 
-	for (i = 0; i < STA_TID_NUM; i++) {
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 		/*
 		 * timer_to_tid must be initialized with identity mapping
 		 * to enable session_timer's data differentiation. See
@@ -343,7 +343,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		skb_queue_head_init(&sta->tx_filtered[i]);
 	}
 
-	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
 		sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
 
 	sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
@@ -985,7 +985,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 
 	clear_sta_flag(sta, WLAN_STA_SP);
 
-	BUILD_BUG_ON(BITS_TO_LONGS(STA_TID_NUM) > 1);
+	BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1);
 	sta->driver_buffered_tids = 0;
 
 	if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
@@ -1369,7 +1369,7 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 
-	if (WARN_ON(tid >= STA_TID_NUM))
+	if (WARN_ON(tid >= IEEE80211_NUM_TIDS))
 		return;
 
 	if (buffered)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index c88f161f8118..776f3d0b4a47 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -80,7 +80,6 @@ enum ieee80211_sta_info_flags {
 	WLAN_STA_TOFFSET_KNOWN,
 };
 
-#define STA_TID_NUM 16
 #define ADDBA_RESP_INTERVAL HZ
 #define HT_AGG_MAX_RETRIES		15
 #define HT_AGG_BURST_RETRIES		3
@@ -197,15 +196,15 @@ struct tid_ampdu_rx {
 struct sta_ampdu_mlme {
 	struct mutex mtx;
 	/* rx */
-	struct tid_ampdu_rx __rcu *tid_rx[STA_TID_NUM];
-	unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)];
-	unsigned long tid_rx_stop_requested[BITS_TO_LONGS(STA_TID_NUM)];
+	struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
+	unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
+	unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	/* tx */
 	struct work_struct work;
-	struct tid_ampdu_tx __rcu *tid_tx[STA_TID_NUM];
-	struct tid_ampdu_tx *tid_start_tx[STA_TID_NUM];
-	unsigned long last_addba_req_time[STA_TID_NUM];
-	u8 addba_req_num[STA_TID_NUM];
+	struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS];
+	struct tid_ampdu_tx *tid_start_tx[IEEE80211_NUM_TIDS];
+	unsigned long last_addba_req_time[IEEE80211_NUM_TIDS];
+	u8 addba_req_num[IEEE80211_NUM_TIDS];
 	u8 dialog_token_allocator;
 };
 
@@ -330,7 +329,7 @@ struct sta_info {
 	int last_signal;
 	struct ewma avg_signal;
 	/* Plus 1 for non-QoS frames */
-	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES + 1];
+	__le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1];
 
 	/* Updated from TX status path only, no locking requirements */
 	unsigned long tx_filtered_count;
@@ -351,7 +350,7 @@ struct sta_info {
 	 * Aggregation information, locked with lock.
 	 */
 	struct sta_ampdu_mlme ampdu_mlme;
-	u8 timer_to_tid[STA_TID_NUM];
+	u8 timer_to_tid[IEEE80211_NUM_TIDS];
 
 #ifdef CONFIG_MAC80211_MESH
 	/*
-- 
cgit v1.2.3-55-g7522


From fd25b4c2f226de818e1d2b71e3e681d28bcaf5ba Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 13 Nov 2012 18:21:22 +0100
Subject: vtime: Remove the underscore prefix invasion

Prepending irq-unsafe vtime APIs with underscores was actually
a bad idea as the result is a big mess in the API namespace that
is even waiting to be further extended. Also these helpers
are always called from irq safe callers except kvm. Just
provide a vtime_account_system_irqsafe() for this specific
case so that we can remove the underscore prefix on other
vtime functions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/ia64/kernel/time.c    |  8 ++++----
 arch/powerpc/kernel/time.c |  4 ++--
 arch/s390/kernel/vtime.c   |  4 ++--
 include/linux/kvm_host.h   |  4 ++--
 include/linux/vtime.h      |  8 ++++----
 kernel/sched/cputime.c     | 12 ++++++------
 6 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 5e4850305d3f..f6388216080d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -106,9 +106,9 @@ void vtime_task_switch(struct task_struct *prev)
 	struct thread_info *ni = task_thread_info(current);
 
 	if (idle_task(smp_processor_id()) != prev)
-		__vtime_account_system(prev);
+		vtime_account_system(prev);
 	else
-		__vtime_account_idle(prev);
+		vtime_account_idle(prev);
 
 	vtime_account_user(prev);
 
@@ -135,14 +135,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 	return delta_stime;
 }
 
-void __vtime_account_system(struct task_struct *tsk)
+void vtime_account_system(struct task_struct *tsk)
 {
 	cputime_t delta = vtime_delta(tsk);
 
 	account_system_time(tsk, 0, delta, delta);
 }
 
-void __vtime_account_idle(struct task_struct *tsk)
+void vtime_account_idle(struct task_struct *tsk)
 {
 	account_idle_time(vtime_delta(tsk));
 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0db456f30d45..ce4cb772dc78 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -336,7 +336,7 @@ static u64 vtime_delta(struct task_struct *tsk,
 	return delta;
 }
 
-void __vtime_account_system(struct task_struct *tsk)
+void vtime_account_system(struct task_struct *tsk)
 {
 	u64 delta, sys_scaled, stolen;
 
@@ -346,7 +346,7 @@ void __vtime_account_system(struct task_struct *tsk)
 		account_steal_time(stolen);
 }
 
-void __vtime_account_idle(struct task_struct *tsk)
+void vtime_account_idle(struct task_struct *tsk)
 {
 	u64 delta, sys_scaled, stolen;
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 783e988c4e1e..80d1dbc5d42e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -140,9 +140,9 @@ void vtime_account(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 
-void __vtime_account_system(struct task_struct *tsk)
+void vtime_account_system(struct task_struct *tsk)
 __attribute__((alias("vtime_account")));
-EXPORT_SYMBOL_GPL(__vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void __kprobes vtime_stop_cpu(void)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0e2212fe4784..f17158bdd4fc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -741,7 +741,7 @@ static inline void kvm_guest_enter(void)
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system(current);
+	vtime_account_system_irqsafe(current);
 	current->flags |= PF_VCPU;
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
@@ -759,7 +759,7 @@ static inline void kvm_guest_exit(void)
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system(current);
+	vtime_account_system_irqsafe(current);
 	current->flags &= ~PF_VCPU;
 }
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 0c2a2d303020..5ad13c325deb 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -5,14 +5,14 @@ struct task_struct;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
-extern void __vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void __vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account_system_irqsafe(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void __vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
@@ -40,7 +40,7 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk)
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
 	/* On hard|softirq exit we always account to hard|softirq cputime */
-	__vtime_account_system(tsk);
+	vtime_account_system(tsk);
 	irqtime_account_irq(tsk);
 }
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8d859dae5bed..c0aa1ba752ea 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -433,20 +433,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*st = cputime.stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_system_irqsafe(struct task_struct *tsk)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__vtime_account_system(tsk);
+	vtime_account_system(tsk);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
 
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
- * __vtime_account_system() and __vtime_account_idle(). Archs that
+ * vtime_account_system() and vtime_account_idle(). Archs that
  * have other meaning of the idle time (s390 only includes the
  * time spent by the CPU when it's in low power mode) must override
  * vtime_account().
@@ -459,9 +459,9 @@ void vtime_account(struct task_struct *tsk)
 	local_irq_save(flags);
 
 	if (in_interrupt() || !is_idle_task(tsk))
-		__vtime_account_system(tsk);
+		vtime_account_system(tsk);
 	else
-		__vtime_account_idle(tsk);
+		vtime_account_idle(tsk);
 
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3-55-g7522


From bcebdf846522056a84ba0b0cba5f5413868c9394 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 13 Nov 2012 23:51:06 +0100
Subject: vtime: Explicitly account pending user time on process tick

All vtime implementations just flush the user time on process
tick. Consolidate that in generic code by calling a user time
accounting helper. This avoids an indirect call in ia64 and
prepare to also consolidate vtime context switch code.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/ia64/kernel/time.c     | 11 +----------
 arch/powerpc/kernel/time.c  | 14 +++++++-------
 arch/s390/kernel/vtime.c    |  7 ++++++-
 include/linux/kernel_stat.h |  8 ++++++++
 include/linux/vtime.h       |  1 +
 5 files changed, 23 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index f6388216080d..834c78bd3b5f 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -83,7 +83,7 @@ static struct clocksource *itc_clocksource;
 
 extern cputime_t cycle_to_cputime(u64 cyc);
 
-static void vtime_account_user(struct task_struct *tsk)
+void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t delta_utime;
 	struct thread_info *ti = task_thread_info(tsk);
@@ -147,15 +147,6 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(vtime_delta(tsk));
 }
 
-/*
- * Called from the timer interrupt handler to charge accumulated user time
- * to the current process.  Must be called with interrupts disabled.
- */
-void account_process_tick(struct task_struct *p, int user_tick)
-{
-	vtime_account_user(p);
-}
-
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 static irqreturn_t
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ce4cb772dc78..a667aaf85846 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -355,15 +355,15 @@ void vtime_account_idle(struct task_struct *tsk)
 }
 
 /*
- * Transfer the user and system times accumulated in the paca
- * by the exception entry and exit code to the generic process
- * user and system time records.
+ * Transfer the user time accumulated in the paca
+ * by the exception entry and exit code to the generic
+ * process user time records.
  * Must be called with interrupts disabled.
- * Assumes that vtime_account() has been called recently
- * (i.e. since the last entry from usermode) so that
+ * Assumes that vtime_account_system/idle() has been called
+ * recently (i.e. since the last entry from usermode) so that
  * get_paca()->user_time_scaled is up to date.
  */
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t utime, utimescaled;
 
@@ -378,7 +378,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 void vtime_task_switch(struct task_struct *prev)
 {
 	vtime_account(prev);
-	account_process_tick(prev, 0);
+	vtime_account_user(prev);
 }
 
 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 80d1dbc5d42e..7c6d861a1a40 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -112,7 +112,12 @@ void vtime_task_switch(struct task_struct *prev)
 	S390_lowcore.system_timer = ti->system_timer;
 }
 
-void account_process_tick(struct task_struct *tsk, int user_tick)
+/*
+ * In s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time accounting.
+ */
+void vtime_account_user(struct task_struct *tsk)
 {
 	if (do_account_vtime(tsk, HARDIRQ_OFFSET))
 		virt_timer_expire();
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 1865b1f29770..66b70780e910 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,15 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+static inline void account_process_tick(struct task_struct *tsk, int user)
+{
+	vtime_account_user(tsk);
+}
+#else
 extern void account_process_tick(struct task_struct *, int user);
+#endif
+
 extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 5ad13c325deb..ae30ab58431a 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,6 +8,7 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
-- 
cgit v1.2.3-55-g7522


From febfcef60d4f9457785b45aab548bc7ee5ea158f Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:36 -0800
Subject: cgroup: cgroup->dentry isn't a RCU pointer

cgroup->dentry is marked and used as a RCU pointer; however, it isn't
one - the final dentry put doesn't go through call_rcu().  cgroup and
dentry share the same RCU freeing rule via synchronize_rcu() in
cgroup_diput() (kfree_rcu() used on cgrp is unnecessary).  If cgrp is
accessible under RCU read lock, so is its dentry and dereferencing
cgrp->dentry doesn't need any further RCU protection or annotation.

While not being accurate, before the previous patch, the RCU accessors
served a purpose as memory barriers - cgroup->dentry used to be
assigned after the cgroup was made visible to cgroup_path(), so the
assignment and dereferencing in cgroup_path() needed the memory
barrier pair.  Now that list_add_tail_rcu() happens after
cgroup->dentry is assigned, this no longer is necessary.

Remove the now unnecessary and misleading RCU annotations from
cgroup->dentry.  To make up for the removal of rcu_dereference_check()
in cgroup_path(), add an explicit rcu_lockdep_assert(), which asserts
the dereference rule of @cgrp, not cgrp->dentry.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  2 +-
 kernel/cgroup.c        | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8f64b459fbd4..d605857c4bf3 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -165,7 +165,7 @@ struct cgroup {
 	struct list_head files;		/* my files */
 
 	struct cgroup *parent;		/* my parent */
-	struct dentry __rcu *dentry;	/* cgroup fs entry, RCU protected */
+	struct dentry *dentry;		/* cgroup fs entry, RCU protected */
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d62a529db2f7..affc76d7f739 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1756,9 +1756,11 @@ static struct kobject *cgroup_kobj;
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
+	struct dentry *dentry = cgrp->dentry;
 	char *start;
-	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
-						      cgroup_lock_is_held());
+
+	rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
+			   "cgroup_path() called without proper locking");
 
 	if (!dentry || cgrp == dummytop) {
 		/*
@@ -1782,8 +1784,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 		if (!cgrp)
 			break;
 
-		dentry = rcu_dereference_check(cgrp->dentry,
-					       cgroup_lock_is_held());
+		dentry = cgrp->dentry;
 		if (!cgrp->parent)
 			continue;
 		if (--start < buf)
@@ -4124,7 +4125,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	/* allocation complete, commit to creation */
 	dentry->d_fsdata = cgrp;
-	rcu_assign_pointer(cgrp->dentry, dentry);
+	cgrp->dentry = dentry;
 	list_add_tail(&cgrp->allcg_node, &root->allcg_list);
 	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
 	root->number_of_cgroups++;
-- 
cgit v1.2.3-55-g7522


From 38b53abaa3e0c7e750ef73eee919cf42eee6b134 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:36 -0800
Subject: cgroup: make CSS_* flags bit masks instead of bit positions

Currently, CSS_* flags are defined as bit positions and manipulated
using atomic bitops.  There's no reason to use atomic bitops for them
and bit positions are clunkier to deal with than bit masks.  Make
CSS_* bit masks instead and use the usual C bitwise operators to
access them.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 8 ++++----
 kernel/cgroup.c        | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d605857c4bf3..a0fc64167129 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -81,7 +81,7 @@ struct cgroup_subsys_state {
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
-	CSS_ROOT, /* This CSS is the root of the subsystem */
+	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
 };
 
 /* Caller must verify that the css is not for root cgroup */
@@ -100,7 +100,7 @@ static inline void __css_get(struct cgroup_subsys_state *css, int count)
 static inline void css_get(struct cgroup_subsys_state *css)
 {
 	/* We don't need to reference count the root state */
-	if (!test_bit(CSS_ROOT, &css->flags))
+	if (!(css->flags & CSS_ROOT))
 		__css_get(css, 1);
 }
 
@@ -113,7 +113,7 @@ static inline void css_get(struct cgroup_subsys_state *css)
 extern bool __css_tryget(struct cgroup_subsys_state *css);
 static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
-	if (test_bit(CSS_ROOT, &css->flags))
+	if (css->flags & CSS_ROOT)
 		return true;
 	return __css_tryget(css);
 }
@@ -126,7 +126,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
 extern void __css_put(struct cgroup_subsys_state *css);
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!test_bit(CSS_ROOT, &css->flags))
+	if (!(css->flags & CSS_ROOT))
 		__css_put(css);
 }
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index affc76d7f739..82ad8785fafe 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4020,7 +4020,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 	css->flags = 0;
 	css->id = NULL;
 	if (cgrp == dummytop)
-		set_bit(CSS_ROOT, &css->flags);
+		css->flags |= CSS_ROOT;
 	BUG_ON(cgrp->subsys[ss->subsys_id]);
 	cgrp->subsys[ss->subsys_id] = css;
 
-- 
cgit v1.2.3-55-g7522


From a31f2d3ff7fe20cbe2a143515a7d7c408b29dd0d Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:37 -0800
Subject: cgroup: introduce CSS_ONLINE flag and on/offline_css() helpers

New helpers on/offline_css() respectively wrap ->post_create() and
->pre_destroy() invocations.  online_css() sets CSS_ONLINE after
->post_create() is complete and offline_css() invokes ->pre_destroy()
iff CSS_ONLINE is set and clears it while also handling the temporary
dropping of cgroup_mutex.

This patch doesn't introduce any behavior change at the moment but
will be used to improve cgroup_create() failure path and allow
->post_create() to fail.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 65 ++++++++++++++++++++++++++++++++------------------
 2 files changed, 43 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a0fc64167129..f4a9c9836906 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -82,6 +82,7 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
 	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
+	CSS_ONLINE	= (1 << 1), /* between ->post_create() and ->pre_destroy() */
 };
 
 /* Caller must verify that the css is not for root cgroup */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4412d9694f13..78a3d5c0968e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4035,6 +4035,42 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 	INIT_WORK(&css->dput_work, css_dput_fn);
 }
 
+/* invoke ->post_create() on a new CSS and mark it online */
+static void online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (ss->post_create)
+		ss->post_create(cgrp);
+	cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
+}
+
+/* if the CSS is online, invoke ->pre_destory() on it and mark it offline */
+static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
+	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
+{
+	struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (!(css->flags & CSS_ONLINE))
+		return;
+
+	/*
+	 * pre_destroy() should be called with cgroup_mutex unlocked.  See
+	 * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for
+	 * details.  This temporary unlocking should go away once
+	 * cgroup_mutex is unexported from controllers.
+	 */
+	if (ss->pre_destroy) {
+		mutex_unlock(&cgroup_mutex);
+		ss->pre_destroy(cgrp);
+		mutex_lock(&cgroup_mutex);
+	}
+
+	cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
+}
+
 /*
  * cgroup_create - create a cgroup
  * @parent: cgroup that will be parent of the new cgroup
@@ -4137,8 +4173,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 		dget(dentry);
 
 		/* creation succeeded, notify subsystems */
-		if (ss->post_create)
-			ss->post_create(cgrp);
+		online_css(ss, cgrp);
 	}
 
 	err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
@@ -4240,18 +4275,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	}
 	set_bit(CGRP_REMOVED, &cgrp->flags);
 
-	/*
-	 * Tell subsystems to initate destruction.  pre_destroy() should be
-	 * called with cgroup_mutex unlocked.  See 3fa59dfbc3 ("cgroup: fix
-	 * potential deadlock in pre_destroy") for details.  This temporary
-	 * unlocking should go away once cgroup_mutex is unexported from
-	 * controllers.
-	 */
-	mutex_unlock(&cgroup_mutex);
+	/* tell subsystems to initate destruction */
 	for_each_subsys(cgrp->root, ss)
-		if (ss->pre_destroy)
-			ss->pre_destroy(cgrp);
-	mutex_lock(&cgroup_mutex);
+		offline_css(ss, cgrp);
 
 	/*
 	 * Put all the base refs.  Each css holds an extra reference to the
@@ -4354,9 +4380,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	BUG_ON(!list_empty(&init_task.tasks));
 
 	ss->active = 1;
-
-	if (ss->post_create)
-		ss->post_create(dummytop);
+	online_css(ss, dummytop);
 
 	mutex_unlock(&cgroup_mutex);
 
@@ -4469,9 +4493,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	write_unlock(&css_set_lock);
 
 	ss->active = 1;
-
-	if (ss->post_create)
-		ss->post_create(dummytop);
+	online_css(ss, dummytop);
 
 	/* success! */
 	mutex_unlock(&cgroup_mutex);
@@ -4501,12 +4523,9 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	 */
 	BUG_ON(ss->root != &rootnode);
 
-	/* ->pre_destroy() should be called outside cgroup_mutex for now */
-	if (ss->pre_destroy)
-		ss->pre_destroy(dummytop);
-
 	mutex_lock(&cgroup_mutex);
 
+	offline_css(ss, dummytop);
 	ss->active = 0;
 
 	if (ss->use_id) {
-- 
cgit v1.2.3-55-g7522


From b1929db42f8a649d9a9e397119f628c27fd4021f Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:38 -0800
Subject: cgroup: allow ->post_create() to fail

There could be cases where controllers want to do initialization
operations which may fail from ->post_create().  This patch makes
->post_create() return -errno to indicate failure and online_css()
relay such failures.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Glauber Costa <glommer@parallels.com>
---
 include/linux/cgroup.h  |  2 +-
 kernel/cgroup.c         | 29 +++++++++++++++++++----------
 kernel/cgroup_freezer.c |  4 +++-
 3 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f4a9c9836906..03d8a92786da 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -440,7 +440,7 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
 
 struct cgroup_subsys {
 	struct cgroup_subsys_state *(*create)(struct cgroup *cgrp);
-	void (*post_create)(struct cgroup *cgrp);
+	int (*post_create)(struct cgroup *cgrp);
 	void (*pre_destroy)(struct cgroup *cgrp);
 	void (*destroy)(struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 027b66e66698..c389f4258681 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4041,14 +4041,18 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 	INIT_WORK(&css->dput_work, css_dput_fn);
 }
 
-/* invoke ->post_create() on a new CSS and mark it online */
-static void online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
+/* invoke ->post_create() on a new CSS and mark it online if successful */
+static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
+	int ret = 0;
+
 	lockdep_assert_held(&cgroup_mutex);
 
 	if (ss->post_create)
-		ss->post_create(cgrp);
-	cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
+		ret = ss->post_create(cgrp);
+	if (!ret)
+		cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
+	return ret;
 }
 
 /* if the CSS is online, invoke ->pre_destory() on it and mark it offline */
@@ -4174,12 +4178,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
 	root->number_of_cgroups++;
 
-	for_each_subsys(root, ss) {
-		/* each css holds a ref to the cgroup's dentry */
+	/* each css holds a ref to the cgroup's dentry */
+	for_each_subsys(root, ss)
 		dget(dentry);
 
-		/* creation succeeded, notify subsystems */
-		online_css(ss, cgrp);
+	/* creation succeeded, notify subsystems */
+	for_each_subsys(root, ss) {
+		err = online_css(ss, cgrp);
+		if (err)
+			goto err_destroy;
 	}
 
 	err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
@@ -4393,7 +4400,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	BUG_ON(!list_empty(&init_task.tasks));
 
 	ss->active = 1;
-	online_css(ss, dummytop);
+	BUG_ON(online_css(ss, dummytop));
 
 	mutex_unlock(&cgroup_mutex);
 
@@ -4500,7 +4507,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	write_unlock(&css_set_lock);
 
 	ss->active = 1;
-	online_css(ss, dummytop);
+	ret = online_css(ss, dummytop);
+	if (ret)
+		goto err_unload;
 
 	/* success! */
 	mutex_unlock(&cgroup_mutex);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 670a4af7dc94..ee8bb671688c 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -112,7 +112,7 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
  * parent's freezing state while holding both parent's and our
  * freezer->lock.
  */
-static void freezer_post_create(struct cgroup *cgroup)
+static int freezer_post_create(struct cgroup *cgroup)
 {
 	struct freezer *freezer = cgroup_freezer(cgroup);
 	struct freezer *parent = parent_freezer(freezer);
@@ -136,6 +136,8 @@ static void freezer_post_create(struct cgroup *cgroup)
 	spin_unlock(&freezer->lock);
 	if (parent)
 		spin_unlock_irq(&parent->lock);
+
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3-55-g7522


From 92fb97487a7e41b222c1417cabd1d1ab7cc3a48c Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:38 -0800
Subject: cgroup: rename ->create/post_create/pre_destroy/destroy() to
 ->css_alloc/online/offline/free()

Rename cgroup_subsys css lifetime related callbacks to better describe
what their roles are.  Also, update documentation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 Documentation/cgroups/cgroups.txt | 49 ++++++++++++++++++++++---------------
 block/blk-cgroup.c                | 14 +++++------
 include/linux/cgroup.h            | 35 ++++++++++++++-------------
 kernel/cgroup.c                   | 51 ++++++++++++++++++++-------------------
 kernel/cgroup_freezer.c           | 20 +++++++--------
 kernel/cpuset.c                   | 10 ++++----
 kernel/events/core.c              |  8 +++---
 kernel/sched/core.c               | 16 ++++++------
 mm/hugetlb_cgroup.c               | 14 +++++------
 mm/memcontrol.c                   | 12 ++++-----
 net/core/netprio_cgroup.c         |  8 +++---
 net/sched/cls_cgroup.c            |  8 +++---
 security/device_cgroup.c          |  8 +++---
 13 files changed, 132 insertions(+), 121 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 9e04196c4d78..b06eea217403 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -553,16 +553,16 @@ call to cgroup_unload_subsys(). It should also set its_subsys.module =
 THIS_MODULE in its .c file.
 
 Each subsystem may export the following methods. The only mandatory
-methods are create/destroy. Any others that are null are presumed to
+methods are css_alloc/free. Any others that are null are presumed to
 be successful no-ops.
 
-struct cgroup_subsys_state *create(struct cgroup *cgrp)
+struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)
 (cgroup_mutex held by caller)
 
-Called to create a subsystem state object for a cgroup. The
+Called to allocate a subsystem state object for a cgroup. The
 subsystem should allocate its subsystem state object for the passed
 cgroup, returning a pointer to the new object on success or a
-negative error code. On success, the subsystem pointer should point to
+ERR_PTR() value. On success, the subsystem pointer should point to
 a structure of type cgroup_subsys_state (typically embedded in a
 larger subsystem-specific object), which will be initialized by the
 cgroup system. Note that this will be called at initialization to
@@ -571,24 +571,33 @@ identified by the passed cgroup object having a NULL parent (since
 it's the root of the hierarchy) and may be an appropriate place for
 initialization code.
 
-void destroy(struct cgroup *cgrp)
+int css_online(struct cgroup *cgrp)
 (cgroup_mutex held by caller)
 
-The cgroup system is about to destroy the passed cgroup; the subsystem
-should do any necessary cleanup and free its subsystem state
-object. By the time this method is called, the cgroup has already been
-unlinked from the file system and from the child list of its parent;
-cgroup->parent is still valid. (Note - can also be called for a
-newly-created cgroup if an error occurs after this subsystem's
-create() method has been called for the new cgroup).
-
-int pre_destroy(struct cgroup *cgrp);
-
-Called before checking the reference count on each subsystem. This may
-be useful for subsystems which have some extra references even if
-there are not tasks in the cgroup. If pre_destroy() returns error code,
-rmdir() will fail with it. From this behavior, pre_destroy() can be
-called multiple times against a cgroup.
+Called after @cgrp successfully completed all allocations and made
+visible to cgroup_for_each_child/descendant_*() iterators. The
+subsystem may choose to fail creation by returning -errno. This
+callback can be used to implement reliable state sharing and
+propagation along the hierarchy. See the comment on
+cgroup_for_each_descendant_pre() for details.
+
+void css_offline(struct cgroup *cgrp);
+
+This is the counterpart of css_online() and called iff css_online()
+has succeeded on @cgrp. This signifies the beginning of the end of
+@cgrp. @cgrp is being removed and the subsystem should start dropping
+all references it's holding on @cgrp. When all references are dropped,
+cgroup removal will proceed to the next step - css_free(). After this
+callback, @cgrp should be considered dead to the subsystem.
+
+void css_free(struct cgroup *cgrp)
+(cgroup_mutex held by caller)
+
+The cgroup system is about to free @cgrp; the subsystem should free
+its subsystem state object. By the time this method is called, @cgrp
+is completely unused; @cgrp->parent is still valid. (Note - can also
+be called for a newly-created cgroup if an error occurs after this
+subsystem's create() method has been called for the new cgroup).
 
 int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 3dc60fc441cb..3f6d39d23bb6 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -600,7 +600,7 @@ struct cftype blkcg_files[] = {
 };
 
 /**
- * blkcg_pre_destroy - cgroup pre_destroy callback
+ * blkcg_css_offline - cgroup css_offline callback
  * @cgroup: cgroup of interest
  *
  * This function is called when @cgroup is about to go away and responsible
@@ -610,7 +610,7 @@ struct cftype blkcg_files[] = {
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
-static void blkcg_pre_destroy(struct cgroup *cgroup)
+static void blkcg_css_offline(struct cgroup *cgroup)
 {
 	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
 
@@ -634,7 +634,7 @@ static void blkcg_pre_destroy(struct cgroup *cgroup)
 	spin_unlock_irq(&blkcg->lock);
 }
 
-static void blkcg_destroy(struct cgroup *cgroup)
+static void blkcg_css_free(struct cgroup *cgroup)
 {
 	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
 
@@ -642,7 +642,7 @@ static void blkcg_destroy(struct cgroup *cgroup)
 		kfree(blkcg);
 }
 
-static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup)
+static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
 {
 	static atomic64_t id_seq = ATOMIC64_INIT(0);
 	struct blkcg *blkcg;
@@ -739,10 +739,10 @@ static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 
 struct cgroup_subsys blkio_subsys = {
 	.name = "blkio",
-	.create = blkcg_create,
+	.css_alloc = blkcg_css_alloc,
+	.css_offline = blkcg_css_offline,
+	.css_free = blkcg_css_free,
 	.can_attach = blkcg_can_attach,
-	.pre_destroy = blkcg_pre_destroy,
-	.destroy = blkcg_destroy,
 	.subsys_id = blkio_subsys_id,
 	.base_cftypes = blkcg_files,
 	.module = THIS_MODULE,
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 03d8a92786da..7a2189ca8327 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -82,7 +82,7 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
 	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
-	CSS_ONLINE	= (1 << 1), /* between ->post_create() and ->pre_destroy() */
+	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 };
 
 /* Caller must verify that the css is not for root cgroup */
@@ -439,10 +439,11 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
  */
 
 struct cgroup_subsys {
-	struct cgroup_subsys_state *(*create)(struct cgroup *cgrp);
-	int (*post_create)(struct cgroup *cgrp);
-	void (*pre_destroy)(struct cgroup *cgrp);
-	void (*destroy)(struct cgroup *cgrp);
+	struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
+	int (*css_online)(struct cgroup *cgrp);
+	void (*css_offline)(struct cgroup *cgrp);
+	void (*css_free)(struct cgroup *cgrp);
+
 	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
@@ -541,13 +542,13 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
  * @cgroup: cgroup whose children to walk
  *
  * Walk @cgroup's children.  Must be called under rcu_read_lock().  A child
- * cgroup which hasn't finished ->post_create() or already has finished
- * ->pre_destroy() may show up during traversal and it's each subsystem's
+ * cgroup which hasn't finished ->css_online() or already has finished
+ * ->css_offline() may show up during traversal and it's each subsystem's
  * responsibility to verify that each @pos is alive.
  *
- * If a subsystem synchronizes against the parent in its ->post_create()
- * and before starting iterating, a cgroup which finished ->post_create()
- * is guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes against the parent in its ->css_online() and
+ * before starting iterating, a cgroup which finished ->css_online() is
+ * guaranteed to be visible in the future iterations.
  */
 #define cgroup_for_each_child(pos, cgroup)				\
 	list_for_each_entry_rcu(pos, &(cgroup)->children, sibling)
@@ -561,19 +562,19 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
  * @cgroup: cgroup whose descendants to walk
  *
  * Walk @cgroup's descendants.  Must be called under rcu_read_lock().  A
- * descendant cgroup which hasn't finished ->post_create() or already has
- * finished ->pre_destroy() may show up during traversal and it's each
+ * descendant cgroup which hasn't finished ->css_online() or already has
+ * finished ->css_offline() may show up during traversal and it's each
  * subsystem's responsibility to verify that each @pos is alive.
  *
- * If a subsystem synchronizes against the parent in its ->post_create()
- * and before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->post_create() is
+ * If a subsystem synchronizes against the parent in its ->css_online() and
+ * before starting iterating, and synchronizes against @pos on each
+ * iteration, any descendant cgroup which finished ->css_offline() is
  * guaranteed to be visible in the future iterations.
  *
  * In other words, the following guarantees that a descendant can't escape
  * state updates of its ancestors.
  *
- * my_post_create(@cgrp)
+ * my_online(@cgrp)
  * {
  *	Lock @cgrp->parent and @cgrp;
  *	Inherit state from @cgrp->parent;
@@ -606,7 +607,7 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
  * iteration should lock and unlock both @pos->parent and @pos.
  *
  * Alternatively, a subsystem may choose to use a single global lock to
- * synchronize ->post_create() and ->pre_destroy() against tree-walking
+ * synchronize ->css_online() and ->css_offline() against tree-walking
  * operations.
  */
 #define cgroup_for_each_descendant_pre(pos, cgroup)			\
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c389f4258681..d35463bab487 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -876,7 +876,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * Release the subsystem state objects.
 		 */
 		for_each_subsys(cgrp->root, ss)
-			ss->destroy(cgrp);
+			ss->css_free(cgrp);
 
 		cgrp->root->number_of_cgroups--;
 		mutex_unlock(&cgroup_mutex);
@@ -4048,8 +4048,8 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	if (ss->post_create)
-		ret = ss->post_create(cgrp);
+	if (ss->css_online)
+		ret = ss->css_online(cgrp);
 	if (!ret)
 		cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
 	return ret;
@@ -4067,14 +4067,14 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 		return;
 
 	/*
-	 * pre_destroy() should be called with cgroup_mutex unlocked.  See
+	 * css_offline() should be called with cgroup_mutex unlocked.  See
 	 * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for
 	 * details.  This temporary unlocking should go away once
 	 * cgroup_mutex is unexported from controllers.
 	 */
-	if (ss->pre_destroy) {
+	if (ss->css_offline) {
 		mutex_unlock(&cgroup_mutex);
-		ss->pre_destroy(cgrp);
+		ss->css_offline(cgrp);
 		mutex_lock(&cgroup_mutex);
 	}
 
@@ -4136,7 +4136,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	for_each_subsys(root, ss) {
 		struct cgroup_subsys_state *css;
 
-		css = ss->create(cgrp);
+		css = ss->css_alloc(cgrp);
 		if (IS_ERR(css)) {
 			err = PTR_ERR(css);
 			goto err_free_all;
@@ -4147,7 +4147,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			if (err)
 				goto err_free_all;
 		}
-		/* At error, ->destroy() callback has to free assigned ID. */
+		/* At error, ->css_free() callback has to free assigned ID. */
 		if (clone_children(parent) && ss->post_clone)
 			ss->post_clone(cgrp);
 
@@ -4201,7 +4201,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 err_free_all:
 	for_each_subsys(root, ss) {
 		if (cgrp->subsys[ss->subsys_id])
-			ss->destroy(cgrp);
+			ss->css_free(cgrp);
 	}
 	mutex_unlock(&cgroup_mutex);
 	/* Release the reference count that we took on the superblock */
@@ -4381,7 +4381,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	/* Create the top cgroup state for this subsystem */
 	list_add(&ss->sibling, &rootnode.subsys_list);
 	ss->root = &rootnode;
-	css = ss->create(dummytop);
+	css = ss->css_alloc(dummytop);
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_cgroup_css(css, ss, dummytop);
@@ -4425,7 +4425,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 
 	/* check name and function validity */
 	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
-	    ss->create == NULL || ss->destroy == NULL)
+	    ss->css_alloc == NULL || ss->css_free == NULL)
 		return -EINVAL;
 
 	/*
@@ -4454,10 +4454,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	subsys[ss->subsys_id] = ss;
 
 	/*
-	 * no ss->create seems to need anything important in the ss struct, so
-	 * this can happen first (i.e. before the rootnode attachment).
+	 * no ss->css_alloc seems to need anything important in the ss
+	 * struct, so this can happen first (i.e. before the rootnode
+	 * attachment).
 	 */
-	css = ss->create(dummytop);
+	css = ss->css_alloc(dummytop);
 	if (IS_ERR(css)) {
 		/* failure case - need to deassign the subsys[] slot. */
 		subsys[ss->subsys_id] = NULL;
@@ -4577,12 +4578,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	write_unlock(&css_set_lock);
 
 	/*
-	 * remove subsystem's css from the dummytop and free it - need to free
-	 * before marking as null because ss->destroy needs the cgrp->subsys
-	 * pointer to find their state. note that this also takes care of
-	 * freeing the css_id.
+	 * remove subsystem's css from the dummytop and free it - need to
+	 * free before marking as null because ss->css_free needs the
+	 * cgrp->subsys pointer to find their state. note that this also
+	 * takes care of freeing the css_id.
 	 */
-	ss->destroy(dummytop);
+	ss->css_free(dummytop);
 	dummytop->subsys[ss->subsys_id] = NULL;
 
 	mutex_unlock(&cgroup_mutex);
@@ -4626,8 +4627,8 @@ int __init cgroup_init_early(void)
 
 		BUG_ON(!ss->name);
 		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
-		BUG_ON(!ss->create);
-		BUG_ON(!ss->destroy);
+		BUG_ON(!ss->css_alloc);
+		BUG_ON(!ss->css_free);
 		if (ss->subsys_id != i) {
 			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
 			       ss->name, ss->subsys_id);
@@ -5439,7 +5440,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
 }
 
 #ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
+static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont)
 {
 	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
 
@@ -5449,7 +5450,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
 	return css;
 }
 
-static void debug_destroy(struct cgroup *cont)
+static void debug_css_free(struct cgroup *cont)
 {
 	kfree(cont->subsys[debug_subsys_id]);
 }
@@ -5578,8 +5579,8 @@ static struct cftype debug_files[] =  {
 
 struct cgroup_subsys debug_subsys = {
 	.name = "debug",
-	.create = debug_create,
-	.destroy = debug_destroy,
+	.css_alloc = debug_css_alloc,
+	.css_free = debug_css_free,
 	.subsys_id = debug_subsys_id,
 	.base_cftypes = debug_files,
 };
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index ee8bb671688c..75dda1ea5026 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -92,7 +92,7 @@ static const char *freezer_state_strs(unsigned int state)
 
 struct cgroup_subsys freezer_subsys;
 
-static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
+static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup)
 {
 	struct freezer *freezer;
 
@@ -105,14 +105,14 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
 }
 
 /**
- * freezer_post_create - commit creation of a freezer cgroup
+ * freezer_css_online - commit creation of a freezer cgroup
  * @cgroup: cgroup being created
  *
  * We're committing to creation of @cgroup.  Mark it online and inherit
  * parent's freezing state while holding both parent's and our
  * freezer->lock.
  */
-static int freezer_post_create(struct cgroup *cgroup)
+static int freezer_css_online(struct cgroup *cgroup)
 {
 	struct freezer *freezer = cgroup_freezer(cgroup);
 	struct freezer *parent = parent_freezer(freezer);
@@ -141,13 +141,13 @@ static int freezer_post_create(struct cgroup *cgroup)
 }
 
 /**
- * freezer_pre_destroy - initiate destruction of @cgroup
+ * freezer_css_offline - initiate destruction of @cgroup
  * @cgroup: cgroup being destroyed
  *
  * @cgroup is going away.  Mark it dead and decrement system_freezing_count
  * if it was holding one.
  */
-static void freezer_pre_destroy(struct cgroup *cgroup)
+static void freezer_css_offline(struct cgroup *cgroup)
 {
 	struct freezer *freezer = cgroup_freezer(cgroup);
 
@@ -161,7 +161,7 @@ static void freezer_pre_destroy(struct cgroup *cgroup)
 	spin_unlock_irq(&freezer->lock);
 }
 
-static void freezer_destroy(struct cgroup *cgroup)
+static void freezer_css_free(struct cgroup *cgroup)
 {
 	kfree(cgroup_freezer(cgroup));
 }
@@ -477,10 +477,10 @@ static struct cftype files[] = {
 
 struct cgroup_subsys freezer_subsys = {
 	.name		= "freezer",
-	.create		= freezer_create,
-	.post_create	= freezer_post_create,
-	.pre_destroy	= freezer_pre_destroy,
-	.destroy	= freezer_destroy,
+	.css_alloc	= freezer_css_alloc,
+	.css_online	= freezer_css_online,
+	.css_offline	= freezer_css_offline,
+	.css_free	= freezer_css_free,
 	.subsys_id	= freezer_subsys_id,
 	.attach		= freezer_attach,
 	.fork		= freezer_fork,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f33c7153b6d7..06931337c4e5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1821,11 +1821,11 @@ static void cpuset_post_clone(struct cgroup *cgroup)
 }
 
 /*
- *	cpuset_create - create a cpuset
+ *	cpuset_css_alloc - allocate a cpuset css
  *	cont:	control group that the new cpuset will be part of
  */
 
-static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
+static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
 {
 	struct cpuset *cs;
 	struct cpuset *parent;
@@ -1864,7 +1864,7 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
  * will call async_rebuild_sched_domains().
  */
 
-static void cpuset_destroy(struct cgroup *cont)
+static void cpuset_css_free(struct cgroup *cont)
 {
 	struct cpuset *cs = cgroup_cs(cont);
 
@@ -1878,8 +1878,8 @@ static void cpuset_destroy(struct cgroup *cont)
 
 struct cgroup_subsys cpuset_subsys = {
 	.name = "cpuset",
-	.create = cpuset_create,
-	.destroy = cpuset_destroy,
+	.css_alloc = cpuset_css_alloc,
+	.css_free = cpuset_css_free,
 	.can_attach = cpuset_can_attach,
 	.attach = cpuset_attach,
 	.post_clone = cpuset_post_clone,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dbccf83c134d..f9ff5493171d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7434,7 +7434,7 @@ unlock:
 device_initcall(perf_event_sysfs_init);
 
 #ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont)
+static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
 {
 	struct perf_cgroup *jc;
 
@@ -7451,7 +7451,7 @@ static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont)
 	return &jc->css;
 }
 
-static void perf_cgroup_destroy(struct cgroup *cont)
+static void perf_cgroup_css_free(struct cgroup *cont)
 {
 	struct perf_cgroup *jc;
 	jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
@@ -7492,8 +7492,8 @@ static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
 struct cgroup_subsys perf_subsys = {
 	.name		= "perf_event",
 	.subsys_id	= perf_subsys_id,
-	.create		= perf_cgroup_create,
-	.destroy	= perf_cgroup_destroy,
+	.css_alloc	= perf_cgroup_css_alloc,
+	.css_free	= perf_cgroup_css_free,
 	.exit		= perf_cgroup_exit,
 	.attach		= perf_cgroup_attach,
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..6f20c8fb2326 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7468,7 +7468,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
 			    struct task_group, css);
 }
 
-static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 {
 	struct task_group *tg, *parent;
 
@@ -7485,7 +7485,7 @@ static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
 	return &tg->css;
 }
 
-static void cpu_cgroup_destroy(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
 
@@ -7845,8 +7845,8 @@ static struct cftype cpu_files[] = {
 
 struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
-	.create		= cpu_cgroup_create,
-	.destroy	= cpu_cgroup_destroy,
+	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_free	= cpu_cgroup_css_free,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
@@ -7869,7 +7869,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 struct cpuacct root_cpuacct;
 
 /* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
 {
 	struct cpuacct *ca;
 
@@ -7899,7 +7899,7 @@ out:
 }
 
 /* destroy an existing cpu accounting group */
-static void cpuacct_destroy(struct cgroup *cgrp)
+static void cpuacct_css_free(struct cgroup *cgrp)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
 
@@ -8070,8 +8070,8 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 
 struct cgroup_subsys cpuacct_subsys = {
 	.name = "cpuacct",
-	.create = cpuacct_create,
-	.destroy = cpuacct_destroy,
+	.css_alloc = cpuacct_css_alloc,
+	.css_free = cpuacct_css_free,
 	.subsys_id = cpuacct_subsys_id,
 	.base_cftypes = files,
 };
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 0d3a1a317731..b5bde7a5c017 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -77,7 +77,7 @@ static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
 	return false;
 }
 
-static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
+static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgroup)
 {
 	int idx;
 	struct cgroup *parent_cgroup;
@@ -101,7 +101,7 @@ static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
 	return &h_cgroup->css;
 }
 
-static void hugetlb_cgroup_destroy(struct cgroup *cgroup)
+static void hugetlb_cgroup_css_free(struct cgroup *cgroup)
 {
 	struct hugetlb_cgroup *h_cgroup;
 
@@ -155,7 +155,7 @@ out:
  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
  * the parent cgroup.
  */
-static void hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
+static void hugetlb_cgroup_css_offline(struct cgroup *cgroup)
 {
 	struct hstate *h;
 	struct page *page;
@@ -404,8 +404,8 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 
 struct cgroup_subsys hugetlb_subsys = {
 	.name = "hugetlb",
-	.create     = hugetlb_cgroup_create,
-	.pre_destroy = hugetlb_cgroup_pre_destroy,
-	.destroy    = hugetlb_cgroup_destroy,
-	.subsys_id  = hugetlb_subsys_id,
+	.css_alloc	= hugetlb_cgroup_css_alloc,
+	.css_offline	= hugetlb_cgroup_css_offline,
+	.css_free	= hugetlb_cgroup_css_free,
+	.subsys_id	= hugetlb_subsys_id,
 };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 08adaaae6fcc..8b0b2b028e23 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4922,7 +4922,7 @@ err_cleanup:
 }
 
 static struct cgroup_subsys_state * __ref
-mem_cgroup_create(struct cgroup *cont)
+mem_cgroup_css_alloc(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg, *parent;
 	long error = -ENOMEM;
@@ -5003,14 +5003,14 @@ free_out:
 	return ERR_PTR(error);
 }
 
-static void mem_cgroup_pre_destroy(struct cgroup *cont)
+static void mem_cgroup_css_offline(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
 	mem_cgroup_reparent_charges(memcg);
 }
 
-static void mem_cgroup_destroy(struct cgroup *cont)
+static void mem_cgroup_css_free(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
@@ -5600,9 +5600,9 @@ static void mem_cgroup_move_task(struct cgroup *cont,
 struct cgroup_subsys mem_cgroup_subsys = {
 	.name = "memory",
 	.subsys_id = mem_cgroup_subsys_id,
-	.create = mem_cgroup_create,
-	.pre_destroy = mem_cgroup_pre_destroy,
-	.destroy = mem_cgroup_destroy,
+	.css_alloc = mem_cgroup_css_alloc,
+	.css_offline = mem_cgroup_css_offline,
+	.css_free = mem_cgroup_css_free,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
 	.attach = mem_cgroup_move_task,
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..f0b6b0d572c1 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -108,7 +108,7 @@ static int write_update_netdev_table(struct net_device *dev)
 	return ret;
 }
 
-static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 {
 	struct cgroup_netprio_state *cs;
 	int ret = -EINVAL;
@@ -132,7 +132,7 @@ out:
 	return ERR_PTR(ret);
 }
 
-static void cgrp_destroy(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup *cgrp)
 {
 	struct cgroup_netprio_state *cs;
 	struct net_device *dev;
@@ -276,8 +276,8 @@ static struct cftype ss_files[] = {
 
 struct cgroup_subsys net_prio_subsys = {
 	.name		= "net_prio",
-	.create		= cgrp_create,
-	.destroy	= cgrp_destroy,
+	.css_alloc	= cgrp_css_alloc,
+	.css_free	= cgrp_css_free,
 	.attach		= net_prio_attach,
 	.subsys_id	= net_prio_subsys_id,
 	.base_cftypes	= ss_files,
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 2ecde225ae60..8cdc18e075fb 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,7 +34,7 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
 			    struct cgroup_cls_state, css);
 }
 
-static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 {
 	struct cgroup_cls_state *cs;
 
@@ -48,7 +48,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
 	return &cs->css;
 }
 
-static void cgrp_destroy(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup *cgrp)
 {
 	kfree(cgrp_cls_state(cgrp));
 }
@@ -75,8 +75,8 @@ static struct cftype ss_files[] = {
 
 struct cgroup_subsys net_cls_subsys = {
 	.name		= "net_cls",
-	.create		= cgrp_create,
-	.destroy	= cgrp_destroy,
+	.css_alloc	= cgrp_css_alloc,
+	.css_free	= cgrp_css_free,
 	.subsys_id	= net_cls_subsys_id,
 	.base_cftypes	= ss_files,
 	.module		= THIS_MODULE,
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 78a16f5b7275..19ecc8de9e6b 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -180,7 +180,7 @@ static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
-static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup)
+static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 {
 	struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
 	struct cgroup *parent_cgroup;
@@ -210,7 +210,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup)
 	return &dev_cgroup->css;
 }
 
-static void devcgroup_destroy(struct cgroup *cgroup)
+static void devcgroup_css_free(struct cgroup *cgroup)
 {
 	struct dev_cgroup *dev_cgroup;
 
@@ -564,8 +564,8 @@ static struct cftype dev_cgroup_files[] = {
 struct cgroup_subsys devices_subsys = {
 	.name = "devices",
 	.can_attach = devcgroup_can_attach,
-	.create = devcgroup_create,
-	.destroy = devcgroup_destroy,
+	.css_alloc = devcgroup_css_alloc,
+	.css_free = devcgroup_css_free,
 	.subsys_id = devices_subsys_id,
 	.base_cftypes = dev_cgroup_files,
 
-- 
cgit v1.2.3-55-g7522


From 2260e7fc1f18ad815324605c1ce7d5c6fd9b19a2 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:38 -0800
Subject: cgroup: s/CGRP_CLONE_CHILDREN/CGRP_CPUSET_CLONE_CHILDREN/

clone_children is only meaningful for cpuset and will stay that way.
Rename the flag to reflect that and update documentation.  Also, drop
clone_children() wrapper in cgroup.c.  The thin wrapper is used only a
few times and one of them will go away soon.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Glauber Costa <glommer@parallels.com>
---
 Documentation/cgroups/cgroups.txt |  8 +++-----
 include/linux/cgroup.h            |  6 ++++--
 kernel/cgroup.c                   | 28 ++++++++++++----------------
 3 files changed, 19 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index b06eea217403..24cdf76bd20c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -299,11 +299,9 @@ a cgroup hierarchy's release_agent path is empty.
 1.5 What does clone_children do ?
 ---------------------------------
 
-If the clone_children flag is enabled (1) in a cgroup, then all
-cgroups created beneath will call the post_clone callbacks for each
-subsystem of the newly created cgroup. Usually when this callback is
-implemented for a subsystem, it copies the values of the parent
-subsystem, this is the case for the cpuset.
+This flag only affects the cpuset controller. If the clone_children
+flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
+configuration from the parent during initialization.
 
 1.6 How do I use cgroups ?
 --------------------------
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7a2189ca8327..d2f82979f6c1 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -143,9 +143,11 @@ enum {
 	/* Control Group requires release notifications to userspace */
 	CGRP_NOTIFY_ON_RELEASE,
 	/*
-	 * Clone cgroup values when creating a new child cgroup
+	 * Clone the parent's configuration when creating a new child
+	 * cpuset cgroup.  For historical reasons, this option can be
+	 * specified at mount time and thus is implemented here.
 	 */
-	CGRP_CLONE_CHILDREN,
+	CGRP_CPUSET_CLONE_CHILDREN,
 };
 
 struct cgroup {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d35463bab487..2895880e6800 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -296,11 +296,6 @@ static int notify_on_release(const struct cgroup *cgrp)
 	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 }
 
-static int clone_children(const struct cgroup *cgrp)
-{
-	return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
-}
-
 /*
  * for_each_subsys() allows you to iterate on each subsystem attached to
  * an active hierarchy
@@ -1101,7 +1096,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 		seq_puts(seq, ",xattr");
 	if (strlen(root->release_agent_path))
 		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
-	if (clone_children(&root->top_cgroup))
+	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
 		seq_puts(seq, ",clone_children");
 	if (strlen(root->name))
 		seq_printf(seq, ",name=%s", root->name);
@@ -1113,7 +1108,7 @@ struct cgroup_sb_opts {
 	unsigned long subsys_mask;
 	unsigned long flags;
 	char *release_agent;
-	bool clone_children;
+	bool cpuset_clone_children;
 	char *name;
 	/* User explicitly requested empty subsystem */
 	bool none;
@@ -1164,7 +1159,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 			continue;
 		}
 		if (!strcmp(token, "clone_children")) {
-			opts->clone_children = true;
+			opts->cpuset_clone_children = true;
 			continue;
 		}
 		if (!strcmp(token, "xattr")) {
@@ -1474,8 +1469,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 		strcpy(root->release_agent_path, opts->release_agent);
 	if (opts->name)
 		strcpy(root->name, opts->name);
-	if (opts->clone_children)
-		set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
+	if (opts->cpuset_clone_children)
+		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
 	return root;
 }
 
@@ -3905,7 +3900,7 @@ fail:
 static u64 cgroup_clone_children_read(struct cgroup *cgrp,
 				    struct cftype *cft)
 {
-	return clone_children(cgrp);
+	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 }
 
 static int cgroup_clone_children_write(struct cgroup *cgrp,
@@ -3913,9 +3908,9 @@ static int cgroup_clone_children_write(struct cgroup *cgrp,
 				     u64 val)
 {
 	if (val)
-		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 	else
-		clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+		clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 	return 0;
 }
 
@@ -4130,8 +4125,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	if (notify_on_release(parent))
 		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 
-	if (clone_children(parent))
-		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
+		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 
 	for_each_subsys(root, ss) {
 		struct cgroup_subsys_state *css;
@@ -4148,7 +4143,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 				goto err_free_all;
 		}
 		/* At error, ->css_free() callback has to free assigned ID. */
-		if (clone_children(parent) && ss->post_clone)
+		if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags) &&
+		    ss->post_clone)
 			ss->post_clone(cgrp);
 
 		if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
-- 
cgit v1.2.3-55-g7522


From 033fa1c5f5f73833598a0beb022c0048fb769dad Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 08:13:39 -0800
Subject: cgroup, cpuset: remove cgroup_subsys->post_clone()

Currently CGRP_CPUSET_CLONE_CHILDREN triggers ->post_clone().  Now
that clone_children is cpuset specific, there's no reason to have this
rather odd option activation mechanism in cgroup core.  cpuset can
check the flag from its ->css_allocate() and take the necessary
action.

Move cpuset_post_clone() logic to the end of cpuset_css_alloc() and
remove cgroup_subsys->post_clone().

Loosely based on Glauber's "generalize post_clone into post_create"
patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Original-patch-by: Glauber Costa <glommer@parallels.com>
Original-patch: <1351686554-22592-2-git-send-email-glommer@parallels.com>
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Glauber Costa <glommer@parallels.com>
---
 Documentation/cgroups/cgroups.txt |  8 ----
 include/linux/cgroup.h            |  1 -
 kernel/cgroup.c                   |  4 --
 kernel/cpuset.c                   | 80 ++++++++++++++++++---------------------
 4 files changed, 36 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 24cdf76bd20c..bcf1a00b06a1 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -642,14 +642,6 @@ void exit(struct task_struct *task)
 
 Called during task exit.
 
-void post_clone(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-Called during cgroup_create() to do any parameter
-initialization which might be required before a task could attach.  For
-example, in cpusets, no task may attach before 'cpus' and 'mems' are set
-up.
-
 void bind(struct cgroup *root)
 (cgroup_mutex held by caller)
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d2f82979f6c1..c798997e5011 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -452,7 +452,6 @@ struct cgroup_subsys {
 	void (*fork)(struct task_struct *task);
 	void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
 		     struct task_struct *task);
-	void (*post_clone)(struct cgroup *cgrp);
 	void (*bind)(struct cgroup *root);
 
 	int subsys_id;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2895880e6800..96719f73e95d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4142,10 +4142,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			if (err)
 				goto err_free_all;
 		}
-		/* At error, ->css_free() callback has to free assigned ID. */
-		if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags) &&
-		    ss->post_clone)
-			ss->post_clone(cgrp);
 
 		if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
 		    parent->parent) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 06931337c4e5..b017887d632f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1783,43 +1783,6 @@ static struct cftype files[] = {
 	{ }	/* terminate */
 };
 
-/*
- * post_clone() is called during cgroup_create() when the
- * clone_children mount argument was specified.  The cgroup
- * can not yet have any tasks.
- *
- * Currently we refuse to set up the cgroup - thereby
- * refusing the task to be entered, and as a result refusing
- * the sys_unshare() or clone() which initiated it - if any
- * sibling cpusets have exclusive cpus or mem.
- *
- * If this becomes a problem for some users who wish to
- * allow that scenario, then cpuset_post_clone() could be
- * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
- * (and likewise for mems) to the new cgroup. Called with cgroup_mutex
- * held.
- */
-static void cpuset_post_clone(struct cgroup *cgroup)
-{
-	struct cgroup *parent, *child;
-	struct cpuset *cs, *parent_cs;
-
-	parent = cgroup->parent;
-	list_for_each_entry(child, &parent->children, sibling) {
-		cs = cgroup_cs(child);
-		if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
-			return;
-	}
-	cs = cgroup_cs(cgroup);
-	parent_cs = cgroup_cs(parent);
-
-	mutex_lock(&callback_mutex);
-	cs->mems_allowed = parent_cs->mems_allowed;
-	cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
-	mutex_unlock(&callback_mutex);
-	return;
-}
-
 /*
  *	cpuset_css_alloc - allocate a cpuset css
  *	cont:	control group that the new cpuset will be part of
@@ -1827,13 +1790,14 @@ static void cpuset_post_clone(struct cgroup *cgroup)
 
 static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
 {
-	struct cpuset *cs;
-	struct cpuset *parent;
+	struct cgroup *parent_cg = cont->parent;
+	struct cgroup *tmp_cg;
+	struct cpuset *parent, *cs;
 
-	if (!cont->parent) {
+	if (!parent_cg)
 		return &top_cpuset.css;
-	}
-	parent = cgroup_cs(cont->parent);
+	parent = cgroup_cs(parent_cg);
+
 	cs = kmalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
@@ -1855,7 +1819,36 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
 
 	cs->parent = parent;
 	number_of_cpusets++;
-	return &cs->css ;
+
+	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cont->flags))
+		goto skip_clone;
+
+	/*
+	 * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
+	 * set.  This flag handling is implemented in cgroup core for
+	 * histrical reasons - the flag may be specified during mount.
+	 *
+	 * Currently, if any sibling cpusets have exclusive cpus or mem, we
+	 * refuse to clone the configuration - thereby refusing the task to
+	 * be entered, and as a result refusing the sys_unshare() or
+	 * clone() which initiated it.  If this becomes a problem for some
+	 * users who wish to allow that scenario, then this could be
+	 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
+	 * (and likewise for mems) to the new cgroup.
+	 */
+	list_for_each_entry(tmp_cg, &parent_cg->children, sibling) {
+		struct cpuset *tmp_cs = cgroup_cs(tmp_cg);
+
+		if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs))
+			goto skip_clone;
+	}
+
+	mutex_lock(&callback_mutex);
+	cs->mems_allowed = parent->mems_allowed;
+	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+	mutex_unlock(&callback_mutex);
+skip_clone:
+	return &cs->css;
 }
 
 /*
@@ -1882,7 +1875,6 @@ struct cgroup_subsys cpuset_subsys = {
 	.css_free = cpuset_css_free,
 	.can_attach = cpuset_can_attach,
 	.attach = cpuset_attach,
-	.post_clone = cpuset_post_clone,
 	.subsys_id = cpuset_subsys_id,
 	.base_cftypes = files,
 	.early_init = 1,
-- 
cgit v1.2.3-55-g7522


From 2abb74eaf6e73cb42e87bdc9e9c2535ce485e614 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD
Date: Sat, 17 Nov 2012 23:05:06 +0100
Subject: atmel: move ATMEL_MAX_UART to platform_data/atmel.h

Modify both AT91 and AVR32 platforms.
Use 7 for it as the sam9260 or the sam9g25 have 7 of them DBGU included.

Reported-by: Joachim Eastwood <joachim.eastwood@jotron.com>
Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/mach-at91/include/mach/hardware.h  | 3 ---
 arch/avr32/mach-at32ap/include/mach/board.h | 1 -
 include/linux/platform_data/atmel.h         | 6 ++++++
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/include/mach/hardware.h b/arch/arm/mach-at91/include/mach/hardware.h
index 711a7892d331..a832e0707611 100644
--- a/arch/arm/mach-at91/include/mach/hardware.h
+++ b/arch/arm/mach-at91/include/mach/hardware.h
@@ -90,9 +90,6 @@
 #define AT91_SRAM_MAX		SZ_1M
 #define AT91_VIRT_BASE		(AT91_IO_VIRT_BASE - AT91_SRAM_MAX)
 
-/* Serial ports */
-#define ATMEL_MAX_UART		7		/* 6 USART3's and one DBGU port (SAM9260) */
-
 /* External Memory Map */
 #define AT91_CHIPSELECT_0	0x10000000
 #define AT91_CHIPSELECT_1	0x20000000
diff --git a/arch/avr32/mach-at32ap/include/mach/board.h b/arch/avr32/mach-at32ap/include/mach/board.h
index dca93450cb0e..d485b0391357 100644
--- a/arch/avr32/mach-at32ap/include/mach/board.h
+++ b/arch/avr32/mach-at32ap/include/mach/board.h
@@ -26,7 +26,6 @@ static inline void __deprecated at32_add_system_devices(void)
 
 }
 
-#define ATMEL_MAX_UART	4
 extern struct platform_device *atmel_default_console_device;
 
 /* Flags for selecting USART extra pins */
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index dbd6d53cc270..6a293b7fff3b 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -19,6 +19,12 @@
 #include <linux/serial.h>
 #include <linux/platform_data/macb.h>
 
+/*
+ * at91: 6 USARTs and one DBGU port (SAM9260)
+ * avr32: 4
+ */
+#define ATMEL_MAX_UART	7
+
  /* USB Device */
 struct at91_udc_data {
 	int	vbus_pin;		/* high == host powering us */
-- 
cgit v1.2.3-55-g7522


From 0a950f65e1e64f4e82b4b5507773848ea88bcb8e Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Nov 2012 09:02:12 -0800
Subject: cgroup: add cgroup->id

With the introduction of generic cgroup hierarchy iterators, css_id is
being phased out.  It was unnecessarily complex, id'ing the wrong
thing (cgroups need IDs, not CSSes) and has other oddities like not
being available at ->css_alloc().

This patch adds cgroup->id, which is a simple per-hierarchy
ida-allocated ID which is assigned before ->css_alloc() and released
after ->css_free().

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
---
 include/linux/cgroup.h |  2 ++
 kernel/cgroup.c        | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c798997e5011..82cf9e6fc77b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -159,6 +159,8 @@ struct cgroup {
 	 */
 	atomic_t count;
 
+	int id;				/* ida allocated in-hierarchy ID */
+
 	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
 	 * Our children link their 'sibling' into our 'children'.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 96719f73e95d..6db01417d39a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -138,6 +138,9 @@ struct cgroupfs_root {
 	/* Hierarchy-specific flags */
 	unsigned long flags;
 
+	/* IDs for cgroups in this hierarchy */
+	struct ida cgroup_ida;
+
 	/* The path to use for release notifications. */
 	char release_agent_path[PATH_MAX];
 
@@ -890,6 +893,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 
 		simple_xattrs_free(&cgrp->xattrs);
 
+		ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
 		kfree_rcu(cgrp, rcu_head);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
@@ -1465,6 +1469,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 
 	root->subsys_mask = opts->subsys_mask;
 	root->flags = opts->flags;
+	ida_init(&root->cgroup_ida);
 	if (opts->release_agent)
 		strcpy(root->release_agent_path, opts->release_agent);
 	if (opts->name)
@@ -1483,6 +1488,7 @@ static void cgroup_drop_root(struct cgroupfs_root *root)
 	spin_lock(&hierarchy_id_lock);
 	ida_remove(&hierarchy_ida, root->hierarchy_id);
 	spin_unlock(&hierarchy_id_lock);
+	ida_destroy(&root->cgroup_ida);
 	kfree(root);
 }
 
@@ -4093,10 +4099,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	struct cgroup_subsys *ss;
 	struct super_block *sb = root->sb;
 
+	/* allocate the cgroup and its ID, 0 is reserved for the root */
 	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
 	if (!cgrp)
 		return -ENOMEM;
 
+	cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
+	if (cgrp->id < 0)
+		goto err_free_cgrp;
+
 	/*
 	 * Only live parents can have children.  Note that the liveliness
 	 * check isn't strictly necessary because cgroup_mkdir() and
@@ -4106,7 +4117,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	 */
 	if (!cgroup_lock_live_group(parent)) {
 		err = -ENODEV;
-		goto err_free_cgrp;
+		goto err_free_id;
 	}
 
 	/* Grab a reference on the superblock so the hierarchy doesn't
@@ -4198,6 +4209,8 @@ err_free_all:
 	mutex_unlock(&cgroup_mutex);
 	/* Release the reference count that we took on the superblock */
 	deactivate_super(sb);
+err_free_id:
+	ida_simple_remove(&root->cgroup_ida, cgrp->id);
 err_free_cgrp:
 	kfree(cgrp);
 	return err;
-- 
cgit v1.2.3-55-g7522


From 50d69b5184169caeb85e082f627200da9b0e1677 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 13 Nov 2012 11:48:00 +0000
Subject: iio: Fix iio_buffer_register stub signature

Match the iio_buffer_register stub signature up to the real function and make
the second parameter const. This fixes a the following warnings if
CONFIG_IIO_BUFFER is disabled:

	drivers/staging/iio/accel/adis16201_core.c: In function ‘adis16201_probe’:
	drivers/staging/iio/accel/adis16201_core.c:536: warning: passing argument 2 of ‘iio_buffer_register’ discards qualifiers from pointer target type
	drivers/staging/iio/accel/adis16203_core.c: In function ‘adis16203_probe’:
	drivers/staging/iio/accel/adis16203_core.c:468: warning: passing argument 2 of ‘iio_buffer_register’ discards qualifiers from pointer target type
	drivers/staging/iio/accel/adis16204_core.c: In function ‘adis16204_probe’:
	drivers/staging/iio/accel/adis16204_core.c:527: warning: passing argument 2 of ‘iio_buffer_register’ discards qualifiers from pointer target type
	drivers/staging/iio/accel/adis16209_core.c: In function ‘adis16209_probe’:
	drivers/staging/iio/accel/adis16209_core.c:542: warning: passing argument 2 of ‘iio_buffer_register’ discards qualifiers from pointer target type
	drivers/staging/iio/accel/adis16240_core.c: In function ‘adis16240_probe’:
	drivers/staging/iio/accel/adis16240_core.c:588: warning: passing argument 2 of ‘iio_buffer_register’ discards qualifiers from pointer target type

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 027040569180..f3eea18fdf46 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -195,7 +195,7 @@ bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
 #else /* CONFIG_IIO_BUFFER */
 
 static inline int iio_buffer_register(struct iio_dev *indio_dev,
-					   struct iio_chan_spec *channels,
+					   const struct iio_chan_spec *channels,
 					   int num_channels)
 {
 	return 0;
-- 
cgit v1.2.3-55-g7522


From ec04cb048d79cd778c06e28f34395a46d774800d Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 13 Nov 2012 13:28:00 +0000
Subject: staging:iio: Move adis library out of staging

Now that the adis library no longer depends on the sw_ring buffer implementation
we can move it out of staging.

While we are at it also sort the entries in the iio Kconfig and Makefile to be
in alphabetical order.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/Kconfig                        |   7 +-
 drivers/iio/Makefile                       |   7 +-
 drivers/iio/imu/Kconfig                    |  11 +
 drivers/iio/imu/Makefile                   |   8 +
 drivers/iio/imu/adis.c                     | 337 ++++++++++++++++++++++++++++
 drivers/iio/imu/adis_buffer.c              | 159 ++++++++++++++
 drivers/iio/imu/adis_trigger.c             |  89 ++++++++
 drivers/staging/iio/accel/adis16201_core.c |   2 +-
 drivers/staging/iio/accel/adis16203_core.c |   2 +-
 drivers/staging/iio/accel/adis16204_core.c |   2 +-
 drivers/staging/iio/accel/adis16209_core.c |   2 +-
 drivers/staging/iio/accel/adis16220.h      |   2 +-
 drivers/staging/iio/accel/adis16240_core.c |   2 +-
 drivers/staging/iio/gyro/adis16260.h       |   2 +-
 drivers/staging/iio/imu/Kconfig            |  12 -
 drivers/staging/iio/imu/Makefile           |   5 -
 drivers/staging/iio/imu/adis.c             | 338 -----------------------------
 drivers/staging/iio/imu/adis.h             | 186 ----------------
 drivers/staging/iio/imu/adis_buffer.c      | 160 --------------
 drivers/staging/iio/imu/adis_trigger.c     |  90 --------
 include/linux/iio/imu/adis.h               | 186 ++++++++++++++++
 21 files changed, 805 insertions(+), 804 deletions(-)
 create mode 100644 drivers/iio/imu/Kconfig
 create mode 100644 drivers/iio/imu/Makefile
 create mode 100644 drivers/iio/imu/adis.c
 create mode 100644 drivers/iio/imu/adis_buffer.c
 create mode 100644 drivers/iio/imu/adis_trigger.c
 delete mode 100644 drivers/staging/iio/imu/adis.c
 delete mode 100644 drivers/staging/iio/imu/adis.h
 delete mode 100644 drivers/staging/iio/imu/adis_buffer.c
 delete mode 100644 drivers/staging/iio/imu/adis_trigger.c
 create mode 100644 include/linux/iio/imu/adis.h

(limited to 'include/linux')

diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index 65ae734c607d..b2f963be3993 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -63,11 +63,12 @@ config IIO_CONSUMERS_PER_TRIGGER
 source "drivers/iio/accel/Kconfig"
 source "drivers/iio/adc/Kconfig"
 source "drivers/iio/amplifiers/Kconfig"
-source "drivers/iio/light/Kconfig"
-source "drivers/iio/frequency/Kconfig"
-source "drivers/iio/dac/Kconfig"
 source "drivers/iio/common/Kconfig"
+source "drivers/iio/dac/Kconfig"
+source "drivers/iio/frequency/Kconfig"
 source "drivers/iio/gyro/Kconfig"
+source "drivers/iio/imu/Kconfig"
+source "drivers/iio/light/Kconfig"
 source "drivers/iio/magnetometer/Kconfig"
 
 endif # IIO
diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile
index 31d76a07ec65..a0e8cdd67e4d 100644
--- a/drivers/iio/Makefile
+++ b/drivers/iio/Makefile
@@ -14,9 +14,10 @@ obj-$(CONFIG_IIO_KFIFO_BUF) += kfifo_buf.o
 obj-y += accel/
 obj-y += adc/
 obj-y += amplifiers/
-obj-y += light/
-obj-y += frequency/
-obj-y += dac/
 obj-y += common/
+obj-y += dac/
 obj-y += gyro/
+obj-y += frequency/
+obj-y += imu/
+obj-y += light/
 obj-y += magnetometer/
diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
new file mode 100644
index 000000000000..c24410c873c7
--- /dev/null
+++ b/drivers/iio/imu/Kconfig
@@ -0,0 +1,11 @@
+config IIO_ADIS_LIB
+	tristate
+	help
+	  A set of IO helper functions for the Analog Devices ADIS* device family.
+
+config IIO_ADIS_LIB_BUFFER
+	bool
+	select IIO_TRIGGERED_BUFFER
+	help
+	  A set of buffer helper functions for the Analog Devices ADIS* device
+	  family.
diff --git a/drivers/iio/imu/Makefile b/drivers/iio/imu/Makefile
new file mode 100644
index 000000000000..97676ab5723d
--- /dev/null
+++ b/drivers/iio/imu/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for Inertial Measurement Units
+#
+
+adis_lib-y += adis.o
+adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_trigger.o
+adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_buffer.o
+obj-$(CONFIG_IIO_ADIS_LIB) += adis_lib.o
diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
new file mode 100644
index 000000000000..8259b774078f
--- /dev/null
+++ b/drivers/iio/imu/adis.c
@@ -0,0 +1,337 @@
+/*
+ * Common library for ADIS16XXX devices
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *   Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
+
+#define ADIS_MSC_CTRL_DATA_RDY_EN	BIT(2)
+#define ADIS_MSC_CTRL_DATA_RDY_POL_HIGH	BIT(1)
+#define ADIS_MSC_CTRL_DATA_RDY_DIO2	BIT(0)
+#define ADIS_GLOB_CMD_SW_RESET		BIT(7)
+
+/**
+ * adis_write_reg_8() - Write single byte to a register
+ * @adis: The adis device
+ * @reg: The address of the register to be written
+ * @val: The value to write
+ */
+int adis_write_reg_8(struct adis *adis, unsigned int reg, uint8_t val)
+{
+	int ret;
+
+	mutex_lock(&adis->txrx_lock);
+	adis->tx[0] = ADIS_WRITE_REG(reg);
+	adis->tx[1] = val;
+
+	ret = spi_write(adis->spi, adis->tx, 2);
+	mutex_unlock(&adis->txrx_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_write_reg_8);
+
+/**
+ * adis_write_reg_16() - Write 2 bytes to a pair of registers
+ * @adis: The adis device
+ * @reg: The address of the lower of the two registers
+ * @val: Value to be written
+ */
+int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t value)
+{
+	int ret;
+	struct spi_message msg;
+	struct spi_transfer xfers[] = {
+		{
+			.tx_buf = adis->tx,
+			.bits_per_word = 8,
+			.len = 2,
+			.cs_change = 1,
+			.delay_usecs = adis->data->write_delay,
+		}, {
+			.tx_buf = adis->tx + 2,
+			.bits_per_word = 8,
+			.len = 2,
+			.delay_usecs = adis->data->write_delay,
+		},
+	};
+
+	mutex_lock(&adis->txrx_lock);
+	adis->tx[0] = ADIS_WRITE_REG(reg);
+	adis->tx[1] = value & 0xff;
+	adis->tx[2] = ADIS_WRITE_REG(reg + 1);
+	adis->tx[3] = (value >> 8) & 0xff;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfers[0], &msg);
+	spi_message_add_tail(&xfers[1], &msg);
+	ret = spi_sync(adis->spi, &msg);
+	mutex_unlock(&adis->txrx_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_write_reg_16);
+
+/**
+ * adis_read_reg_16() - read 2 bytes from a 16-bit register
+ * @adis: The adis device
+ * @reg: The address of the lower of the two registers
+ * @val: The value read back from the device
+ */
+int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val)
+{
+	struct spi_message msg;
+	int ret;
+	struct spi_transfer xfers[] = {
+		{
+			.tx_buf = adis->tx,
+			.bits_per_word = 8,
+			.len = 2,
+			.cs_change = 1,
+			.delay_usecs = adis->data->read_delay,
+		}, {
+			.rx_buf = adis->rx,
+			.bits_per_word = 8,
+			.len = 2,
+			.delay_usecs = adis->data->read_delay,
+		},
+	};
+
+	mutex_lock(&adis->txrx_lock);
+	adis->tx[0] = ADIS_READ_REG(reg);
+	adis->tx[1] = 0;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfers[0], &msg);
+	spi_message_add_tail(&xfers[1], &msg);
+	ret = spi_sync(adis->spi, &msg);
+	if (ret) {
+		dev_err(&adis->spi->dev, "Failed to read 16 bit register 0x%02X: %d\n",
+				reg, ret);
+		goto error_ret;
+	}
+	*val = get_unaligned_be16(adis->rx);
+
+error_ret:
+	mutex_unlock(&adis->txrx_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_read_reg_16);
+
+/**
+ * adis_enable_irq() - Enable or disable data ready IRQ
+ * @adis: The adis device
+ * @enable: Whether to enable the IRQ
+ *
+ * Returns 0 on success, negative error code otherwise
+ */
+int adis_enable_irq(struct adis *adis, bool enable)
+{
+	int ret = 0;
+	uint16_t msc;
+
+	ret = adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc);
+	if (ret)
+		goto error_ret;
+
+	msc |= ADIS_MSC_CTRL_DATA_RDY_POL_HIGH;
+	msc &= ~ADIS_MSC_CTRL_DATA_RDY_DIO2;
+	if (enable)
+		msc |= ADIS_MSC_CTRL_DATA_RDY_EN;
+	else
+		msc &= ~ADIS_MSC_CTRL_DATA_RDY_EN;
+
+	ret = adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc);
+
+error_ret:
+	return ret;
+}
+EXPORT_SYMBOL(adis_enable_irq);
+
+/**
+ * adis_check_status() - Check the device for error conditions
+ * @adis: The adis device
+ *
+ * Returns 0 on success, a negative error code otherwise
+ */
+int adis_check_status(struct adis *adis)
+{
+	uint16_t status;
+	int ret;
+	int i;
+
+	ret = adis_read_reg_16(adis, adis->data->diag_stat_reg, &status);
+	if (ret < 0)
+		return ret;
+
+	status &= adis->data->status_error_mask;
+
+	if (status == 0)
+		return 0;
+
+	for (i = 0; i < 16; ++i) {
+		if (status & BIT(i)) {
+			dev_err(&adis->spi->dev, "%s.\n",
+				adis->data->status_error_msgs[i]);
+		}
+	}
+
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(adis_check_status);
+
+/**
+ * adis_reset() - Reset the device
+ * @adis: The adis device
+ *
+ * Returns 0 on success, a negative error code otherwise
+ */
+int adis_reset(struct adis *adis)
+{
+	int ret;
+
+	ret = adis_write_reg_8(adis, adis->data->glob_cmd_reg,
+			ADIS_GLOB_CMD_SW_RESET);
+	if (ret)
+		dev_err(&adis->spi->dev, "Failed to reset device: %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_reset);
+
+static int adis_self_test(struct adis *adis)
+{
+	int ret;
+
+	ret = adis_write_reg_16(adis, adis->data->msc_ctrl_reg,
+			adis->data->self_test_mask);
+	if (ret) {
+		dev_err(&adis->spi->dev, "Failed to initiate self test: %d\n",
+			ret);
+		return ret;
+	}
+
+	msleep(adis->data->startup_delay);
+
+	return adis_check_status(adis);
+}
+
+/**
+ * adis_inital_startup() - Performs device self-test
+ * @adis: The adis device
+ *
+ * Returns 0 if the device is operational, a negative error code otherwise.
+ *
+ * This function should be called early on in the device initialization sequence
+ * to ensure that the device is in a sane and known state and that it is usable.
+ */
+int adis_initial_startup(struct adis *adis)
+{
+	int ret;
+
+	ret = adis_self_test(adis);
+	if (ret) {
+		dev_err(&adis->spi->dev, "Self-test failed, trying reset.\n");
+		adis_reset(adis);
+		msleep(adis->data->startup_delay);
+		ret = adis_self_test(adis);
+		if (ret) {
+			dev_err(&adis->spi->dev, "Second self-test failed, giving up.\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adis_initial_startup);
+
+/**
+ * adis_single_conversion() - Performs a single sample conversion
+ * @indio_dev: The IIO device
+ * @chan: The IIO channel
+ * @error_mask: Mask for the error bit
+ * @val: Result of the conversion
+ *
+ * Returns IIO_VAL_INT on success, a negative error code otherwise.
+ *
+ * The function performs a single conversion on a given channel and post
+ * processes the value accordingly to the channel spec. If a error_mask is given
+ * the function will check if the mask is set in the returned raw value. If it
+ * is set the function will perform a self-check. If the device does not report
+ * a error bit in the channels raw value set error_mask to 0.
+ */
+int adis_single_conversion(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, unsigned int error_mask, int *val)
+{
+	struct adis *adis = iio_device_get_drvdata(indio_dev);
+	uint16_t val16;
+	int ret;
+
+	mutex_lock(&indio_dev->mlock);
+
+	ret = adis_read_reg_16(adis, chan->address, &val16);
+	if (ret)
+		goto err_unlock;
+
+	if (val16 & error_mask) {
+		ret = adis_check_status(adis);
+		if (ret)
+			goto err_unlock;
+	}
+
+	if (chan->scan_type.sign == 's')
+		*val = sign_extend32(val16, chan->scan_type.realbits - 1);
+	else
+		*val = val16 & ((1 << chan->scan_type.realbits) - 1);
+
+	ret = IIO_VAL_INT;
+err_unlock:
+	mutex_unlock(&indio_dev->mlock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_single_conversion);
+
+/**
+ * adis_init() - Initialize adis device structure
+ * @adis:	The adis device
+ * @indio_dev:	The iio device
+ * @spi:	The spi device
+ * @data:	Chip specific data
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ *
+ * This function must be called, before any other adis helper function may be
+ * called.
+ */
+int adis_init(struct adis *adis, struct iio_dev *indio_dev,
+	struct spi_device *spi, const struct adis_data *data)
+{
+	mutex_init(&adis->txrx_lock);
+	adis->spi = spi;
+	adis->data = data;
+	iio_device_set_drvdata(indio_dev, adis);
+
+	return adis_enable_irq(adis, false);
+}
+EXPORT_SYMBOL_GPL(adis_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("Common library code for ADIS16XXX devices");
diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
new file mode 100644
index 000000000000..a91b4cbdc73a
--- /dev/null
+++ b/drivers/iio/imu/adis_buffer.c
@@ -0,0 +1,159 @@
+/*
+ * Common library for ADIS16XXX devices
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *   Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/imu/adis.h>
+
+int adis_update_scan_mode(struct iio_dev *indio_dev,
+	const unsigned long *scan_mask)
+{
+	struct adis *adis = iio_device_get_drvdata(indio_dev);
+	const struct iio_chan_spec *chan;
+	unsigned int scan_count;
+	unsigned int i, j;
+	__be16 *tx, *rx;
+
+	kfree(adis->xfer);
+	kfree(adis->buffer);
+
+	scan_count = indio_dev->scan_bytes / 2;
+
+	adis->xfer = kcalloc(scan_count + 1, sizeof(*adis->xfer), GFP_KERNEL);
+	if (!adis->xfer)
+		return -ENOMEM;
+
+	adis->buffer = kzalloc(indio_dev->scan_bytes * 2, GFP_KERNEL);
+	if (!adis->buffer)
+		return -ENOMEM;
+
+	rx = adis->buffer;
+	tx = rx + indio_dev->scan_bytes;
+
+	spi_message_init(&adis->msg);
+
+	for (j = 0; j <= scan_count; j++) {
+		adis->xfer[j].bits_per_word = 8;
+		if (j != scan_count)
+			adis->xfer[j].cs_change = 1;
+		adis->xfer[j].len = 2;
+		adis->xfer[j].delay_usecs = adis->data->read_delay;
+		if (j < scan_count)
+			adis->xfer[j].tx_buf = &tx[j];
+		if (j >= 1)
+			adis->xfer[j].rx_buf = &rx[j - 1];
+		spi_message_add_tail(&adis->xfer[j], &adis->msg);
+	}
+
+	chan = indio_dev->channels;
+	for (i = 0; i < indio_dev->num_channels; i++, chan++) {
+		if (!test_bit(chan->scan_index, scan_mask))
+			continue;
+		*tx++ = cpu_to_be16(chan->address << 8);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adis_update_scan_mode);
+
+static irqreturn_t adis_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct adis *adis = iio_device_get_drvdata(indio_dev);
+	int ret;
+
+	if (!adis->buffer)
+		return -ENOMEM;
+
+	ret = spi_sync(adis->spi, &adis->msg);
+	if (ret)
+		dev_err(&adis->spi->dev, "Failed to read data: %d", ret);
+
+	/* Guaranteed to be aligned with 8 byte boundary */
+	if (indio_dev->scan_timestamp) {
+		void *b = adis->buffer + indio_dev->scan_bytes - sizeof(s64);
+		*(s64 *)b = pf->timestamp;
+	}
+
+	iio_push_to_buffers(indio_dev, adis->buffer);
+
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * adis_setup_buffer_and_trigger() - Sets up buffer and trigger for the adis device
+ * @adis: The adis device.
+ * @indio_dev: The IIO device.
+ * @trigger_handler: Optional trigger handler, may be NULL.
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ *
+ * This function sets up the buffer and trigger for a adis devices.  If
+ * 'trigger_handler' is NULL the default trigger handler will be used. The
+ * default trigger handler will simply read the registers assigned to the
+ * currently active channels.
+ *
+ * adis_cleanup_buffer_and_trigger() should be called to free the resources
+ * allocated by this function.
+ */
+int adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev,
+	irqreturn_t (*trigger_handler)(int, void *))
+{
+	int ret;
+
+	if (!trigger_handler)
+		trigger_handler = adis_trigger_handler;
+
+	ret = iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+		trigger_handler, NULL);
+	if (ret)
+		return ret;
+
+	if (adis->spi->irq) {
+		ret = adis_probe_trigger(adis, indio_dev);
+		if (ret)
+			goto error_buffer_cleanup;
+	}
+	return 0;
+
+error_buffer_cleanup:
+	iio_triggered_buffer_cleanup(indio_dev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_setup_buffer_and_trigger);
+
+/**
+ * adis_cleanup_buffer_and_trigger() - Free buffer and trigger resources
+ * @adis: The adis device.
+ * @indio_dev: The IIO device.
+ *
+ * Frees resources allocated by adis_setup_buffer_and_trigger()
+ */
+void adis_cleanup_buffer_and_trigger(struct adis *adis,
+	struct iio_dev *indio_dev)
+{
+	if (adis->spi->irq)
+		adis_remove_trigger(adis);
+	kfree(adis->buffer);
+	kfree(adis->xfer);
+	iio_triggered_buffer_cleanup(indio_dev);
+}
+EXPORT_SYMBOL_GPL(adis_cleanup_buffer_and_trigger);
diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c
new file mode 100644
index 000000000000..5a24c9cac343
--- /dev/null
+++ b/drivers/iio/imu/adis_trigger.c
@@ -0,0 +1,89 @@
+/*
+ * Common library for ADIS16XXX devices
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *   Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/export.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/imu/adis.h>
+
+static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig,
+						bool state)
+{
+	struct adis *adis = trig->private_data;
+
+	return adis_enable_irq(adis, state);
+}
+
+static const struct iio_trigger_ops adis_trigger_ops = {
+	.owner = THIS_MODULE,
+	.set_trigger_state = &adis_data_rdy_trigger_set_state,
+};
+
+/**
+ * adis_probe_trigger() - Sets up trigger for a adis device
+ * @adis: The adis device
+ * @indio_dev: The IIO device
+ *
+ * Returns 0 on success or a negative error code
+ *
+ * adis_remove_trigger() should be used to free the trigger.
+ */
+int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
+{
+	int ret;
+
+	adis->trig = iio_trigger_alloc("%s-dev%d", indio_dev->name,
+					indio_dev->id);
+	if (adis->trig == NULL)
+		return -ENOMEM;
+
+	ret = request_irq(adis->spi->irq,
+			  &iio_trigger_generic_data_rdy_poll,
+			  IRQF_TRIGGER_RISING,
+			  indio_dev->name,
+			  adis->trig);
+	if (ret)
+		goto error_free_trig;
+
+	adis->trig->dev.parent = &adis->spi->dev;
+	adis->trig->ops = &adis_trigger_ops;
+	adis->trig->private_data = adis;
+	ret = iio_trigger_register(adis->trig);
+
+	indio_dev->trig = adis->trig;
+	if (ret)
+		goto error_free_irq;
+
+	return 0;
+
+error_free_irq:
+	free_irq(adis->spi->irq, adis->trig);
+error_free_trig:
+	iio_trigger_free(adis->trig);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adis_probe_trigger);
+
+/**
+ * adis_remove_trigger() - Remove trigger for a adis devices
+ * @adis: The adis device
+ *
+ * Removes the trigger previously registered with adis_probe_trigger().
+ */
+void adis_remove_trigger(struct adis *adis)
+{
+	iio_trigger_unregister(adis->trig);
+	free_irq(adis->spi->irq, adis->trig);
+	iio_trigger_free(adis->trig);
+}
+EXPORT_SYMBOL_GPL(adis_remove_trigger);
diff --git a/drivers/staging/iio/accel/adis16201_core.c b/drivers/staging/iio/accel/adis16201_core.c
index 833dd6b73bc3..ccdc8d23f6a3 100644
--- a/drivers/staging/iio/accel/adis16201_core.c
+++ b/drivers/staging/iio/accel/adis16201_core.c
@@ -18,9 +18,9 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
 
 #include "adis16201.h"
-#include "../imu/adis.h"
 
 static const u8 adis16201_addresses[] = {
 	[ADIS16201_SCAN_ACC_X] = ADIS16201_XACCL_OFFS,
diff --git a/drivers/staging/iio/accel/adis16203_core.c b/drivers/staging/iio/accel/adis16203_core.c
index f631e578fbd1..202985ea3531 100644
--- a/drivers/staging/iio/accel/adis16203_core.c
+++ b/drivers/staging/iio/accel/adis16203_core.c
@@ -18,9 +18,9 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
 
 #include "adis16203.h"
-#include "../imu/adis.h"
 
 #define DRIVER_NAME		"adis16203"
 
diff --git a/drivers/staging/iio/accel/adis16204_core.c b/drivers/staging/iio/accel/adis16204_core.c
index dbec841ce30c..6dafad67cd23 100644
--- a/drivers/staging/iio/accel/adis16204_core.c
+++ b/drivers/staging/iio/accel/adis16204_core.c
@@ -21,9 +21,9 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
 
 #include "adis16204.h"
-#include "../imu/adis.h"
 
 /* Unique to this driver currently */
 
diff --git a/drivers/staging/iio/accel/adis16209_core.c b/drivers/staging/iio/accel/adis16209_core.c
index f9f9d582b32d..d2921c30a8bb 100644
--- a/drivers/staging/iio/accel/adis16209_core.c
+++ b/drivers/staging/iio/accel/adis16209_core.c
@@ -19,9 +19,9 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
 
 #include "adis16209.h"
-#include "../imu/adis.h"
 
 static const u8 adis16209_addresses[8][1] = {
 	[ADIS16209_SCAN_SUPPLY] = { },
diff --git a/drivers/staging/iio/accel/adis16220.h b/drivers/staging/iio/accel/adis16220.h
index 7cc4d2f3ec28..a894ad7fb26d 100644
--- a/drivers/staging/iio/accel/adis16220.h
+++ b/drivers/staging/iio/accel/adis16220.h
@@ -1,7 +1,7 @@
 #ifndef SPI_ADIS16220_H_
 #define SPI_ADIS16220_H_
 
-#include "../imu/adis.h"
+#include <linux/iio/imu/adis.h>
 
 #define ADIS16220_STARTUP_DELAY	220 /* ms */
 
diff --git a/drivers/staging/iio/accel/adis16240_core.c b/drivers/staging/iio/accel/adis16240_core.c
index 3d1a8a9921ad..d098b49cc18b 100644
--- a/drivers/staging/iio/accel/adis16240_core.c
+++ b/drivers/staging/iio/accel/adis16240_core.c
@@ -22,9 +22,9 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
 
 #include "adis16240.h"
-#include "../imu/adis.h"
 
 static ssize_t adis16240_spi_read_signed(struct device *dev,
 		struct device_attribute *attr,
diff --git a/drivers/staging/iio/gyro/adis16260.h b/drivers/staging/iio/gyro/adis16260.h
index ea5eba205bbf..df3c0b7e954a 100644
--- a/drivers/staging/iio/gyro/adis16260.h
+++ b/drivers/staging/iio/gyro/adis16260.h
@@ -2,7 +2,7 @@
 #define SPI_ADIS16260_H_
 
 #include "adis16260_platform_data.h"
-#include "../imu/adis.h"
+#include <linux/iio/imu/adis.h>
 
 #define ADIS16260_STARTUP_DELAY	220 /* ms */
 
diff --git a/drivers/staging/iio/imu/Kconfig b/drivers/staging/iio/imu/Kconfig
index 2c564edeb172..2c2f47de2630 100644
--- a/drivers/staging/iio/imu/Kconfig
+++ b/drivers/staging/iio/imu/Kconfig
@@ -15,15 +15,3 @@ config ADIS16400
 	  (adis16400 series also have magnetometers).
 
 endmenu
-
-config IIO_ADIS_LIB
-	tristate
-	help
-	  A set of IO helper functions for the Analog Devices ADIS* device family.
-
-config IIO_ADIS_LIB_BUFFER
-	bool
-	select IIO_TRIGGERED_BUFFER
-	help
-	  A set of buffer helper functions for the Analog Devices ADIS* device
-	  family.
diff --git a/drivers/staging/iio/imu/Makefile b/drivers/staging/iio/imu/Makefile
index 65dafba1e5df..3400a13d1522 100644
--- a/drivers/staging/iio/imu/Makefile
+++ b/drivers/staging/iio/imu/Makefile
@@ -5,8 +5,3 @@
 adis16400-y             := adis16400_core.o
 adis16400-$(CONFIG_IIO_BUFFER) += adis16400_ring.o adis16400_trigger.o
 obj-$(CONFIG_ADIS16400) += adis16400.o
-
-adis_lib-y += adis.o
-adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_trigger.o
-adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_buffer.o
-obj-$(CONFIG_IIO_ADIS_LIB) += adis_lib.o
diff --git a/drivers/staging/iio/imu/adis.c b/drivers/staging/iio/imu/adis.c
deleted file mode 100644
index 0bd21022e4c0..000000000000
--- a/drivers/staging/iio/imu/adis.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Common library for ADIS16XXX devices
- *
- * Copyright 2012 Analog Devices Inc.
- *   Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/delay.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/spi/spi.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
-#include <linux/module.h>
-#include <asm/unaligned.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/sysfs.h>
-#include <linux/iio/buffer.h>
-
-#include "adis.h"
-
-#define ADIS_MSC_CTRL_DATA_RDY_EN	BIT(2)
-#define ADIS_MSC_CTRL_DATA_RDY_POL_HIGH	BIT(1)
-#define ADIS_MSC_CTRL_DATA_RDY_DIO2	BIT(0)
-#define ADIS_GLOB_CMD_SW_RESET		BIT(7)
-
-/**
- * adis_write_reg_8() - Write single byte to a register
- * @adis: The adis device
- * @reg: The address of the register to be written
- * @val: The value to write
- */
-int adis_write_reg_8(struct adis *adis, unsigned int reg, uint8_t val)
-{
-	int ret;
-
-	mutex_lock(&adis->txrx_lock);
-	adis->tx[0] = ADIS_WRITE_REG(reg);
-	adis->tx[1] = val;
-
-	ret = spi_write(adis->spi, adis->tx, 2);
-	mutex_unlock(&adis->txrx_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_write_reg_8);
-
-/**
- * adis_write_reg_16() - Write 2 bytes to a pair of registers
- * @adis: The adis device
- * @reg: The address of the lower of the two registers
- * @val: Value to be written
- */
-int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t value)
-{
-	int ret;
-	struct spi_message msg;
-	struct spi_transfer xfers[] = {
-		{
-			.tx_buf = adis->tx,
-			.bits_per_word = 8,
-			.len = 2,
-			.cs_change = 1,
-			.delay_usecs = adis->data->write_delay,
-		}, {
-			.tx_buf = adis->tx + 2,
-			.bits_per_word = 8,
-			.len = 2,
-			.delay_usecs = adis->data->write_delay,
-		},
-	};
-
-	mutex_lock(&adis->txrx_lock);
-	adis->tx[0] = ADIS_WRITE_REG(reg);
-	adis->tx[1] = value & 0xff;
-	adis->tx[2] = ADIS_WRITE_REG(reg + 1);
-	adis->tx[3] = (value >> 8) & 0xff;
-
-	spi_message_init(&msg);
-	spi_message_add_tail(&xfers[0], &msg);
-	spi_message_add_tail(&xfers[1], &msg);
-	ret = spi_sync(adis->spi, &msg);
-	mutex_unlock(&adis->txrx_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_write_reg_16);
-
-/**
- * adis_read_reg_16() - read 2 bytes from a 16-bit register
- * @adis: The adis device
- * @reg: The address of the lower of the two registers
- * @val: The value read back from the device
- */
-int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val)
-{
-	struct spi_message msg;
-	int ret;
-	struct spi_transfer xfers[] = {
-		{
-			.tx_buf = adis->tx,
-			.bits_per_word = 8,
-			.len = 2,
-			.cs_change = 1,
-			.delay_usecs = adis->data->read_delay,
-		}, {
-			.rx_buf = adis->rx,
-			.bits_per_word = 8,
-			.len = 2,
-			.delay_usecs = adis->data->read_delay,
-		},
-	};
-
-	mutex_lock(&adis->txrx_lock);
-	adis->tx[0] = ADIS_READ_REG(reg);
-	adis->tx[1] = 0;
-
-	spi_message_init(&msg);
-	spi_message_add_tail(&xfers[0], &msg);
-	spi_message_add_tail(&xfers[1], &msg);
-	ret = spi_sync(adis->spi, &msg);
-	if (ret) {
-		dev_err(&adis->spi->dev, "Failed to read 16 bit register 0x%02X: %d\n",
-				reg, ret);
-		goto error_ret;
-	}
-	*val = get_unaligned_be16(adis->rx);
-
-error_ret:
-	mutex_unlock(&adis->txrx_lock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_read_reg_16);
-
-/**
- * adis_enable_irq() - Enable or disable data ready IRQ
- * @adis: The adis device
- * @enable: Whether to enable the IRQ
- *
- * Returns 0 on success, negative error code otherwise
- */
-int adis_enable_irq(struct adis *adis, bool enable)
-{
-	int ret = 0;
-	uint16_t msc;
-
-	ret = adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc);
-	if (ret)
-		goto error_ret;
-
-	msc |= ADIS_MSC_CTRL_DATA_RDY_POL_HIGH;
-	msc &= ~ADIS_MSC_CTRL_DATA_RDY_DIO2;
-	if (enable)
-		msc |= ADIS_MSC_CTRL_DATA_RDY_EN;
-	else
-		msc &= ~ADIS_MSC_CTRL_DATA_RDY_EN;
-
-	ret = adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc);
-
-error_ret:
-	return ret;
-}
-EXPORT_SYMBOL(adis_enable_irq);
-
-/**
- * adis_check_status() - Check the device for error conditions
- * @adis: The adis device
- *
- * Returns 0 on success, a negative error code otherwise
- */
-int adis_check_status(struct adis *adis)
-{
-	uint16_t status;
-	int ret;
-	int i;
-
-	ret = adis_read_reg_16(adis, adis->data->diag_stat_reg, &status);
-	if (ret < 0)
-		return ret;
-
-	status &= adis->data->status_error_mask;
-
-	if (status == 0)
-		return 0;
-
-	for (i = 0; i < 16; ++i) {
-		if (status & BIT(i)) {
-			dev_err(&adis->spi->dev, "%s.\n",
-				adis->data->status_error_msgs[i]);
-		}
-	}
-
-	return -EIO;
-}
-EXPORT_SYMBOL_GPL(adis_check_status);
-
-/**
- * adis_reset() - Reset the device
- * @adis: The adis device
- *
- * Returns 0 on success, a negative error code otherwise
- */
-int adis_reset(struct adis *adis)
-{
-	int ret;
-
-	ret = adis_write_reg_8(adis, adis->data->glob_cmd_reg,
-			ADIS_GLOB_CMD_SW_RESET);
-	if (ret)
-		dev_err(&adis->spi->dev, "Failed to reset device: %d\n", ret);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_reset);
-
-static int adis_self_test(struct adis *adis)
-{
-	int ret;
-
-	ret = adis_write_reg_16(adis, adis->data->msc_ctrl_reg,
-			adis->data->self_test_mask);
-	if (ret) {
-		dev_err(&adis->spi->dev, "Failed to initiate self test: %d\n",
-			ret);
-		return ret;
-	}
-
-	msleep(adis->data->startup_delay);
-
-	return adis_check_status(adis);
-}
-
-/**
- * adis_inital_startup() - Performs device self-test
- * @adis: The adis device
- *
- * Returns 0 if the device is operational, a negative error code otherwise.
- *
- * This function should be called early on in the device initialization sequence
- * to ensure that the device is in a sane and known state and that it is usable.
- */
-int adis_initial_startup(struct adis *adis)
-{
-	int ret;
-
-	ret = adis_self_test(adis);
-	if (ret) {
-		dev_err(&adis->spi->dev, "Self-test failed, trying reset.\n");
-		adis_reset(adis);
-		msleep(adis->data->startup_delay);
-		ret = adis_self_test(adis);
-		if (ret) {
-			dev_err(&adis->spi->dev, "Second self-test failed, giving up.\n");
-			return ret;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(adis_initial_startup);
-
-/**
- * adis_single_conversion() - Performs a single sample conversion
- * @indio_dev: The IIO device
- * @chan: The IIO channel
- * @error_mask: Mask for the error bit
- * @val: Result of the conversion
- *
- * Returns IIO_VAL_INT on success, a negative error code otherwise.
- *
- * The function performs a single conversion on a given channel and post
- * processes the value accordingly to the channel spec. If a error_mask is given
- * the function will check if the mask is set in the returned raw value. If it
- * is set the function will perform a self-check. If the device does not report
- * a error bit in the channels raw value set error_mask to 0.
- */
-int adis_single_conversion(struct iio_dev *indio_dev,
-	const struct iio_chan_spec *chan, unsigned int error_mask, int *val)
-{
-	struct adis *adis = iio_device_get_drvdata(indio_dev);
-	uint16_t val16;
-	int ret;
-
-	mutex_lock(&indio_dev->mlock);
-
-	ret = adis_read_reg_16(adis, chan->address, &val16);
-	if (ret)
-		goto err_unlock;
-
-	if (val16 & error_mask) {
-		ret = adis_check_status(adis);
-		if (ret)
-			goto err_unlock;
-	}
-
-	if (chan->scan_type.sign == 's')
-		*val = sign_extend32(val16, chan->scan_type.realbits - 1);
-	else
-		*val = val16 & ((1 << chan->scan_type.realbits) - 1);
-
-	ret = IIO_VAL_INT;
-err_unlock:
-	mutex_unlock(&indio_dev->mlock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_single_conversion);
-
-/**
- * adis_init() - Initialize adis device structure
- * @adis:	The adis device
- * @indio_dev:	The iio device
- * @spi:	The spi device
- * @data:	Chip specific data
- *
- * Returns 0 on success, a negative error code otherwise.
- *
- * This function must be called, before any other adis helper function may be
- * called.
- */
-int adis_init(struct adis *adis, struct iio_dev *indio_dev,
-	struct spi_device *spi, const struct adis_data *data)
-{
-	mutex_init(&adis->txrx_lock);
-	adis->spi = spi;
-	adis->data = data;
-	iio_device_set_drvdata(indio_dev, adis);
-
-	return adis_enable_irq(adis, false);
-}
-EXPORT_SYMBOL_GPL(adis_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_DESCRIPTION("Common library code for ADIS16XXX devices");
diff --git a/drivers/staging/iio/imu/adis.h b/drivers/staging/iio/imu/adis.h
deleted file mode 100644
index 8c3304d44b97..000000000000
--- a/drivers/staging/iio/imu/adis.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Common library for ADIS16XXX devices
- *
- * Copyright 2012 Analog Devices Inc.
- *   Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __IIO_ADIS_H__
-#define __IIO_ADIS_H__
-
-#include <linux/spi/spi.h>
-#include <linux/interrupt.h>
-#include <linux/iio/types.h>
-
-#define ADIS_WRITE_REG(reg) (0x80 | (reg))
-#define ADIS_READ_REG(reg) (reg)
-
-/**
- * struct adis_data - ADIS chip variant specific data
- * @read_delay: SPI delay for read operations in us
- * @write_delay: SPI delay for write operations in us
- * @glob_cmd_reg: Register address of the GLOB_CMD register
- * @msc_ctrl_reg: Register address of the MSC_CTRL register
- * @diag_stat_reg: Register address of the DIAG_STAT register
- * @status_error_msgs: Array of error messgaes
- * @status_error_mask:
- */
-struct adis_data {
-	unsigned int read_delay;
-	unsigned int write_delay;
-
-	unsigned int glob_cmd_reg;
-	unsigned int msc_ctrl_reg;
-	unsigned int diag_stat_reg;
-
-	unsigned int self_test_mask;
-	unsigned int startup_delay;
-
-	const char * const *status_error_msgs;
-	unsigned int status_error_mask;
-};
-
-struct adis {
-	struct spi_device	*spi;
-	struct iio_trigger	*trig;
-
-	const struct adis_data	*data;
-
-	struct mutex		txrx_lock;
-	struct spi_message	msg;
-	struct spi_transfer	*xfer;
-	void			*buffer;
-
-	uint8_t			tx[8] ____cacheline_aligned;
-	uint8_t			rx[4];
-};
-
-int adis_init(struct adis *adis, struct iio_dev *indio_dev,
-	struct spi_device *spi, const struct adis_data *data);
-int adis_reset(struct adis *adis);
-
-int adis_write_reg_8(struct adis *adis, unsigned int reg, uint8_t val);
-int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t val);
-int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val);
-
-int adis_enable_irq(struct adis *adis, bool enable);
-int adis_check_status(struct adis *adis);
-
-int adis_initial_startup(struct adis *adis);
-
-int adis_single_conversion(struct iio_dev *indio_dev,
-	const struct iio_chan_spec *chan, unsigned int error_mask,
-	int *val);
-
-#define ADIS_VOLTAGE_CHAN(addr, si, chan, name, bits) { \
-	.type = IIO_VOLTAGE, \
-	.indexed = 1, \
-	.channel = (chan), \
-	.extend_name = name, \
-	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
-		IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \
-	.address = (addr), \
-	.scan_index = (si), \
-	.scan_type = { \
-		.sign = 'u', \
-		.realbits = (bits), \
-		.storagebits = 16, \
-		.endianness = IIO_BE, \
-	}, \
-}
-
-#define ADIS_SUPPLY_CHAN(addr, si, bits) \
-	ADIS_VOLTAGE_CHAN(addr, si, 0, "supply", bits)
-
-#define ADIS_AUX_ADC_CHAN(addr, si, bits) \
-	ADIS_VOLTAGE_CHAN(addr, si, 1, NULL, bits)
-
-#define ADIS_TEMP_CHAN(addr, si, bits) { \
-	.type = IIO_TEMP, \
-	.indexed = 1, \
-	.channel = 0, \
-	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
-		IIO_CHAN_INFO_SCALE_SEPARATE_BIT | \
-		IIO_CHAN_INFO_OFFSET_SEPARATE_BIT, \
-	.address = (addr), \
-	.scan_index = (si), \
-	.scan_type = { \
-		.sign = 'u', \
-		.realbits = (bits), \
-		.storagebits = 16, \
-		.endianness = IIO_BE, \
-	}, \
-}
-
-#define ADIS_MOD_CHAN(_type, mod, addr, si, info, bits) { \
-	.type = (_type), \
-	.modified = 1, \
-	.channel2 = IIO_MOD_ ## mod, \
-	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
-		 IIO_CHAN_INFO_SCALE_SHARED_BIT | \
-		 info, \
-	.address = (addr), \
-	.scan_index = (si), \
-	.scan_type = { \
-		.sign = 's', \
-		.realbits = (bits), \
-		.storagebits = 16, \
-		.endianness = IIO_BE, \
-	}, \
-}
-
-#define ADIS_ACCEL_CHAN(mod, addr, si, info, bits) \
-	ADIS_MOD_CHAN(IIO_ACCEL, mod, addr, si, info, bits)
-
-#define ADIS_GYRO_CHAN(mod, addr, si, info, bits) \
-	ADIS_MOD_CHAN(IIO_ANGL_VEL, mod, addr, si, info, bits)
-
-#define ADIS_INCLI_CHAN(mod, addr, si, info, bits) \
-	ADIS_MOD_CHAN(IIO_INCLI, mod, addr, si, info, bits)
-
-#define ADIS_ROT_CHAN(mod, addr, si, info, bits) \
-	ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info, bits)
-
-#ifdef CONFIG_IIO_ADIS_LIB_BUFFER
-
-int adis_setup_buffer_and_trigger(struct adis *adis,
-	struct iio_dev *indio_dev, irqreturn_t (*trigger_handler)(int, void *));
-void adis_cleanup_buffer_and_trigger(struct adis *adis,
-	struct iio_dev *indio_dev);
-
-int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev);
-void adis_remove_trigger(struct adis *adis);
-
-int adis_update_scan_mode(struct iio_dev *indio_dev,
-	const unsigned long *scan_mask);
-
-#else /* CONFIG_IIO_BUFFER */
-
-static inline int adis_setup_buffer_and_trigger(struct adis *adis,
-	struct iio_dev *indio_dev, irqreturn_t (*trigger_handler)(int, void *))
-{
-	return 0;
-}
-
-static inline void adis_cleanup_buffer_and_trigger(struct adis *adis,
-	struct iio_dev *indio_dev)
-{
-}
-
-static inline int adis_probe_trigger(struct adis *adis,
-	struct iio_dev *indio_dev)
-{
-	return 0;
-}
-
-static inline void adis_remove_trigger(struct adis *adis)
-{
-}
-
-#define adis_update_scan_mode NULL
-
-#endif /* CONFIG_IIO_BUFFER */
-
-#endif
diff --git a/drivers/staging/iio/imu/adis_buffer.c b/drivers/staging/iio/imu/adis_buffer.c
deleted file mode 100644
index 342758c14d2b..000000000000
--- a/drivers/staging/iio/imu/adis_buffer.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Common library for ADIS16XXX devices
- *
- * Copyright 2012 Analog Devices Inc.
- *   Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/kernel.h>
-#include <linux/spi/spi.h>
-#include <linux/slab.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/buffer.h>
-#include <linux/iio/trigger_consumer.h>
-#include <linux/iio/triggered_buffer.h>
-
-#include  "adis.h"
-
-int adis_update_scan_mode(struct iio_dev *indio_dev,
-	const unsigned long *scan_mask)
-{
-	struct adis *adis = iio_device_get_drvdata(indio_dev);
-	const struct iio_chan_spec *chan;
-	unsigned int scan_count;
-	unsigned int i, j;
-	__be16 *tx, *rx;
-
-	kfree(adis->xfer);
-	kfree(adis->buffer);
-
-	scan_count = indio_dev->scan_bytes / 2;
-
-	adis->xfer = kcalloc(scan_count + 1, sizeof(*adis->xfer), GFP_KERNEL);
-	if (!adis->xfer)
-		return -ENOMEM;
-
-	adis->buffer = kzalloc(indio_dev->scan_bytes * 2, GFP_KERNEL);
-	if (!adis->buffer)
-		return -ENOMEM;
-
-	rx = adis->buffer;
-	tx = rx + indio_dev->scan_bytes;
-
-	spi_message_init(&adis->msg);
-
-	for (j = 0; j <= scan_count; j++) {
-		adis->xfer[j].bits_per_word = 8;
-		if (j != scan_count)
-			adis->xfer[j].cs_change = 1;
-		adis->xfer[j].len = 2;
-		adis->xfer[j].delay_usecs = adis->data->read_delay;
-		if (j < scan_count)
-			adis->xfer[j].tx_buf = &tx[j];
-		if (j >= 1)
-			adis->xfer[j].rx_buf = &rx[j - 1];
-		spi_message_add_tail(&adis->xfer[j], &adis->msg);
-	}
-
-	chan = indio_dev->channels;
-	for (i = 0; i < indio_dev->num_channels; i++, chan++) {
-		if (!test_bit(chan->scan_index, scan_mask))
-			continue;
-		*tx++ = cpu_to_be16(chan->address << 8);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(adis_update_scan_mode);
-
-static irqreturn_t adis_trigger_handler(int irq, void *p)
-{
-	struct iio_poll_func *pf = p;
-	struct iio_dev *indio_dev = pf->indio_dev;
-	struct adis *adis = iio_device_get_drvdata(indio_dev);
-	int ret;
-
-	if (!adis->buffer)
-		return -ENOMEM;
-
-	ret = spi_sync(adis->spi, &adis->msg);
-	if (ret)
-		dev_err(&adis->spi->dev, "Failed to read data: %d", ret);
-
-	/* Guaranteed to be aligned with 8 byte boundary */
-	if (indio_dev->scan_timestamp) {
-		void *b = adis->buffer + indio_dev->scan_bytes - sizeof(s64);
-		*(s64 *)b = pf->timestamp;
-	}
-
-	iio_push_to_buffers(indio_dev, adis->buffer);
-
-	iio_trigger_notify_done(indio_dev->trig);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * adis_setup_buffer_and_trigger() - Sets up buffer and trigger for the adis device
- * @adis: The adis device.
- * @indio_dev: The IIO device.
- * @trigger_handler: Optional trigger handler, may be NULL.
- *
- * Returns 0 on success, a negative error code otherwise.
- *
- * This function sets up the buffer and trigger for a adis devices.  If
- * 'trigger_handler' is NULL the default trigger handler will be used. The
- * default trigger handler will simply read the registers assigned to the
- * currently active channels.
- *
- * adis_cleanup_buffer_and_trigger() should be called to free the resources
- * allocated by this function.
- */
-int adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev,
-	irqreturn_t (*trigger_handler)(int, void *))
-{
-	int ret;
-
-	if (!trigger_handler)
-		trigger_handler = adis_trigger_handler;
-
-	ret = iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
-		trigger_handler, NULL);
-	if (ret)
-		return ret;
-
-	if (adis->spi->irq) {
-		ret = adis_probe_trigger(adis, indio_dev);
-		if (ret)
-			goto error_buffer_cleanup;
-	}
-	return 0;
-
-error_buffer_cleanup:
-	iio_triggered_buffer_cleanup(indio_dev);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_setup_buffer_and_trigger);
-
-/**
- * adis_cleanup_buffer_and_trigger() - Free buffer and trigger resources
- * @adis: The adis device.
- * @indio_dev: The IIO device.
- *
- * Frees resources allocated by adis_setup_buffer_and_trigger()
- */
-void adis_cleanup_buffer_and_trigger(struct adis *adis,
-	struct iio_dev *indio_dev)
-{
-	if (adis->spi->irq)
-		adis_remove_trigger(adis);
-	kfree(adis->buffer);
-	kfree(adis->xfer);
-	iio_triggered_buffer_cleanup(indio_dev);
-}
-EXPORT_SYMBOL_GPL(adis_cleanup_buffer_and_trigger);
diff --git a/drivers/staging/iio/imu/adis_trigger.c b/drivers/staging/iio/imu/adis_trigger.c
deleted file mode 100644
index 3e89b2e83708..000000000000
--- a/drivers/staging/iio/imu/adis_trigger.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Common library for ADIS16XXX devices
- *
- * Copyright 2012 Analog Devices Inc.
- *   Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/spi/spi.h>
-#include <linux/export.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/trigger.h>
-
-#include "adis.h"
-
-static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig,
-						bool state)
-{
-	struct adis *adis = trig->private_data;
-
-	return adis_enable_irq(adis, state);
-}
-
-static const struct iio_trigger_ops adis_trigger_ops = {
-	.owner = THIS_MODULE,
-	.set_trigger_state = &adis_data_rdy_trigger_set_state,
-};
-
-/**
- * adis_probe_trigger() - Sets up trigger for a adis device
- * @adis: The adis device
- * @indio_dev: The IIO device
- *
- * Returns 0 on success or a negative error code
- *
- * adis_remove_trigger() should be used to free the trigger.
- */
-int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
-{
-	int ret;
-
-	adis->trig = iio_trigger_alloc("%s-dev%d", indio_dev->name,
-					indio_dev->id);
-	if (adis->trig == NULL)
-		return -ENOMEM;
-
-	ret = request_irq(adis->spi->irq,
-			  &iio_trigger_generic_data_rdy_poll,
-			  IRQF_TRIGGER_RISING,
-			  indio_dev->name,
-			  adis->trig);
-	if (ret)
-		goto error_free_trig;
-
-	adis->trig->dev.parent = &adis->spi->dev;
-	adis->trig->ops = &adis_trigger_ops;
-	adis->trig->private_data = adis;
-	ret = iio_trigger_register(adis->trig);
-
-	indio_dev->trig = adis->trig;
-	if (ret)
-		goto error_free_irq;
-
-	return 0;
-
-error_free_irq:
-	free_irq(adis->spi->irq, adis->trig);
-error_free_trig:
-	iio_trigger_free(adis->trig);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_probe_trigger);
-
-/**
- * adis_remove_trigger() - Remove trigger for a adis devices
- * @adis: The adis device
- *
- * Removes the trigger previously registered with adis_probe_trigger().
- */
-void adis_remove_trigger(struct adis *adis)
-{
-	iio_trigger_unregister(adis->trig);
-	free_irq(adis->spi->irq, adis->trig);
-	iio_trigger_free(adis->trig);
-}
-EXPORT_SYMBOL_GPL(adis_remove_trigger);
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
new file mode 100644
index 000000000000..8c3304d44b97
--- /dev/null
+++ b/include/linux/iio/imu/adis.h
@@ -0,0 +1,186 @@
+/*
+ * Common library for ADIS16XXX devices
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *   Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __IIO_ADIS_H__
+#define __IIO_ADIS_H__
+
+#include <linux/spi/spi.h>
+#include <linux/interrupt.h>
+#include <linux/iio/types.h>
+
+#define ADIS_WRITE_REG(reg) (0x80 | (reg))
+#define ADIS_READ_REG(reg) (reg)
+
+/**
+ * struct adis_data - ADIS chip variant specific data
+ * @read_delay: SPI delay for read operations in us
+ * @write_delay: SPI delay for write operations in us
+ * @glob_cmd_reg: Register address of the GLOB_CMD register
+ * @msc_ctrl_reg: Register address of the MSC_CTRL register
+ * @diag_stat_reg: Register address of the DIAG_STAT register
+ * @status_error_msgs: Array of error messgaes
+ * @status_error_mask:
+ */
+struct adis_data {
+	unsigned int read_delay;
+	unsigned int write_delay;
+
+	unsigned int glob_cmd_reg;
+	unsigned int msc_ctrl_reg;
+	unsigned int diag_stat_reg;
+
+	unsigned int self_test_mask;
+	unsigned int startup_delay;
+
+	const char * const *status_error_msgs;
+	unsigned int status_error_mask;
+};
+
+struct adis {
+	struct spi_device	*spi;
+	struct iio_trigger	*trig;
+
+	const struct adis_data	*data;
+
+	struct mutex		txrx_lock;
+	struct spi_message	msg;
+	struct spi_transfer	*xfer;
+	void			*buffer;
+
+	uint8_t			tx[8] ____cacheline_aligned;
+	uint8_t			rx[4];
+};
+
+int adis_init(struct adis *adis, struct iio_dev *indio_dev,
+	struct spi_device *spi, const struct adis_data *data);
+int adis_reset(struct adis *adis);
+
+int adis_write_reg_8(struct adis *adis, unsigned int reg, uint8_t val);
+int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t val);
+int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val);
+
+int adis_enable_irq(struct adis *adis, bool enable);
+int adis_check_status(struct adis *adis);
+
+int adis_initial_startup(struct adis *adis);
+
+int adis_single_conversion(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, unsigned int error_mask,
+	int *val);
+
+#define ADIS_VOLTAGE_CHAN(addr, si, chan, name, bits) { \
+	.type = IIO_VOLTAGE, \
+	.indexed = 1, \
+	.channel = (chan), \
+	.extend_name = name, \
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+		IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \
+	.address = (addr), \
+	.scan_index = (si), \
+	.scan_type = { \
+		.sign = 'u', \
+		.realbits = (bits), \
+		.storagebits = 16, \
+		.endianness = IIO_BE, \
+	}, \
+}
+
+#define ADIS_SUPPLY_CHAN(addr, si, bits) \
+	ADIS_VOLTAGE_CHAN(addr, si, 0, "supply", bits)
+
+#define ADIS_AUX_ADC_CHAN(addr, si, bits) \
+	ADIS_VOLTAGE_CHAN(addr, si, 1, NULL, bits)
+
+#define ADIS_TEMP_CHAN(addr, si, bits) { \
+	.type = IIO_TEMP, \
+	.indexed = 1, \
+	.channel = 0, \
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+		IIO_CHAN_INFO_SCALE_SEPARATE_BIT | \
+		IIO_CHAN_INFO_OFFSET_SEPARATE_BIT, \
+	.address = (addr), \
+	.scan_index = (si), \
+	.scan_type = { \
+		.sign = 'u', \
+		.realbits = (bits), \
+		.storagebits = 16, \
+		.endianness = IIO_BE, \
+	}, \
+}
+
+#define ADIS_MOD_CHAN(_type, mod, addr, si, info, bits) { \
+	.type = (_type), \
+	.modified = 1, \
+	.channel2 = IIO_MOD_ ## mod, \
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+		 IIO_CHAN_INFO_SCALE_SHARED_BIT | \
+		 info, \
+	.address = (addr), \
+	.scan_index = (si), \
+	.scan_type = { \
+		.sign = 's', \
+		.realbits = (bits), \
+		.storagebits = 16, \
+		.endianness = IIO_BE, \
+	}, \
+}
+
+#define ADIS_ACCEL_CHAN(mod, addr, si, info, bits) \
+	ADIS_MOD_CHAN(IIO_ACCEL, mod, addr, si, info, bits)
+
+#define ADIS_GYRO_CHAN(mod, addr, si, info, bits) \
+	ADIS_MOD_CHAN(IIO_ANGL_VEL, mod, addr, si, info, bits)
+
+#define ADIS_INCLI_CHAN(mod, addr, si, info, bits) \
+	ADIS_MOD_CHAN(IIO_INCLI, mod, addr, si, info, bits)
+
+#define ADIS_ROT_CHAN(mod, addr, si, info, bits) \
+	ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info, bits)
+
+#ifdef CONFIG_IIO_ADIS_LIB_BUFFER
+
+int adis_setup_buffer_and_trigger(struct adis *adis,
+	struct iio_dev *indio_dev, irqreturn_t (*trigger_handler)(int, void *));
+void adis_cleanup_buffer_and_trigger(struct adis *adis,
+	struct iio_dev *indio_dev);
+
+int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev);
+void adis_remove_trigger(struct adis *adis);
+
+int adis_update_scan_mode(struct iio_dev *indio_dev,
+	const unsigned long *scan_mask);
+
+#else /* CONFIG_IIO_BUFFER */
+
+static inline int adis_setup_buffer_and_trigger(struct adis *adis,
+	struct iio_dev *indio_dev, irqreturn_t (*trigger_handler)(int, void *))
+{
+	return 0;
+}
+
+static inline void adis_cleanup_buffer_and_trigger(struct adis *adis,
+	struct iio_dev *indio_dev)
+{
+}
+
+static inline int adis_probe_trigger(struct adis *adis,
+	struct iio_dev *indio_dev)
+{
+	return 0;
+}
+
+static inline void adis_remove_trigger(struct adis *adis)
+{
+}
+
+#define adis_update_scan_mode NULL
+
+#endif /* CONFIG_IIO_BUFFER */
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 709ab36e9559ff5c7df6e6f2d9e3c4a4410f8d49 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Sat, 17 Nov 2012 11:42:59 +0000
Subject: staging:iio: Move the ad7298 driver out of staging

The driver does not expose any custom API to userspace and none of the standard
static code checker tools report any issues, so move it out of staging.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/Kconfig              |  12 ++
 drivers/iio/adc/Makefile             |   1 +
 drivers/iio/adc/ad7298.c             | 408 +++++++++++++++++++++++++++++++++++
 drivers/staging/iio/adc/Kconfig      |  12 --
 drivers/staging/iio/adc/Makefile     |   2 -
 drivers/staging/iio/adc/ad7298.c     | 408 -----------------------------------
 drivers/staging/iio/adc/ad7298.h     |  20 --
 include/linux/platform_data/ad7298.h |  20 ++
 8 files changed, 441 insertions(+), 442 deletions(-)
 create mode 100644 drivers/iio/adc/ad7298.c
 delete mode 100644 drivers/staging/iio/adc/ad7298.c
 delete mode 100644 drivers/staging/iio/adc/ad7298.h
 create mode 100644 include/linux/platform_data/ad7298.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index ef5200a6850e..cd5eed60be28 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -18,6 +18,18 @@ config AD7266
 	  Say yes here to build support for Analog Devices AD7265 and AD7266
 	  ADCs.
 
+config AD7298
+	tristate "Analog Devices AD7298 ADC driver"
+	depends on SPI
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
+	help
+	  Say yes here to build support for Analog Devices AD7298
+	  8 Channel ADC with temperature sensor.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad7298.
+
 config AD7791
 	tristate "Analog Devices AD7791 ADC driver"
 	depends on SPI
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 54ac7bbcd01b..3256dc64a466 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o
 obj-$(CONFIG_AD7266) += ad7266.o
+obj-$(CONFIG_AD7298) += ad7298.o
 obj-$(CONFIG_AD7476) += ad7476.o
 obj-$(CONFIG_AD7791) += ad7791.o
 obj-$(CONFIG_AD7887) += ad7887.o
diff --git a/drivers/iio/adc/ad7298.c b/drivers/iio/adc/ad7298.c
new file mode 100644
index 000000000000..441a9a265c12
--- /dev/null
+++ b/drivers/iio/adc/ad7298.c
@@ -0,0 +1,408 @@
+/*
+ * AD7298 SPI ADC driver
+ *
+ * Copyright 2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/spi/spi.h>
+#include <linux/regulator/consumer.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+
+#include <linux/platform_data/ad7298.h>
+
+#define AD7298_WRITE	(1 << 15) /* write to the control register */
+#define AD7298_REPEAT	(1 << 14) /* repeated conversion enable */
+#define AD7298_CH(x)	(1 << (13 - (x))) /* channel select */
+#define AD7298_TSENSE	(1 << 5) /* temperature conversion enable */
+#define AD7298_EXTREF	(1 << 2) /* external reference enable */
+#define AD7298_TAVG	(1 << 1) /* temperature sensor averaging enable */
+#define AD7298_PDD	(1 << 0) /* partial power down enable */
+
+#define AD7298_MAX_CHAN		8
+#define AD7298_BITS		12
+#define AD7298_STORAGE_BITS	16
+#define AD7298_INTREF_mV	2500
+
+#define AD7298_CH_TEMP		9
+
+#define RES_MASK(bits)	((1 << (bits)) - 1)
+
+struct ad7298_state {
+	struct spi_device		*spi;
+	struct regulator		*reg;
+	unsigned			ext_ref;
+	struct spi_transfer		ring_xfer[10];
+	struct spi_transfer		scan_single_xfer[3];
+	struct spi_message		ring_msg;
+	struct spi_message		scan_single_msg;
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	unsigned short			rx_buf[12] ____cacheline_aligned;
+	unsigned short			tx_buf[2];
+};
+
+#define AD7298_V_CHAN(index)						\
+	{								\
+		.type = IIO_VOLTAGE,					\
+		.indexed = 1,						\
+		.channel = index,					\
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |		\
+		IIO_CHAN_INFO_SCALE_SHARED_BIT,				\
+		.address = index,					\
+		.scan_index = index,					\
+		.scan_type = {						\
+			.sign = 'u',					\
+			.realbits = 12,					\
+			.storagebits = 16,				\
+			.endianness = IIO_BE,				\
+		},							\
+	}
+
+static const struct iio_chan_spec ad7298_channels[] = {
+	{
+		.type = IIO_TEMP,
+		.indexed = 1,
+		.channel = 0,
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
+			IIO_CHAN_INFO_SCALE_SEPARATE_BIT |
+			IIO_CHAN_INFO_OFFSET_SEPARATE_BIT,
+		.address = AD7298_CH_TEMP,
+		.scan_index = -1,
+		.scan_type = {
+			.sign = 's',
+			.realbits = 32,
+			.storagebits = 32,
+		},
+	},
+	AD7298_V_CHAN(0),
+	AD7298_V_CHAN(1),
+	AD7298_V_CHAN(2),
+	AD7298_V_CHAN(3),
+	AD7298_V_CHAN(4),
+	AD7298_V_CHAN(5),
+	AD7298_V_CHAN(6),
+	AD7298_V_CHAN(7),
+	IIO_CHAN_SOFT_TIMESTAMP(8),
+};
+
+/**
+ * ad7298_update_scan_mode() setup the spi transfer buffer for the new scan mask
+ **/
+static int ad7298_update_scan_mode(struct iio_dev *indio_dev,
+	const unsigned long *active_scan_mask)
+{
+	struct ad7298_state *st = iio_priv(indio_dev);
+	int i, m;
+	unsigned short command;
+	int scan_count;
+
+	/* Now compute overall size */
+	scan_count = bitmap_weight(active_scan_mask, indio_dev->masklength);
+
+	command = AD7298_WRITE | st->ext_ref;
+
+	for (i = 0, m = AD7298_CH(0); i < AD7298_MAX_CHAN; i++, m >>= 1)
+		if (test_bit(i, active_scan_mask))
+			command |= m;
+
+	st->tx_buf[0] = cpu_to_be16(command);
+
+	/* build spi ring message */
+	st->ring_xfer[0].tx_buf = &st->tx_buf[0];
+	st->ring_xfer[0].len = 2;
+	st->ring_xfer[0].cs_change = 1;
+	st->ring_xfer[1].tx_buf = &st->tx_buf[1];
+	st->ring_xfer[1].len = 2;
+	st->ring_xfer[1].cs_change = 1;
+
+	spi_message_init(&st->ring_msg);
+	spi_message_add_tail(&st->ring_xfer[0], &st->ring_msg);
+	spi_message_add_tail(&st->ring_xfer[1], &st->ring_msg);
+
+	for (i = 0; i < scan_count; i++) {
+		st->ring_xfer[i + 2].rx_buf = &st->rx_buf[i];
+		st->ring_xfer[i + 2].len = 2;
+		st->ring_xfer[i + 2].cs_change = 1;
+		spi_message_add_tail(&st->ring_xfer[i + 2], &st->ring_msg);
+	}
+	/* make sure last transfer cs_change is not set */
+	st->ring_xfer[i + 1].cs_change = 0;
+
+	return 0;
+}
+
+/**
+ * ad7298_trigger_handler() bh of trigger launched polling to ring buffer
+ *
+ * Currently there is no option in this driver to disable the saving of
+ * timestamps within the ring.
+ **/
+static irqreturn_t ad7298_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct ad7298_state *st = iio_priv(indio_dev);
+	s64 time_ns = 0;
+	int b_sent;
+
+	b_sent = spi_sync(st->spi, &st->ring_msg);
+	if (b_sent)
+		goto done;
+
+	if (indio_dev->scan_timestamp) {
+		time_ns = iio_get_time_ns();
+		memcpy((u8 *)st->rx_buf + indio_dev->scan_bytes - sizeof(s64),
+			&time_ns, sizeof(time_ns));
+	}
+
+	iio_push_to_buffers(indio_dev, (u8 *)st->rx_buf);
+
+done:
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+
+static int ad7298_scan_direct(struct ad7298_state *st, unsigned ch)
+{
+	int ret;
+	st->tx_buf[0] = cpu_to_be16(AD7298_WRITE | st->ext_ref |
+				   (AD7298_CH(0) >> ch));
+
+	ret = spi_sync(st->spi, &st->scan_single_msg);
+	if (ret)
+		return ret;
+
+	return be16_to_cpu(st->rx_buf[0]);
+}
+
+static int ad7298_scan_temp(struct ad7298_state *st, int *val)
+{
+	int ret;
+	__be16 buf;
+
+	buf = cpu_to_be16(AD7298_WRITE | AD7298_TSENSE |
+			  AD7298_TAVG | st->ext_ref);
+
+	ret = spi_write(st->spi, (u8 *)&buf, 2);
+	if (ret)
+		return ret;
+
+	buf = cpu_to_be16(0);
+
+	ret = spi_write(st->spi, (u8 *)&buf, 2);
+	if (ret)
+		return ret;
+
+	usleep_range(101, 1000); /* sleep > 100us */
+
+	ret = spi_read(st->spi, (u8 *)&buf, 2);
+	if (ret)
+		return ret;
+
+	*val = sign_extend32(be16_to_cpu(buf), 11);
+
+	return 0;
+}
+
+static int ad7298_get_ref_voltage(struct ad7298_state *st)
+{
+	int vref;
+
+	if (st->ext_ref) {
+		vref = regulator_get_voltage(st->reg);
+		if (vref < 0)
+			return vref;
+
+		return vref / 1000;
+	} else {
+		return AD7298_INTREF_mV;
+	}
+}
+
+static int ad7298_read_raw(struct iio_dev *indio_dev,
+			   struct iio_chan_spec const *chan,
+			   int *val,
+			   int *val2,
+			   long m)
+{
+	int ret;
+	struct ad7298_state *st = iio_priv(indio_dev);
+
+	switch (m) {
+	case IIO_CHAN_INFO_RAW:
+		mutex_lock(&indio_dev->mlock);
+		if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) {
+			ret = -EBUSY;
+		} else {
+			if (chan->address == AD7298_CH_TEMP)
+				ret = ad7298_scan_temp(st, val);
+			else
+				ret = ad7298_scan_direct(st, chan->address);
+		}
+		mutex_unlock(&indio_dev->mlock);
+
+		if (ret < 0)
+			return ret;
+
+		if (chan->address != AD7298_CH_TEMP)
+			*val = ret & RES_MASK(AD7298_BITS);
+
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SCALE:
+		switch (chan->type) {
+		case IIO_VOLTAGE:
+			*val = ad7298_get_ref_voltage(st);
+			*val2 = chan->scan_type.realbits;
+			return IIO_VAL_FRACTIONAL_LOG2;
+		case IIO_TEMP:
+			*val = ad7298_get_ref_voltage(st);
+			*val2 = 10;
+			return IIO_VAL_FRACTIONAL;
+		default:
+			return -EINVAL;
+		}
+	case IIO_CHAN_INFO_OFFSET:
+		*val = 1093 - 2732500 / ad7298_get_ref_voltage(st);
+		return IIO_VAL_INT;
+	}
+	return -EINVAL;
+}
+
+static const struct iio_info ad7298_info = {
+	.read_raw = &ad7298_read_raw,
+	.update_scan_mode = ad7298_update_scan_mode,
+	.driver_module = THIS_MODULE,
+};
+
+static int __devinit ad7298_probe(struct spi_device *spi)
+{
+	struct ad7298_platform_data *pdata = spi->dev.platform_data;
+	struct ad7298_state *st;
+	struct iio_dev *indio_dev = iio_device_alloc(sizeof(*st));
+	int ret;
+
+	if (indio_dev == NULL)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+
+	if (pdata && pdata->ext_ref)
+		st->ext_ref = AD7298_EXTREF;
+
+	if (st->ext_ref) {
+		st->reg = regulator_get(&spi->dev, "vref");
+		if (IS_ERR(st->reg)) {
+			ret = PTR_ERR(st->reg);
+			goto error_free;
+		}
+		ret = regulator_enable(st->reg);
+		if (ret)
+			goto error_put_reg;
+	}
+
+	spi_set_drvdata(spi, indio_dev);
+
+	st->spi = spi;
+
+	indio_dev->name = spi_get_device_id(spi)->name;
+	indio_dev->dev.parent = &spi->dev;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = ad7298_channels;
+	indio_dev->num_channels = ARRAY_SIZE(ad7298_channels);
+	indio_dev->info = &ad7298_info;
+
+	/* Setup default message */
+
+	st->scan_single_xfer[0].tx_buf = &st->tx_buf[0];
+	st->scan_single_xfer[0].len = 2;
+	st->scan_single_xfer[0].cs_change = 1;
+	st->scan_single_xfer[1].tx_buf = &st->tx_buf[1];
+	st->scan_single_xfer[1].len = 2;
+	st->scan_single_xfer[1].cs_change = 1;
+	st->scan_single_xfer[2].rx_buf = &st->rx_buf[0];
+	st->scan_single_xfer[2].len = 2;
+
+	spi_message_init(&st->scan_single_msg);
+	spi_message_add_tail(&st->scan_single_xfer[0], &st->scan_single_msg);
+	spi_message_add_tail(&st->scan_single_xfer[1], &st->scan_single_msg);
+	spi_message_add_tail(&st->scan_single_xfer[2], &st->scan_single_msg);
+
+	ret = iio_triggered_buffer_setup(indio_dev, NULL,
+			&ad7298_trigger_handler, NULL);
+	if (ret)
+		goto error_disable_reg;
+
+	ret = iio_device_register(indio_dev);
+	if (ret)
+		goto error_cleanup_ring;
+
+	return 0;
+
+error_cleanup_ring:
+	iio_triggered_buffer_cleanup(indio_dev);
+error_disable_reg:
+	if (st->ext_ref)
+		regulator_disable(st->reg);
+error_put_reg:
+	if (st->ext_ref)
+		regulator_put(st->reg);
+error_free:
+	iio_device_free(indio_dev);
+
+	return ret;
+}
+
+static int __devexit ad7298_remove(struct spi_device *spi)
+{
+	struct iio_dev *indio_dev = spi_get_drvdata(spi);
+	struct ad7298_state *st = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+	iio_triggered_buffer_cleanup(indio_dev);
+	if (st->ext_ref) {
+		regulator_disable(st->reg);
+		regulator_put(st->reg);
+	}
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static const struct spi_device_id ad7298_id[] = {
+	{"ad7298", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad7298_id);
+
+static struct spi_driver ad7298_driver = {
+	.driver = {
+		.name	= "ad7298",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= ad7298_probe,
+	.remove		= __devexit_p(ad7298_remove),
+	.id_table	= ad7298_id,
+};
+module_spi_driver(ad7298_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Analog Devices AD7298 ADC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/Kconfig b/drivers/staging/iio/adc/Kconfig
index 5086a46b8e9a..dc8582b95b61 100644
--- a/drivers/staging/iio/adc/Kconfig
+++ b/drivers/staging/iio/adc/Kconfig
@@ -10,18 +10,6 @@ config AD7291
 	  Say yes here to build support for Analog Devices AD7291
 	  8 Channel ADC with temperature sensor.
 
-config AD7298
-	tristate "Analog Devices AD7298 ADC driver"
-	depends on SPI
-	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
-	help
-	  Say yes here to build support for Analog Devices AD7298
-	  8 Channel ADC with temperature sensor.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ad7298.
-
 config AD7606
 	tristate "Analog Devices AD7606 ADC driver"
 	depends on GPIOLIB
diff --git a/drivers/staging/iio/adc/Makefile b/drivers/staging/iio/adc/Makefile
index 4beaa588256b..7281451a613a 100644
--- a/drivers/staging/iio/adc/Makefile
+++ b/drivers/staging/iio/adc/Makefile
@@ -12,8 +12,6 @@ ad799x-y := ad799x_core.o
 ad799x-$(CONFIG_AD799X_RING_BUFFER) += ad799x_ring.o
 obj-$(CONFIG_AD799X) += ad799x.o
 
-obj-$(CONFIG_AD7298) += ad7298.o
-
 obj-$(CONFIG_AD7291) += ad7291.o
 obj-$(CONFIG_AD7780) += ad7780.o
 obj-$(CONFIG_AD7793) += ad7793.o
diff --git a/drivers/staging/iio/adc/ad7298.c b/drivers/staging/iio/adc/ad7298.c
deleted file mode 100644
index 2742a9de05db..000000000000
--- a/drivers/staging/iio/adc/ad7298.c
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * AD7298 SPI ADC driver
- *
- * Copyright 2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
-#include <linux/spi/spi.h>
-#include <linux/regulator/consumer.h>
-#include <linux/err.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/sysfs.h>
-#include <linux/iio/buffer.h>
-#include <linux/iio/trigger_consumer.h>
-#include <linux/iio/triggered_buffer.h>
-
-#include "ad7298.h"
-
-#define AD7298_WRITE	(1 << 15) /* write to the control register */
-#define AD7298_REPEAT	(1 << 14) /* repeated conversion enable */
-#define AD7298_CH(x)	(1 << (13 - (x))) /* channel select */
-#define AD7298_TSENSE	(1 << 5) /* temperature conversion enable */
-#define AD7298_EXTREF	(1 << 2) /* external reference enable */
-#define AD7298_TAVG	(1 << 1) /* temperature sensor averaging enable */
-#define AD7298_PDD	(1 << 0) /* partial power down enable */
-
-#define AD7298_MAX_CHAN		8
-#define AD7298_BITS		12
-#define AD7298_STORAGE_BITS	16
-#define AD7298_INTREF_mV	2500
-
-#define AD7298_CH_TEMP		9
-
-#define RES_MASK(bits)	((1 << (bits)) - 1)
-
-struct ad7298_state {
-	struct spi_device		*spi;
-	struct regulator		*reg;
-	unsigned			ext_ref;
-	struct spi_transfer		ring_xfer[10];
-	struct spi_transfer		scan_single_xfer[3];
-	struct spi_message		ring_msg;
-	struct spi_message		scan_single_msg;
-	/*
-	 * DMA (thus cache coherency maintenance) requires the
-	 * transfer buffers to live in their own cache lines.
-	 */
-	unsigned short			rx_buf[12] ____cacheline_aligned;
-	unsigned short			tx_buf[2];
-};
-
-#define AD7298_V_CHAN(index)						\
-	{								\
-		.type = IIO_VOLTAGE,					\
-		.indexed = 1,						\
-		.channel = index,					\
-		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |		\
-		IIO_CHAN_INFO_SCALE_SHARED_BIT,				\
-		.address = index,					\
-		.scan_index = index,					\
-		.scan_type = {						\
-			.sign = 'u',					\
-			.realbits = 12,					\
-			.storagebits = 16,				\
-			.endianness = IIO_BE,				\
-		},							\
-	}
-
-static const struct iio_chan_spec ad7298_channels[] = {
-	{
-		.type = IIO_TEMP,
-		.indexed = 1,
-		.channel = 0,
-		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
-			IIO_CHAN_INFO_SCALE_SEPARATE_BIT |
-			IIO_CHAN_INFO_OFFSET_SEPARATE_BIT,
-		.address = AD7298_CH_TEMP,
-		.scan_index = -1,
-		.scan_type = {
-			.sign = 's',
-			.realbits = 32,
-			.storagebits = 32,
-		},
-	},
-	AD7298_V_CHAN(0),
-	AD7298_V_CHAN(1),
-	AD7298_V_CHAN(2),
-	AD7298_V_CHAN(3),
-	AD7298_V_CHAN(4),
-	AD7298_V_CHAN(5),
-	AD7298_V_CHAN(6),
-	AD7298_V_CHAN(7),
-	IIO_CHAN_SOFT_TIMESTAMP(8),
-};
-
-/**
- * ad7298_update_scan_mode() setup the spi transfer buffer for the new scan mask
- **/
-static int ad7298_update_scan_mode(struct iio_dev *indio_dev,
-	const unsigned long *active_scan_mask)
-{
-	struct ad7298_state *st = iio_priv(indio_dev);
-	int i, m;
-	unsigned short command;
-	int scan_count;
-
-	/* Now compute overall size */
-	scan_count = bitmap_weight(active_scan_mask, indio_dev->masklength);
-
-	command = AD7298_WRITE | st->ext_ref;
-
-	for (i = 0, m = AD7298_CH(0); i < AD7298_MAX_CHAN; i++, m >>= 1)
-		if (test_bit(i, active_scan_mask))
-			command |= m;
-
-	st->tx_buf[0] = cpu_to_be16(command);
-
-	/* build spi ring message */
-	st->ring_xfer[0].tx_buf = &st->tx_buf[0];
-	st->ring_xfer[0].len = 2;
-	st->ring_xfer[0].cs_change = 1;
-	st->ring_xfer[1].tx_buf = &st->tx_buf[1];
-	st->ring_xfer[1].len = 2;
-	st->ring_xfer[1].cs_change = 1;
-
-	spi_message_init(&st->ring_msg);
-	spi_message_add_tail(&st->ring_xfer[0], &st->ring_msg);
-	spi_message_add_tail(&st->ring_xfer[1], &st->ring_msg);
-
-	for (i = 0; i < scan_count; i++) {
-		st->ring_xfer[i + 2].rx_buf = &st->rx_buf[i];
-		st->ring_xfer[i + 2].len = 2;
-		st->ring_xfer[i + 2].cs_change = 1;
-		spi_message_add_tail(&st->ring_xfer[i + 2], &st->ring_msg);
-	}
-	/* make sure last transfer cs_change is not set */
-	st->ring_xfer[i + 1].cs_change = 0;
-
-	return 0;
-}
-
-/**
- * ad7298_trigger_handler() bh of trigger launched polling to ring buffer
- *
- * Currently there is no option in this driver to disable the saving of
- * timestamps within the ring.
- **/
-static irqreturn_t ad7298_trigger_handler(int irq, void *p)
-{
-	struct iio_poll_func *pf = p;
-	struct iio_dev *indio_dev = pf->indio_dev;
-	struct ad7298_state *st = iio_priv(indio_dev);
-	s64 time_ns = 0;
-	int b_sent;
-
-	b_sent = spi_sync(st->spi, &st->ring_msg);
-	if (b_sent)
-		goto done;
-
-	if (indio_dev->scan_timestamp) {
-		time_ns = iio_get_time_ns();
-		memcpy((u8 *)st->rx_buf + indio_dev->scan_bytes - sizeof(s64),
-			&time_ns, sizeof(time_ns));
-	}
-
-	iio_push_to_buffers(indio_dev, (u8 *)st->rx_buf);
-
-done:
-	iio_trigger_notify_done(indio_dev->trig);
-
-	return IRQ_HANDLED;
-}
-
-static int ad7298_scan_direct(struct ad7298_state *st, unsigned ch)
-{
-	int ret;
-	st->tx_buf[0] = cpu_to_be16(AD7298_WRITE | st->ext_ref |
-				   (AD7298_CH(0) >> ch));
-
-	ret = spi_sync(st->spi, &st->scan_single_msg);
-	if (ret)
-		return ret;
-
-	return be16_to_cpu(st->rx_buf[0]);
-}
-
-static int ad7298_scan_temp(struct ad7298_state *st, int *val)
-{
-	int ret;
-	__be16 buf;
-
-	buf = cpu_to_be16(AD7298_WRITE | AD7298_TSENSE |
-			  AD7298_TAVG | st->ext_ref);
-
-	ret = spi_write(st->spi, (u8 *)&buf, 2);
-	if (ret)
-		return ret;
-
-	buf = cpu_to_be16(0);
-
-	ret = spi_write(st->spi, (u8 *)&buf, 2);
-	if (ret)
-		return ret;
-
-	usleep_range(101, 1000); /* sleep > 100us */
-
-	ret = spi_read(st->spi, (u8 *)&buf, 2);
-	if (ret)
-		return ret;
-
-	*val = sign_extend32(be16_to_cpu(buf), 11);
-
-	return 0;
-}
-
-static int ad7298_get_ref_voltage(struct ad7298_state *st)
-{
-	int vref;
-
-	if (st->ext_ref) {
-		vref = regulator_get_voltage(st->reg);
-		if (vref < 0)
-			return vref;
-
-		return vref / 1000;
-	} else {
-		return AD7298_INTREF_mV;
-	}
-}
-
-static int ad7298_read_raw(struct iio_dev *indio_dev,
-			   struct iio_chan_spec const *chan,
-			   int *val,
-			   int *val2,
-			   long m)
-{
-	int ret;
-	struct ad7298_state *st = iio_priv(indio_dev);
-
-	switch (m) {
-	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
-		if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) {
-			ret = -EBUSY;
-		} else {
-			if (chan->address == AD7298_CH_TEMP)
-				ret = ad7298_scan_temp(st, val);
-			else
-				ret = ad7298_scan_direct(st, chan->address);
-		}
-		mutex_unlock(&indio_dev->mlock);
-
-		if (ret < 0)
-			return ret;
-
-		if (chan->address != AD7298_CH_TEMP)
-			*val = ret & RES_MASK(AD7298_BITS);
-
-		return IIO_VAL_INT;
-	case IIO_CHAN_INFO_SCALE:
-		switch (chan->type) {
-		case IIO_VOLTAGE:
-			*val = ad7298_get_ref_voltage(st);
-			*val2 = chan->scan_type.realbits;
-			return IIO_VAL_FRACTIONAL_LOG2;
-		case IIO_TEMP:
-			*val = ad7298_get_ref_voltage(st);
-			*val2 = 10;
-			return IIO_VAL_FRACTIONAL;
-		default:
-			return -EINVAL;
-		}
-	case IIO_CHAN_INFO_OFFSET:
-		*val = 1093 - 2732500 / ad7298_get_ref_voltage(st);
-		return IIO_VAL_INT;
-	}
-	return -EINVAL;
-}
-
-static const struct iio_info ad7298_info = {
-	.read_raw = &ad7298_read_raw,
-	.update_scan_mode = ad7298_update_scan_mode,
-	.driver_module = THIS_MODULE,
-};
-
-static int __devinit ad7298_probe(struct spi_device *spi)
-{
-	struct ad7298_platform_data *pdata = spi->dev.platform_data;
-	struct ad7298_state *st;
-	struct iio_dev *indio_dev = iio_device_alloc(sizeof(*st));
-	int ret;
-
-	if (indio_dev == NULL)
-		return -ENOMEM;
-
-	st = iio_priv(indio_dev);
-
-	if (pdata && pdata->ext_ref)
-		st->ext_ref = AD7298_EXTREF;
-
-	if (st->ext_ref) {
-		st->reg = regulator_get(&spi->dev, "vref");
-		if (IS_ERR(st->reg)) {
-			ret = PTR_ERR(st->reg);
-			goto error_free;
-		}
-		ret = regulator_enable(st->reg);
-		if (ret)
-			goto error_put_reg;
-	}
-
-	spi_set_drvdata(spi, indio_dev);
-
-	st->spi = spi;
-
-	indio_dev->name = spi_get_device_id(spi)->name;
-	indio_dev->dev.parent = &spi->dev;
-	indio_dev->modes = INDIO_DIRECT_MODE;
-	indio_dev->channels = ad7298_channels;
-	indio_dev->num_channels = ARRAY_SIZE(ad7298_channels);
-	indio_dev->info = &ad7298_info;
-
-	/* Setup default message */
-
-	st->scan_single_xfer[0].tx_buf = &st->tx_buf[0];
-	st->scan_single_xfer[0].len = 2;
-	st->scan_single_xfer[0].cs_change = 1;
-	st->scan_single_xfer[1].tx_buf = &st->tx_buf[1];
-	st->scan_single_xfer[1].len = 2;
-	st->scan_single_xfer[1].cs_change = 1;
-	st->scan_single_xfer[2].rx_buf = &st->rx_buf[0];
-	st->scan_single_xfer[2].len = 2;
-
-	spi_message_init(&st->scan_single_msg);
-	spi_message_add_tail(&st->scan_single_xfer[0], &st->scan_single_msg);
-	spi_message_add_tail(&st->scan_single_xfer[1], &st->scan_single_msg);
-	spi_message_add_tail(&st->scan_single_xfer[2], &st->scan_single_msg);
-
-	ret = iio_triggered_buffer_setup(indio_dev, NULL,
-			&ad7298_trigger_handler, NULL);
-	if (ret)
-		goto error_disable_reg;
-
-	ret = iio_device_register(indio_dev);
-	if (ret)
-		goto error_cleanup_ring;
-
-	return 0;
-
-error_cleanup_ring:
-	iio_triggered_buffer_cleanup(indio_dev);
-error_disable_reg:
-	if (st->ext_ref)
-		regulator_disable(st->reg);
-error_put_reg:
-	if (st->ext_ref)
-		regulator_put(st->reg);
-error_free:
-	iio_device_free(indio_dev);
-
-	return ret;
-}
-
-static int __devexit ad7298_remove(struct spi_device *spi)
-{
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-	struct ad7298_state *st = iio_priv(indio_dev);
-
-	iio_device_unregister(indio_dev);
-	iio_triggered_buffer_cleanup(indio_dev);
-	if (st->ext_ref) {
-		regulator_disable(st->reg);
-		regulator_put(st->reg);
-	}
-	iio_device_free(indio_dev);
-
-	return 0;
-}
-
-static const struct spi_device_id ad7298_id[] = {
-	{"ad7298", 0},
-	{}
-};
-MODULE_DEVICE_TABLE(spi, ad7298_id);
-
-static struct spi_driver ad7298_driver = {
-	.driver = {
-		.name	= "ad7298",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= ad7298_probe,
-	.remove		= __devexit_p(ad7298_remove),
-	.id_table	= ad7298_id,
-};
-module_spi_driver(ad7298_driver);
-
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
-MODULE_DESCRIPTION("Analog Devices AD7298 ADC");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/ad7298.h b/drivers/staging/iio/adc/ad7298.h
deleted file mode 100644
index c8ac969ec016..000000000000
--- a/drivers/staging/iio/adc/ad7298.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * AD7298 SPI ADC driver
- *
- * Copyright 2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#ifndef IIO_ADC_AD7298_H_
-#define IIO_ADC_AD7298_H_
-
-/**
- * struct ad7298_platform_data - Platform data for the ad7298 ADC driver
- * @ext_ref: Whether to use an external reference voltage.
- **/
-struct ad7298_platform_data {
-	bool ext_ref;
-};
-
-#endif /* IIO_ADC_AD7298_H_ */
diff --git a/include/linux/platform_data/ad7298.h b/include/linux/platform_data/ad7298.h
new file mode 100644
index 000000000000..fbf8adf1363a
--- /dev/null
+++ b/include/linux/platform_data/ad7298.h
@@ -0,0 +1,20 @@
+/*
+ * AD7298 SPI ADC driver
+ *
+ * Copyright 2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_AD7298_H__
+#define __LINUX_PLATFORM_DATA_AD7298_H__
+
+/**
+ * struct ad7298_platform_data - Platform data for the ad7298 ADC driver
+ * @ext_ref: Whether to use an external reference voltage.
+ **/
+struct ad7298_platform_data {
+	bool ext_ref;
+};
+
+#endif /* IIO_ADC_AD7298_H_ */
-- 
cgit v1.2.3-55-g7522


From d962ec4922493da48bb0fd33b26200de039b0d75 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann
Date: Wed, 24 Oct 2012 11:45:12 -0700
Subject: NFC: Remove unused details from pn544.h header file

The majority of the defines and structures from pn544.h are no
longer in use. So just remove them.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/nfc/pn544.h | 60 -----------------------------------------------
 1 file changed, 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfc/pn544.h b/include/linux/nfc/pn544.h
index 9890bbaf4328..713bfd703342 100644
--- a/include/linux/nfc/pn544.h
+++ b/include/linux/nfc/pn544.h
@@ -25,65 +25,6 @@
 
 #include <linux/i2c.h>
 
-#define PN544_DRIVER_NAME	"pn544"
-#define PN544_MAXWINDOW_SIZE	7
-#define PN544_WINDOW_SIZE	4
-#define PN544_RETRIES		10
-#define PN544_MAX_I2C_TRANSFER	0x0400
-#define PN544_MSG_MAX_SIZE	0x21 /* at normal HCI mode */
-
-/* ioctl */
-#define PN544_CHAR_BASE		'P'
-#define PN544_IOR(num, dtype)	_IOR(PN544_CHAR_BASE, num, dtype)
-#define PN544_IOW(num, dtype)	_IOW(PN544_CHAR_BASE, num, dtype)
-#define PN544_GET_FW_MODE	PN544_IOW(1, unsigned int)
-#define PN544_SET_FW_MODE	PN544_IOW(2, unsigned int)
-#define PN544_GET_DEBUG		PN544_IOW(3, unsigned int)
-#define PN544_SET_DEBUG		PN544_IOW(4, unsigned int)
-
-/* Timing restrictions (ms) */
-#define PN544_RESETVEN_TIME	30 /* 7 */
-#define PN544_PVDDVEN_TIME	0
-#define PN544_VBATVEN_TIME	0
-#define PN544_GPIO4VEN_TIME	0
-#define PN544_WAKEUP_ACK	5
-#define PN544_WAKEUP_GUARD	(PN544_WAKEUP_ACK + 1)
-#define PN544_INACTIVITY_TIME	1000
-#define PN544_INTERFRAME_DELAY	200 /* us */
-#define PN544_BAUDRATE_CHANGE	150 /* us */
-
-/* Debug bits */
-#define PN544_DEBUG_BUF		0x01
-#define PN544_DEBUG_READ	0x02
-#define PN544_DEBUG_WRITE	0x04
-#define PN544_DEBUG_IRQ		0x08
-#define PN544_DEBUG_CALLS	0x10
-#define PN544_DEBUG_MODE	0x20
-
-/* Normal (HCI) mode */
-#define PN544_LLC_HCI_OVERHEAD	3 /* header + crc (to length) */
-#define PN544_LLC_MIN_SIZE	(1 + PN544_LLC_HCI_OVERHEAD) /* length + */
-#define PN544_LLC_MAX_DATA	(PN544_MSG_MAX_SIZE - 2)
-#define PN544_LLC_MAX_HCI_SIZE	(PN544_LLC_MAX_DATA - 2)
-
-struct pn544_llc_packet {
-	unsigned char length; /* of rest of packet */
-	unsigned char header;
-	unsigned char data[PN544_LLC_MAX_DATA]; /* includes crc-ccitt */
-};
-
-/* Firmware upgrade mode */
-#define PN544_FW_HEADER_SIZE	3
-/* max fw transfer is 1024bytes, but I2C limits it to 0xC0 */
-#define PN544_MAX_FW_DATA	(PN544_MAX_I2C_TRANSFER - PN544_FW_HEADER_SIZE)
-
-struct pn544_fw_packet {
-	unsigned char command; /* status in answer */
-	unsigned char length[2]; /* big-endian order (msf) */
-	unsigned char data[PN544_MAX_FW_DATA];
-};
-
-#ifdef __KERNEL__
 enum {
 	NFC_GPIO_ENABLE,
 	NFC_GPIO_FW_RESET,
@@ -99,6 +40,5 @@ struct pn544_nfc_platform_data {
 	void (*disable) (void);
 	int (*get_gpio)(int type);
 };
-#endif /* __KERNEL__ */
 
 #endif /* _PN544_H_ */
-- 
cgit v1.2.3-55-g7522


From 61cdb01853c99e78d8b54db1b77e524c0dce585d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann
Date: Wed, 24 Oct 2012 11:45:26 -0700
Subject: NFC: Move pn544.h to linux/platform_data/

The pn544.h just provides the platform data struct and defines and
nothing else. So move it to to linux/platform_data/ now.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/pn544/i2c.c             |  2 +-
 include/linux/nfc/pn544.h           | 44 -------------------------------------
 include/linux/platform_data/pn544.h | 44 +++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 45 deletions(-)
 delete mode 100644 include/linux/nfc/pn544.h
 create mode 100644 include/linux/platform_data/pn544.h

(limited to 'include/linux')

diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index fb430d882352..7da9071b68b6 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -26,7 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 
-#include <linux/nfc/pn544.h>
+#include <linux/platform_data/pn544.h>
 
 #include <net/nfc/hci.h>
 #include <net/nfc/llc.h>
diff --git a/include/linux/nfc/pn544.h b/include/linux/nfc/pn544.h
deleted file mode 100644
index 713bfd703342..000000000000
--- a/include/linux/nfc/pn544.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Driver include for the PN544 NFC chip.
- *
- * Copyright (C) Nokia Corporation
- *
- * Author: Jari Vanhala <ext-jari.vanhala@nokia.com>
- * Contact: Matti Aaltoenn <matti.j.aaltonen@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _PN544_H_
-#define _PN544_H_
-
-#include <linux/i2c.h>
-
-enum {
-	NFC_GPIO_ENABLE,
-	NFC_GPIO_FW_RESET,
-	NFC_GPIO_IRQ
-};
-
-/* board config */
-struct pn544_nfc_platform_data {
-	int (*request_resources) (struct i2c_client *client);
-	void (*free_resources) (void);
-	void (*enable) (int fw);
-	int (*test) (void);
-	void (*disable) (void);
-	int (*get_gpio)(int type);
-};
-
-#endif /* _PN544_H_ */
diff --git a/include/linux/platform_data/pn544.h b/include/linux/platform_data/pn544.h
new file mode 100644
index 000000000000..713bfd703342
--- /dev/null
+++ b/include/linux/platform_data/pn544.h
@@ -0,0 +1,44 @@
+/*
+ * Driver include for the PN544 NFC chip.
+ *
+ * Copyright (C) Nokia Corporation
+ *
+ * Author: Jari Vanhala <ext-jari.vanhala@nokia.com>
+ * Contact: Matti Aaltoenn <matti.j.aaltonen@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PN544_H_
+#define _PN544_H_
+
+#include <linux/i2c.h>
+
+enum {
+	NFC_GPIO_ENABLE,
+	NFC_GPIO_FW_RESET,
+	NFC_GPIO_IRQ
+};
+
+/* board config */
+struct pn544_nfc_platform_data {
+	int (*request_resources) (struct i2c_client *client);
+	void (*free_resources) (void);
+	void (*enable) (int fw);
+	int (*test) (void);
+	void (*disable) (void);
+	int (*get_gpio)(int type);
+};
+
+#endif /* _PN544_H_ */
-- 
cgit v1.2.3-55-g7522


From 4b20db3de8dab005b07c74161cb041db8c5ff3a7 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Tue, 6 Nov 2012 11:31:49 +0000
Subject: kref: Implement kref_get_unless_zero v3

This function is intended to simplify locking around refcounting for
objects that can be looked up from a lookup structure, and which are
removed from that lookup structure in the object destructor.
Operations on such objects require at least a read lock around
lookup + kref_get, and a write lock around kref_put + remove from lookup
structure. Furthermore, RCU implementations become extremely tricky.
With a lookup followed by a kref_get_unless_zero *with return value check*
locking in the kref_put path can be deferred to the actual removal from
the lookup structure and RCU lookups become trivial.

v2: Formatting fixes.
v3: Invert the return value.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/kref.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 65af6887872f..4972e6e9ca93 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -111,4 +111,25 @@ static inline int kref_put_mutex(struct kref *kref,
 	}
 	return 0;
 }
+
+/**
+ * kref_get_unless_zero - Increment refcount for object unless it is zero.
+ * @kref: object.
+ *
+ * Return non-zero if the increment succeeded. Otherwise return 0.
+ *
+ * This function is intended to simplify locking around refcounting for
+ * objects that can be looked up from a lookup structure, and which are
+ * removed from that lookup structure in the object destructor.
+ * Operations on such objects require at least a read lock around
+ * lookup + kref_get, and a write lock around kref_put + remove from lookup
+ * structure. Furthermore, RCU implementations become extremely tricky.
+ * With a lookup followed by a kref_get_unless_zero *with return value check*
+ * locking in the kref_put path can be deferred to the actual removal from
+ * the lookup structure and RCU lookups become trivial.
+ */
+static inline int __must_check kref_get_unless_zero(struct kref *kref)
+{
+	return atomic_add_unless(&kref->refcount, 1, 0);
+}
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3-55-g7522


From e09651fcc295a7dc802f38d9494f5b860dd90bca Mon Sep 17 00:00:00 2001
From: Nishanth Menon
Date: Mon, 29 Oct 2012 08:02:23 -0500
Subject: PM / devfreq: documentation cleanups for devfreq header

struct parameters need to have ':' in documentation for
scripts/kernel-doc to parse appropriately.

Fix the errors reported by:
./scripts/kernel-doc include/linux/devfreq.h >/dev/null

Cc: Rajagopal Venkat <rajagopal.venkat@linaro.org>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Kevin Hilman <khilman@ti.com>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

Signed-off-by: Nishanth Menon <nm@ti.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/devfreq.h | 54 ++++++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 7e2e2ea4a70f..1461fb2355ad 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -25,12 +25,12 @@ struct devfreq;
  * struct devfreq_dev_status - Data given from devfreq user device to
  *			     governors. Represents the performance
  *			     statistics.
- * @total_time		The total time represented by this instance of
+ * @total_time:		The total time represented by this instance of
  *			devfreq_dev_status
- * @busy_time		The time that the device was working among the
+ * @busy_time:		The time that the device was working among the
  *			total_time.
- * @current_frequency	The operating frequency.
- * @private_data	An entry not specified by the devfreq framework.
+ * @current_frequency:	The operating frequency.
+ * @private_data:	An entry not specified by the devfreq framework.
  *			A device and a specific governor may have their
  *			own protocol with private_data. However, because
  *			this is governor-specific, a governor using this
@@ -54,21 +54,21 @@ struct devfreq_dev_status {
 
 /**
  * struct devfreq_dev_profile - Devfreq's user device profile
- * @initial_freq	The operating frequency when devfreq_add_device() is
+ * @initial_freq:	The operating frequency when devfreq_add_device() is
  *			called.
- * @polling_ms		The polling interval in ms. 0 disables polling.
- * @target		The device should set its operating frequency at
+ * @polling_ms:		The polling interval in ms. 0 disables polling.
+ * @target:		The device should set its operating frequency at
  *			freq or lowest-upper-than-freq value. If freq is
  *			higher than any operable frequency, set maximum.
  *			Before returning, target function should set
  *			freq at the current frequency.
  *			The "flags" parameter's possible values are
  *			explained above with "DEVFREQ_FLAG_*" macros.
- * @get_dev_status	The device should provide the current performance
+ * @get_dev_status:	The device should provide the current performance
  *			status to devfreq, which is used by governors.
- * @get_cur_freq	The device should provide the current frequency
+ * @get_cur_freq:	The device should provide the current frequency
  *			at which it is operating.
- * @exit		An optional callback that is called when devfreq
+ * @exit:		An optional callback that is called when devfreq
  *			is removing the devfreq object due to error or
  *			from devfreq_remove_device() call. If the user
  *			has registered devfreq->nb at a notifier-head,
@@ -87,14 +87,14 @@ struct devfreq_dev_profile {
 
 /**
  * struct devfreq_governor - Devfreq policy governor
- * @name		Governor's name
- * @get_target_freq	Returns desired operating frequency for the device.
+ * @name:		Governor's name
+ * @get_target_freq:	Returns desired operating frequency for the device.
  *			Basically, get_target_freq will run
  *			devfreq_dev_profile.get_dev_status() to get the
  *			status of the device (load = busy_time / total_time).
  *			If no_central_polling is set, this callback is called
  *			only with update_devfreq() notified by OPP.
- * @event_handler       Callback for devfreq core framework to notify events
+ * @event_handler:      Callback for devfreq core framework to notify events
  *                      to governors. Events include per device governor
  *                      init and exit, opp changes out of devfreq, suspend
  *                      and resume of per device devfreq during device idle.
@@ -110,23 +110,23 @@ struct devfreq_governor {
 
 /**
  * struct devfreq - Device devfreq structure
- * @node	list node - contains the devices with devfreq that have been
+ * @node:	list node - contains the devices with devfreq that have been
  *		registered.
- * @lock	a mutex to protect accessing devfreq.
- * @dev		device registered by devfreq class. dev.parent is the device
+ * @lock:	a mutex to protect accessing devfreq.
+ * @dev:	device registered by devfreq class. dev.parent is the device
  *		using devfreq.
- * @profile	device-specific devfreq profile
- * @governor	method how to choose frequency based on the usage.
- * @nb		notifier block used to notify devfreq object that it should
+ * @profile:	device-specific devfreq profile
+ * @governor:	method how to choose frequency based on the usage.
+ * @nb:		notifier block used to notify devfreq object that it should
  *		reevaluate operable frequencies. Devfreq users may use
  *		devfreq.nb to the corresponding register notifier call chain.
- * @work	delayed work for load monitoring.
- * @previous_freq	previously configured frequency value.
- * @data	Private data of the governor. The devfreq framework does not
+ * @work:	delayed work for load monitoring.
+ * @previous_freq:	previously configured frequency value.
+ * @data:	Private data of the governor. The devfreq framework does not
  *		touch this.
- * @min_freq	Limit minimum frequency requested by user (0: none)
- * @max_freq	Limit maximum frequency requested by user (0: none)
- * @stop_polling	 devfreq polling status of a device.
+ * @min_freq:	Limit minimum frequency requested by user (0: none)
+ * @max_freq:	Limit maximum frequency requested by user (0: none)
+ * @stop_polling:	 devfreq polling status of a device.
  *
  * This structure stores the devfreq information for a give device.
  *
@@ -186,9 +186,9 @@ extern const struct devfreq_governor devfreq_simple_ondemand;
 /**
  * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
  *	and devfreq_add_device
- * @ upthreshold	If the load is over this value, the frequency jumps.
+ * @upthreshold:	If the load is over this value, the frequency jumps.
  *			Specify 0 to use the default. Valid value = 0 to 100.
- * @ downdifferential	If the load is under upthreshold - downdifferential,
+ * @downdifferential:	If the load is under upthreshold - downdifferential,
  *			the governor may consider slowing the frequency down.
  *			Specify 0 to use the default. Valid value = 0 to 100.
  *			downdifferential < upthreshold must hold.
-- 
cgit v1.2.3-55-g7522


From d851718f65d646c5033a28fa60631440c6dc0d4f Mon Sep 17 00:00:00 2001
From: Chanwoo Choi
Date: Thu, 8 Nov 2012 18:39:41 +0900
Subject: extcon: Add missing header file to extcon.h

This patch add missing header file(sysfs.h/device.h) to extcon.h
because 'struct extcon_dev' define attribute field(attr_g_muex/
attrs_muex/d_attrs_mues) and device_type field(extcon_dev_type).

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/extcon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 2c26c14cd710..54c00ce9ce7a 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -23,7 +23,9 @@
 #ifndef __LINUX_EXTCON_H__
 #define __LINUX_EXTCON_H__
 
+#include <linux/device.h>
 #include <linux/notifier.h>
+#include <linux/sysfs.h>
 
 #define SUPPORTED_CABLE_MAX	32
 #define CABLE_NAME_MAX		30
-- 
cgit v1.2.3-55-g7522


From e552bbaf5b987f57c43e6981a452b8a3c700b1ae Mon Sep 17 00:00:00 2001
From: Jonghwa Lee
Date: Thu, 23 Aug 2012 20:00:46 +0900
Subject: PM / devfreq: Add sysfs node for representing frequency transition
 information.

This patch adds sysfs node which can be used to get information of frequency
transition. It represents transition table which contains total number of transition of
each freqeuncy state and time spent. It is inspired CPUFREQ's status driver.

Signed-off-by: Jonghwa Lee <jonghwa3.lee@samsung.com>
[Added Documentation/ABI entry, updated kernel-doc, and resolved merge conflict]
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 Documentation/ABI/testing/sysfs-class-devfreq |  11 +++
 drivers/devfreq/devfreq.c                     | 101 ++++++++++++++++++++++++++
 include/linux/devfreq.h                       |  15 ++++
 3 files changed, 127 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index e672ccb02e7f..40f98a9428dc 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -44,6 +44,17 @@ Description:
 		(/sys/class/devfreq/.../central_polling is 0), this value
 		may be useless.
 
+What:		/sys/class/devfreq/.../trans_stat
+Date:		October 2012
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Descrtiption:
+		This ABI shows the statistics of devfreq behavior on a
+		specific device. It shows the time spent in each state and
+		the number of transitions between states.
+		In order to activate this ABI, the devfreq target device
+		driver should provide the list of available frequencies
+		with its profile.
+
 What:		/sys/class/devfreq/.../userspace/set_freq
 Date:		September 2011
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index c44e562bdfe0..bf6de38190cf 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -66,6 +66,51 @@ static struct devfreq *find_device_devfreq(struct device *dev)
 	return ERR_PTR(-ENODEV);
 }
 
+/**
+ * devfreq_get_freq_level() - Lookup freq_table for the frequency
+ * @devfreq:	the devfreq instance
+ * @freq:	the target frequency
+ */
+static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
+{
+	int lev;
+
+	for (lev = 0; lev < devfreq->profile->max_state; lev++)
+		if (freq == devfreq->profile->freq_table[lev])
+			return lev;
+
+	return -EINVAL;
+}
+
+/**
+ * devfreq_update_status() - Update statistics of devfreq behavior
+ * @devfreq:	the devfreq instance
+ * @freq:	the update target frequency
+ */
+static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
+{
+	int lev, prev_lev;
+	unsigned long cur_time;
+
+	lev = devfreq_get_freq_level(devfreq, freq);
+	if (lev < 0)
+		return lev;
+
+	cur_time = jiffies;
+	devfreq->time_in_state[lev] +=
+			 cur_time - devfreq->last_stat_updated;
+	if (freq != devfreq->previous_freq) {
+		prev_lev = devfreq_get_freq_level(devfreq,
+						devfreq->previous_freq);
+		devfreq->trans_table[(prev_lev *
+				devfreq->profile->max_state) + lev]++;
+		devfreq->total_trans++;
+	}
+	devfreq->last_stat_updated = cur_time;
+
+	return 0;
+}
+
 /* Load monitoring helper functions for governors use */
 
 /**
@@ -112,6 +157,11 @@ int update_devfreq(struct devfreq *devfreq)
 	if (err)
 		return err;
 
+	if (devfreq->profile->freq_table)
+		if (devfreq_update_status(devfreq, freq))
+			dev_err(&devfreq->dev,
+				"Couldn't update frequency transition information.\n");
+
 	devfreq->previous_freq = freq;
 	return err;
 }
@@ -378,6 +428,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
+	devfreq->trans_table =	devm_kzalloc(dev, sizeof(unsigned int) *
+						devfreq->profile->max_state *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned int) *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->last_stat_updated = jiffies;
+
 	dev_set_name(&devfreq->dev, dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
@@ -601,6 +660,47 @@ static ssize_t show_available_freqs(struct device *d,
 	return count;
 }
 
+static ssize_t show_trans_table(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct devfreq *devfreq = to_devfreq(dev);
+	ssize_t len;
+	int i, j, err;
+	unsigned int max_state = devfreq->profile->max_state;
+
+	err = devfreq_update_status(devfreq, devfreq->previous_freq);
+	if (err)
+		return 0;
+
+	len = sprintf(buf, "   From  :   To\n");
+	len += sprintf(buf + len, "         :");
+	for (i = 0; i < max_state; i++)
+		len += sprintf(buf + len, "%8u",
+				devfreq->profile->freq_table[i]);
+
+	len += sprintf(buf + len, "   time(ms)\n");
+
+	for (i = 0; i < max_state; i++) {
+		if (devfreq->profile->freq_table[i]
+					== devfreq->previous_freq) {
+			len += sprintf(buf + len, "*");
+		} else {
+			len += sprintf(buf + len, " ");
+		}
+		len += sprintf(buf + len, "%8u:",
+				devfreq->profile->freq_table[i]);
+		for (j = 0; j < max_state; j++)
+			len += sprintf(buf + len, "%8u",
+				devfreq->trans_table[(i * max_state) + j]);
+		len += sprintf(buf + len, "%10u\n",
+			jiffies_to_msecs(devfreq->time_in_state[i]));
+	}
+
+	len += sprintf(buf + len, "Total transition : %u\n",
+					devfreq->total_trans);
+	return len;
+}
+
 static struct device_attribute devfreq_attrs[] = {
 	__ATTR(governor, S_IRUGO, show_governor, NULL),
 	__ATTR(cur_freq, S_IRUGO, show_freq, NULL),
@@ -610,6 +710,7 @@ static struct device_attribute devfreq_attrs[] = {
 	       store_polling_interval),
 	__ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq),
 	__ATTR(max_freq, S_IRUGO | S_IWUSR, show_max_freq, store_max_freq),
+	__ATTR(trans_stat, S_IRUGO, show_trans_table, NULL),
 	{ },
 };
 
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 1461fb2355ad..bc35c4aee6a3 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -73,6 +73,8 @@ struct devfreq_dev_status {
  *			from devfreq_remove_device() call. If the user
  *			has registered devfreq->nb at a notifier-head,
  *			this is the time to unregister it.
+ * @freq_table:	Optional list of frequencies to support statistics.
+ * @max_state:	The size of freq_table.
  */
 struct devfreq_dev_profile {
 	unsigned long initial_freq;
@@ -83,6 +85,9 @@ struct devfreq_dev_profile {
 			      struct devfreq_dev_status *stat);
 	int (*get_cur_freq)(struct device *dev, unsigned long *freq);
 	void (*exit)(struct device *dev);
+
+	unsigned int *freq_table;
+	unsigned int max_state;
 };
 
 /**
@@ -127,6 +132,10 @@ struct devfreq_governor {
  * @min_freq:	Limit minimum frequency requested by user (0: none)
  * @max_freq:	Limit maximum frequency requested by user (0: none)
  * @stop_polling:	 devfreq polling status of a device.
+ * @total_trans:	Number of devfreq transitions
+ * @trans_table:	Statistics of devfreq transitions
+ * @time_in_state:	Statistics of devfreq states
+ * @last_stat_updated:	The last time stat updated
  *
  * This structure stores the devfreq information for a give device.
  *
@@ -153,6 +162,12 @@ struct devfreq {
 	unsigned long min_freq;
 	unsigned long max_freq;
 	bool stop_polling;
+
+	/* information for device freqeuncy transition */
+	unsigned int total_trans;
+	unsigned int *trans_table;
+	unsigned long *time_in_state;
+	unsigned long last_stat_updated;
 };
 
 #if defined(CONFIG_PM_DEVFREQ)
-- 
cgit v1.2.3-55-g7522


From 31974361761b18e07e821d3b262e34f1a2d105cb Mon Sep 17 00:00:00 2001
From: Nicolas Ferre
Date: Tue, 20 Nov 2012 16:38:17 +0800
Subject: ASoC: atmel-ssc: add phybase in device structure

Useful for future dmaengine use.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/misc/atmel-ssc.c  | 2 ++
 include/linux/atmel-ssc.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index a769719e36bf..d07a9eda7fff 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -157,6 +157,8 @@ static int ssc_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	ssc->phybase = regs->start;
+
 	ssc->clk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(ssc->clk)) {
 		dev_dbg(&pdev->dev, "no pclk clock defined\n");
diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h
index 1ca0e3292bc9..deb0ae58b99b 100644
--- a/include/linux/atmel-ssc.h
+++ b/include/linux/atmel-ssc.h
@@ -11,6 +11,7 @@ struct atmel_ssc_platform_data {
 
 struct ssc_device {
 	struct list_head	list;
+	resource_size_t		phybase;
 	void __iomem		*regs;
 	struct platform_device	*pdev;
 	struct atmel_ssc_platform_data *pdata;
-- 
cgit v1.2.3-55-g7522


From 3aa173b8db200bb96354481acc0a5b9e123119fe Mon Sep 17 00:00:00 2001
From: Nishanth Menon
Date: Mon, 29 Oct 2012 15:01:43 -0500
Subject: PM / devfreq: provide hooks for governors to be registered

Add devfreq_add_governor and devfreq_remove_governor which
can be invoked by governors to register with devfreq.

This sets up the stage to dynamically switch governors and
allow governors to be dynamically loaded as well.

Cc: Rajagopal Venkat <rajagopal.venkat@linaro.org>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Kevin Hilman <khilman@ti.com>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

Signed-off-by: Nishanth Menon <nm@ti.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/devfreq/governor.h |  4 ++
 include/linux/devfreq.h    |  3 ++
 3 files changed, 98 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index e0002c5cbadc..679ac424472f 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -36,6 +36,8 @@ static struct class *devfreq_class;
  */
 static struct workqueue_struct *devfreq_wq;
 
+/* The list of all device-devfreq governors */
+static LIST_HEAD(devfreq_governor_list);
 /* The list of all device-devfreq */
 static LIST_HEAD(devfreq_list);
 static DEFINE_MUTEX(devfreq_list_lock);
@@ -111,6 +113,32 @@ static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
 	return 0;
 }
 
+/**
+ * find_devfreq_governor() - find devfreq governor from name
+ * @name:	name of the governor
+ *
+ * Search the list of devfreq governors and return the matched
+ * governor's pointer. devfreq_list_lock should be held by the caller.
+ */
+static struct devfreq_governor *find_devfreq_governor(const char *name)
+{
+	struct devfreq_governor *tmp_governor;
+
+	if (unlikely(IS_ERR_OR_NULL(name))) {
+		pr_err("DEVFREQ: %s: Invalid parameters\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+	WARN(!mutex_is_locked(&devfreq_list_lock),
+	     "devfreq_list_lock must be locked.");
+
+	list_for_each_entry(tmp_governor, &devfreq_governor_list, node) {
+		if (!strncmp(tmp_governor->name, name, DEVFREQ_NAME_LEN))
+			return tmp_governor;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
 /* Load monitoring helper functions for governors use */
 
 /**
@@ -515,6 +543,69 @@ int devfreq_resume_device(struct devfreq *devfreq)
 }
 EXPORT_SYMBOL(devfreq_resume_device);
 
+/**
+ * devfreq_add_governor() - Add devfreq governor
+ * @governor:	the devfreq governor to be added
+ */
+int devfreq_add_governor(struct devfreq_governor *governor)
+{
+	struct devfreq_governor *g;
+	int err = 0;
+
+	if (!governor) {
+		pr_err("%s: Invalid parameters.\n", __func__);
+		return -EINVAL;
+	}
+
+	mutex_lock(&devfreq_list_lock);
+	g = find_devfreq_governor(governor->name);
+	if (!IS_ERR(g)) {
+		pr_err("%s: governor %s already registered\n", __func__,
+		       g->name);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	list_add(&governor->node, &devfreq_governor_list);
+
+err_out:
+	mutex_unlock(&devfreq_list_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(devfreq_add_governor);
+
+/**
+ * devfreq_remove_device() - Remove devfreq feature from a device.
+ * @governor:	the devfreq governor to be removed
+ */
+int devfreq_remove_governor(struct devfreq_governor *governor)
+{
+	struct devfreq_governor *g;
+	int err = 0;
+
+	if (!governor) {
+		pr_err("%s: Invalid parameters.\n", __func__);
+		return -EINVAL;
+	}
+
+	mutex_lock(&devfreq_list_lock);
+	g = find_devfreq_governor(governor->name);
+	if (IS_ERR(g)) {
+		pr_err("%s: governor %s not registered\n", __func__,
+		       g->name);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	list_del(&governor->node);
+err_out:
+	mutex_unlock(&devfreq_list_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(devfreq_remove_governor);
+
 static ssize_t show_governor(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index 26432ac0a398..fad7d6321978 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -34,4 +34,8 @@ extern void devfreq_monitor_suspend(struct devfreq *devfreq);
 extern void devfreq_monitor_resume(struct devfreq *devfreq);
 extern void devfreq_interval_update(struct devfreq *devfreq,
 					unsigned int *delay);
+
+extern int devfreq_add_governor(struct devfreq_governor *governor);
+extern int devfreq_remove_governor(struct devfreq_governor *governor);
+
 #endif /* _GOVERNOR_H */
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index bc35c4aee6a3..6484a3f8dda8 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -92,6 +92,7 @@ struct devfreq_dev_profile {
 
 /**
  * struct devfreq_governor - Devfreq policy governor
+ * @node:		list node - contains registered devfreq governors
  * @name:		Governor's name
  * @get_target_freq:	Returns desired operating frequency for the device.
  *			Basically, get_target_freq will run
@@ -107,6 +108,8 @@ struct devfreq_dev_profile {
  * Note that the callbacks are called with devfreq->lock locked by devfreq.
  */
 struct devfreq_governor {
+	struct list_head node;
+
 	const char name[DEVFREQ_NAME_LEN];
 	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
 	int (*event_handler)(struct devfreq *devfreq,
-- 
cgit v1.2.3-55-g7522


From 1b5c1be2c88e8445a20fa1929e26c37e7ca8c926 Mon Sep 17 00:00:00 2001
From: Nishanth Menon
Date: Mon, 29 Oct 2012 15:01:45 -0500
Subject: PM / devfreq: map devfreq drivers to governor using name

Allow devfreq drivers to register a preferred governor name
and when the devfreq governor loads itself at a later point
required drivers are managed appropriately, at the time of
unload of a devfreq governor, stop managing those drivers
as well.

Since the governor structures do not need to be exposed
anymore, remove the definitions and make them static

NOTE: devfreq_list_lock is now used to protect governor
start and stop - as this allows us to protect governors and
devfreq with the proper dependencies as needed.

As part of this change, change the registration of exynos
bus driver to request for ondemand using the governor name.

Cc: Rajagopal Venkat <rajagopal.venkat@linaro.org>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Kevin Hilman <khilman@ti.com>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

Signed-off-by: Nishanth Menon <nm@ti.com>
[Merge conflict resolved by MyungJoo Ham]
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c                 | 95 ++++++++++++++++++++++++++++---
 drivers/devfreq/exynos4_bus.c             |  2 +-
 drivers/devfreq/governor_performance.c    |  2 +-
 drivers/devfreq/governor_powersave.c      |  2 +-
 drivers/devfreq/governor_simpleondemand.c |  2 +-
 drivers/devfreq/governor_userspace.c      |  2 +-
 include/linux/devfreq.h                   | 21 ++-----
 7 files changed, 96 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 679ac424472f..0d7be03d561f 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -159,6 +159,9 @@ int update_devfreq(struct devfreq *devfreq)
 		return -EINVAL;
 	}
 
+	if (!devfreq->governor)
+		return -EINVAL;
+
 	/* Reevaluate the proper frequency */
 	err = devfreq->governor->get_target_freq(devfreq, &freq);
 	if (err)
@@ -379,7 +382,9 @@ static void _remove_devfreq(struct devfreq *devfreq, bool skip)
 	list_del(&devfreq->node);
 	mutex_unlock(&devfreq_list_lock);
 
-	devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP, NULL);
+	if (devfreq->governor)
+		devfreq->governor->event_handler(devfreq,
+						 DEVFREQ_GOV_STOP, NULL);
 
 	if (devfreq->profile->exit)
 		devfreq->profile->exit(devfreq->dev.parent);
@@ -412,19 +417,20 @@ static void devfreq_dev_release(struct device *dev)
  * devfreq_add_device() - Add devfreq feature to the device
  * @dev:	the device to add devfreq feature.
  * @profile:	device-specific profile to run devfreq.
- * @governor:	the policy to choose frequency.
+ * @governor_name:	name of the policy to choose frequency.
  * @data:	private data for the governor. The devfreq framework does not
  *		touch this value.
  */
 struct devfreq *devfreq_add_device(struct device *dev,
 				   struct devfreq_dev_profile *profile,
-				   const struct devfreq_governor *governor,
+				   const char *governor_name,
 				   void *data)
 {
 	struct devfreq *devfreq;
+	struct devfreq_governor *governor;
 	int err = 0;
 
-	if (!dev || !profile || !governor) {
+	if (!dev || !profile || !governor_name) {
 		dev_err(dev, "%s: Invalid parameters.\n", __func__);
 		return ERR_PTR(-EINVAL);
 	}
@@ -452,7 +458,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->dev.class = devfreq_class;
 	devfreq->dev.release = devfreq_dev_release;
 	devfreq->profile = profile;
-	devfreq->governor = governor;
+	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
@@ -478,10 +484,14 @@ struct devfreq *devfreq_add_device(struct device *dev,
 
 	mutex_lock(&devfreq_list_lock);
 	list_add(&devfreq->node, &devfreq_list);
-	mutex_unlock(&devfreq_list_lock);
 
-	err = devfreq->governor->event_handler(devfreq,
-				DEVFREQ_GOV_START, NULL);
+	governor = find_devfreq_governor(devfreq->governor_name);
+	if (!IS_ERR(governor))
+		devfreq->governor = governor;
+	if (devfreq->governor)
+		err = devfreq->governor->event_handler(devfreq,
+					DEVFREQ_GOV_START, NULL);
+	mutex_unlock(&devfreq_list_lock);
 	if (err) {
 		dev_err(dev, "%s: Unable to start governor for the device\n",
 			__func__);
@@ -524,6 +534,9 @@ int devfreq_suspend_device(struct devfreq *devfreq)
 	if (!devfreq)
 		return -EINVAL;
 
+	if (!devfreq->governor)
+		return 0;
+
 	return devfreq->governor->event_handler(devfreq,
 				DEVFREQ_GOV_SUSPEND, NULL);
 }
@@ -538,6 +551,9 @@ int devfreq_resume_device(struct devfreq *devfreq)
 	if (!devfreq)
 		return -EINVAL;
 
+	if (!devfreq->governor)
+		return 0;
+
 	return devfreq->governor->event_handler(devfreq,
 				DEVFREQ_GOV_RESUME, NULL);
 }
@@ -550,6 +566,7 @@ EXPORT_SYMBOL(devfreq_resume_device);
 int devfreq_add_governor(struct devfreq_governor *governor)
 {
 	struct devfreq_governor *g;
+	struct devfreq *devfreq;
 	int err = 0;
 
 	if (!governor) {
@@ -568,6 +585,38 @@ int devfreq_add_governor(struct devfreq_governor *governor)
 
 	list_add(&governor->node, &devfreq_governor_list);
 
+	list_for_each_entry(devfreq, &devfreq_list, node) {
+		int ret = 0;
+		struct device *dev = devfreq->dev.parent;
+
+		if (!strncmp(devfreq->governor_name, governor->name,
+			     DEVFREQ_NAME_LEN)) {
+			/* The following should never occur */
+			if (devfreq->governor) {
+				dev_warn(dev,
+					 "%s: Governor %s already present\n",
+					 __func__, devfreq->governor->name);
+				ret = devfreq->governor->event_handler(devfreq,
+							DEVFREQ_GOV_STOP, NULL);
+				if (ret) {
+					dev_warn(dev,
+						 "%s: Governor %s stop = %d\n",
+						 __func__,
+						 devfreq->governor->name, ret);
+				}
+				/* Fall through */
+			}
+			devfreq->governor = governor;
+			ret = devfreq->governor->event_handler(devfreq,
+						DEVFREQ_GOV_START, NULL);
+			if (ret) {
+				dev_warn(dev, "%s: Governor %s start=%d\n",
+					 __func__, devfreq->governor->name,
+					 ret);
+			}
+		}
+	}
+
 err_out:
 	mutex_unlock(&devfreq_list_lock);
 
@@ -582,6 +631,7 @@ EXPORT_SYMBOL(devfreq_add_governor);
 int devfreq_remove_governor(struct devfreq_governor *governor)
 {
 	struct devfreq_governor *g;
+	struct devfreq *devfreq;
 	int err = 0;
 
 	if (!governor) {
@@ -597,6 +647,29 @@ int devfreq_remove_governor(struct devfreq_governor *governor)
 		err = -EINVAL;
 		goto err_out;
 	}
+	list_for_each_entry(devfreq, &devfreq_list, node) {
+		int ret;
+		struct device *dev = devfreq->dev.parent;
+
+		if (!strncmp(devfreq->governor_name, governor->name,
+			     DEVFREQ_NAME_LEN)) {
+			/* we should have a devfreq governor! */
+			if (!devfreq->governor) {
+				dev_warn(dev, "%s: Governor %s NOT present\n",
+					 __func__, governor->name);
+				continue;
+				/* Fall through */
+			}
+			ret = devfreq->governor->event_handler(devfreq,
+						DEVFREQ_GOV_STOP, NULL);
+			if (ret) {
+				dev_warn(dev, "%s: Governor %s stop=%d\n",
+					 __func__, devfreq->governor->name,
+					 ret);
+			}
+			devfreq->governor = NULL;
+		}
+	}
 
 	list_del(&governor->node);
 err_out:
@@ -609,6 +682,9 @@ EXPORT_SYMBOL(devfreq_remove_governor);
 static ssize_t show_governor(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
+	if (!to_devfreq(dev)->governor)
+		return -EINVAL;
+
 	return sprintf(buf, "%s\n", to_devfreq(dev)->governor->name);
 }
 
@@ -645,6 +721,9 @@ static ssize_t store_polling_interval(struct device *dev,
 	unsigned int value;
 	int ret;
 
+	if (!df->governor)
+		return -EINVAL;
+
 	ret = sscanf(buf, "%u", &value);
 	if (ret != 1)
 		return -EINVAL;
diff --git a/drivers/devfreq/exynos4_bus.c b/drivers/devfreq/exynos4_bus.c
index 68145316c49c..b8ac28497b37 100644
--- a/drivers/devfreq/exynos4_bus.c
+++ b/drivers/devfreq/exynos4_bus.c
@@ -1040,7 +1040,7 @@ static __devinit int exynos4_busfreq_probe(struct platform_device *pdev)
 	busfreq_mon_reset(data);
 
 	data->devfreq = devfreq_add_device(dev, &exynos4_devfreq_profile,
-					   &devfreq_simple_ondemand, NULL);
+					   "simple_ondemand", NULL);
 	if (IS_ERR(data->devfreq))
 		return PTR_ERR(data->devfreq);
 
diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c
index db8ff77dbed2..865a36956917 100644
--- a/drivers/devfreq/governor_performance.c
+++ b/drivers/devfreq/governor_performance.c
@@ -40,7 +40,7 @@ static int devfreq_performance_handler(struct devfreq *devfreq,
 	return ret;
 }
 
-const struct devfreq_governor devfreq_performance = {
+static struct devfreq_governor devfreq_performance = {
 	.name = "performance",
 	.get_target_freq = devfreq_performance_func,
 	.event_handler = devfreq_performance_handler,
diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c
index 30f0fca8d635..8612c0f96b79 100644
--- a/drivers/devfreq/governor_powersave.c
+++ b/drivers/devfreq/governor_powersave.c
@@ -37,7 +37,7 @@ static int devfreq_powersave_handler(struct devfreq *devfreq,
 	return ret;
 }
 
-const struct devfreq_governor devfreq_powersave = {
+static struct devfreq_governor devfreq_powersave = {
 	.name = "powersave",
 	.get_target_freq = devfreq_powersave_func,
 	.event_handler = devfreq_powersave_handler,
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index 85f9ed531b1e..a870a24bb56b 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -120,7 +120,7 @@ static int devfreq_simple_ondemand_handler(struct devfreq *devfreq,
 	return 0;
 }
 
-const struct devfreq_governor devfreq_simple_ondemand = {
+static struct devfreq_governor devfreq_simple_ondemand = {
 	.name = "simple_ondemand",
 	.get_target_freq = devfreq_simple_ondemand_func,
 	.event_handler = devfreq_simple_ondemand_handler,
diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 110f178fec04..34fb80f50cf6 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -135,7 +135,7 @@ static int devfreq_userspace_handler(struct devfreq *devfreq,
 	return ret;
 }
 
-const struct devfreq_governor devfreq_userspace = {
+static struct devfreq_governor devfreq_userspace = {
 	.name = "userspace",
 	.get_target_freq = devfreq_userspace_func,
 	.event_handler = devfreq_userspace_handler,
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 6484a3f8dda8..235248cb2c93 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -125,6 +125,7 @@ struct devfreq_governor {
  *		using devfreq.
  * @profile:	device-specific devfreq profile
  * @governor:	method how to choose frequency based on the usage.
+ * @governor_name:	devfreq governor name for use with this devfreq
  * @nb:		notifier block used to notify devfreq object that it should
  *		reevaluate operable frequencies. Devfreq users may use
  *		devfreq.nb to the corresponding register notifier call chain.
@@ -155,6 +156,7 @@ struct devfreq {
 	struct device dev;
 	struct devfreq_dev_profile *profile;
 	const struct devfreq_governor *governor;
+	char governor_name[DEVFREQ_NAME_LEN];
 	struct notifier_block nb;
 	struct delayed_work work;
 
@@ -176,7 +178,7 @@ struct devfreq {
 #if defined(CONFIG_PM_DEVFREQ)
 extern struct devfreq *devfreq_add_device(struct device *dev,
 				  struct devfreq_dev_profile *profile,
-				  const struct devfreq_governor *governor,
+				  const char *governor_name,
 				  void *data);
 extern int devfreq_remove_device(struct devfreq *devfreq);
 extern int devfreq_suspend_device(struct devfreq *devfreq);
@@ -190,17 +192,7 @@ extern int devfreq_register_opp_notifier(struct device *dev,
 extern int devfreq_unregister_opp_notifier(struct device *dev,
 					   struct devfreq *devfreq);
 
-#ifdef CONFIG_DEVFREQ_GOV_POWERSAVE
-extern const struct devfreq_governor devfreq_powersave;
-#endif
-#ifdef CONFIG_DEVFREQ_GOV_PERFORMANCE
-extern const struct devfreq_governor devfreq_performance;
-#endif
-#ifdef CONFIG_DEVFREQ_GOV_USERSPACE
-extern const struct devfreq_governor devfreq_userspace;
-#endif
 #ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND
-extern const struct devfreq_governor devfreq_simple_ondemand;
 /**
  * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
  *	and devfreq_add_device
@@ -223,7 +215,7 @@ struct devfreq_simple_ondemand_data {
 #else /* !CONFIG_PM_DEVFREQ */
 static struct devfreq *devfreq_add_device(struct device *dev,
 					  struct devfreq_dev_profile *profile,
-					  struct devfreq_governor *governor,
+					  const char *governor_name,
 					  void *data)
 {
 	return NULL;
@@ -262,11 +254,6 @@ static int devfreq_unregister_opp_notifier(struct device *dev,
 	return -EINVAL;
 }
 
-#define devfreq_powersave	NULL
-#define devfreq_performance	NULL
-#define devfreq_userspace	NULL
-#define devfreq_simple_ondemand	NULL
-
 #endif /* CONFIG_PM_DEVFREQ */
 
 #endif /* __LINUX_DEVFREQ_H__ */
-- 
cgit v1.2.3-55-g7522


From 43c1af0f4861b721def8c67ed6af2a69a4efcca3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 13 Nov 2012 19:33:57 +0530
Subject: mfd: tps65910: Use regmap irq framework for interrupt support

Implement irq support of tps65910 with regmap irq framework
in place of implementing locally.
This reduces the code size significantly and easy to maintain.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65910-irq.c   | 375 +++++++++++++++++++++----------------------
 include/linux/mfd/tps65910.h | 139 +++++++++++-----
 2 files changed, 278 insertions(+), 236 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index 09aab3e4776d..554543a584a1 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -24,171 +24,184 @@
 #include <linux/gpio.h>
 #include <linux/mfd/tps65910.h>
 
-/*
- * This is a threaded IRQ handler so can access I2C/SPI.  Since all
- * interrupts are clear on read the IRQ line will be reasserted and
- * the physical IRQ will be handled again if another interrupt is
- * asserted while we run - in the normal course of events this is a
- * rare occurrence so we save I2C/SPI reads.  We're also assuming that
- * it's rare to get lots of interrupts firing simultaneously so try to
- * minimise I/O.
- */
-static irqreturn_t tps65910_irq(int irq, void *irq_data)
-{
-	struct tps65910 *tps65910 = irq_data;
-	unsigned int reg;
-	u32 irq_sts;
-	u32 irq_mask;
-	int i;
-
-	tps65910_reg_read(tps65910, TPS65910_INT_STS, &reg);
-	irq_sts = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_STS2, &reg);
-	irq_sts |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_STS3, &reg);
-		irq_sts |= reg << 16;
-	}
-
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
-	irq_mask = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
-	irq_mask |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
-		irq_mask |= reg << 16;
-	}
-
-	irq_sts &= ~irq_mask;
-
-	if (!irq_sts)
-		return IRQ_NONE;
-
-	for (i = 0; i < tps65910->irq_num; i++) {
-
-		if (!(irq_sts & (1 << i)))
-			continue;
-
-		handle_nested_irq(irq_find_mapping(tps65910->domain, i));
-	}
-
-	/* Write the STS register back to clear IRQs we handled */
-	reg = irq_sts & 0xFF;
-	irq_sts >>= 8;
-	tps65910_reg_write(tps65910, TPS65910_INT_STS, reg);
-	reg = irq_sts & 0xFF;
-	tps65910_reg_write(tps65910, TPS65910_INT_STS2, reg);
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		reg = irq_sts >> 8;
-		tps65910_reg_write(tps65910, TPS65910_INT_STS3, reg);
-	}
-
-	return IRQ_HANDLED;
-}
-
-static void tps65910_irq_lock(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&tps65910->irq_lock);
-}
-
-static void tps65910_irq_sync_unlock(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-	u32 reg_mask;
-	unsigned int reg;
-
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
-	reg_mask = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
-	reg_mask |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
-		reg_mask |= reg << 16;
-	}
 
-	if (tps65910->irq_mask != reg_mask) {
-		reg = tps65910->irq_mask & 0xFF;
-		tps65910_reg_write(tps65910, TPS65910_INT_MSK, reg);
-		reg = tps65910->irq_mask >> 8 & 0xFF;
-		tps65910_reg_write(tps65910, TPS65910_INT_MSK2, reg);
-		switch (tps65910_chip_id(tps65910)) {
-		case TPS65911:
-			reg = tps65910->irq_mask >> 16;
-			tps65910_reg_write(tps65910, TPS65910_INT_MSK3, reg);
-		}
-	}
-	mutex_unlock(&tps65910->irq_lock);
-}
-
-static void tps65910_irq_enable(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	tps65910->irq_mask &= ~(1 << data->hwirq);
-}
-
-static void tps65910_irq_disable(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	tps65910->irq_mask |= (1 << data->hwirq);
-}
+static const struct regmap_irq tps65911_irqs[] = {
+	/* INT_STS */
+	[TPS65911_IRQ_PWRHOLD_F] = {
+		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_VBAT_VMHI] = {
+		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON] = {
+		.mask = INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON_LP] = {
+		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRHOLD_R] = {
+		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_HOTDIE] = {
+		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_ALARM] = {
+		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_PERIOD] = {
+		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO0_R] = {
+		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO0_F] = {
+		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_R] = {
+		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_F] = {
+		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_R] = {
+		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_F] = {
+		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_R] = {
+		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_F] = {
+		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+
+	/* INT_STS3 */
+	[TPS65911_IRQ_GPIO4_R] = {
+		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO4_F] = {
+		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_R] = {
+		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_F] = {
+		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_WTCHDG] = {
+		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_H] = {
+		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_L] = {
+		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_PWRDN] = {
+		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+};
 
-#ifdef CONFIG_PM_SLEEP
-static int tps65910_irq_set_wake(struct irq_data *data, unsigned int enable)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-	return irq_set_irq_wake(tps65910->chip_irq, enable);
-}
-#else
-#define tps65910_irq_set_wake NULL
-#endif
+static const struct regmap_irq tps65910_irqs[] = {
+	/* INT_STS */
+	[TPS65910_IRQ_VBAT_VMBDCH] = {
+		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_VBAT_VMHI] = {
+		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON] = {
+		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON_LP] = {
+		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRHOLD] = {
+		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_HOTDIE] = {
+		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_ALARM] = {
+		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_PERIOD] = {
+		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65910_IRQ_GPIO_R] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65910_IRQ_GPIO_F] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+};
 
-static struct irq_chip tps65910_irq_chip = {
+static struct regmap_irq_chip tps65911_irq_chip = {
 	.name = "tps65910",
-	.irq_bus_lock = tps65910_irq_lock,
-	.irq_bus_sync_unlock = tps65910_irq_sync_unlock,
-	.irq_disable = tps65910_irq_disable,
-	.irq_enable = tps65910_irq_enable,
-	.irq_set_wake = tps65910_irq_set_wake,
+	.irqs = tps65911_irqs,
+	.num_irqs = ARRAY_SIZE(tps65911_irqs),
+	.num_regs = 3,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
 };
 
-static int tps65910_irq_map(struct irq_domain *h, unsigned int virq,
-				irq_hw_number_t hw)
-{
-	struct tps65910 *tps65910 = h->host_data;
-
-	irq_set_chip_data(virq, tps65910);
-	irq_set_chip_and_handler(virq, &tps65910_irq_chip, handle_edge_irq);
-	irq_set_nested_thread(virq, 1);
-
-	/* ARM needs us to explicitly flag the IRQ as valid
-	 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-	set_irq_flags(virq, IRQF_VALID);
-#else
-	irq_set_noprobe(virq);
-#endif
-
-	return 0;
-}
-
-static struct irq_domain_ops tps65910_domain_ops = {
-	.map	= tps65910_irq_map,
-	.xlate	= irq_domain_xlate_twocell,
+static struct regmap_irq_chip tps65910_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65910_irqs,
+	.num_irqs = ARRAY_SIZE(tps65910_irqs),
+	.num_regs = 2,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
 };
 
 int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		    struct tps65910_platform_data *pdata)
 {
-	int ret;
-	int flags = IRQF_ONESHOT;
+	int ret = 0;
+	static struct regmap_irq_chip *tps6591x_irqs_chip;
 
 	if (!irq) {
 		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
@@ -200,61 +213,31 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		return -EINVAL;
 	}
 
+
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65910:
-		tps65910->irq_num = TPS65910_NUM_IRQ;
+		tps6591x_irqs_chip = &tps65910_irq_chip;
 		break;
 	case TPS65911:
-		tps65910->irq_num = TPS65911_NUM_IRQ;
+		tps6591x_irqs_chip = &tps65911_irq_chip;
 		break;
 	}
 
-	if (pdata->irq_base > 0) {
-		pdata->irq_base = irq_alloc_descs(pdata->irq_base, 0,
-					tps65910->irq_num, -1);
-		if (pdata->irq_base < 0) {
-			dev_warn(tps65910->dev, "Failed to alloc IRQs: %d\n",
-					pdata->irq_base);
-			return pdata->irq_base;
-		}
-	}
-
-	tps65910->irq_mask = 0xFFFFFF;
-
-	mutex_init(&tps65910->irq_lock);
 	tps65910->chip_irq = irq;
-	tps65910->irq_base = pdata->irq_base;
-
-	if (pdata->irq_base > 0)
-		tps65910->domain = irq_domain_add_legacy(tps65910->dev->of_node,
-					tps65910->irq_num,
-					pdata->irq_base,
-					0,
-					&tps65910_domain_ops, tps65910);
-	else
-		tps65910->domain = irq_domain_add_linear(tps65910->dev->of_node,
-					tps65910->irq_num,
-					&tps65910_domain_ops, tps65910);
-
-	if (!tps65910->domain) {
-		dev_err(tps65910->dev, "Failed to create IRQ domain\n");
-		return -ENOMEM;
+	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
+		IRQF_ONESHOT, pdata->irq_base,
+		tps6591x_irqs_chip, &tps65910->irq_data);
+	if (ret < 0) {
+		dev_warn(tps65910->dev,
+				"Failed to add irq_chip %d\n", ret);
+		return ret;
 	}
-
-	ret = request_threaded_irq(irq, NULL, tps65910_irq, flags,
-				   "tps65910", tps65910);
-
-	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
-
-	if (ret != 0)
-		dev_err(tps65910->dev, "Failed to request IRQ: %d\n", ret);
-
 	return ret;
 }
 
 int tps65910_irq_exit(struct tps65910 *tps65910)
 {
-	if (tps65910->chip_irq)
-		free_irq(tps65910->chip_irq, tps65910);
+	if (tps65910->chip_irq > 0)
+		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
 	return 0;
 }
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 02e894f3ff45..b564ac29590a 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -572,6 +572,49 @@
 #define SPARE_SPARE_MASK				0xFF
 #define SPARE_SPARE_SHIFT				0
 
+#define TPS65910_INT_STS_RTC_PERIOD_IT_MASK			0x80
+#define TPS65910_INT_STS_RTC_PERIOD_IT_SHIFT			7
+#define TPS65910_INT_STS_RTC_ALARM_IT_MASK			0x40
+#define TPS65910_INT_STS_RTC_ALARM_IT_SHIFT			6
+#define TPS65910_INT_STS_HOTDIE_IT_MASK				0x20
+#define TPS65910_INT_STS_HOTDIE_IT_SHIFT			5
+#define TPS65910_INT_STS_PWRHOLD_F_IT_MASK			0x10
+#define TPS65910_INT_STS_PWRHOLD_F_IT_SHIFT			4
+#define TPS65910_INT_STS_PWRON_LP_IT_MASK			0x08
+#define TPS65910_INT_STS_PWRON_LP_IT_SHIFT			3
+#define TPS65910_INT_STS_PWRON_IT_MASK				0x04
+#define TPS65910_INT_STS_PWRON_IT_SHIFT				2
+#define TPS65910_INT_STS_VMBHI_IT_MASK				0x02
+#define TPS65910_INT_STS_VMBHI_IT_SHIFT				1
+#define TPS65910_INT_STS_VMBDCH_IT_MASK				0x01
+#define TPS65910_INT_STS_VMBDCH_IT_SHIFT			0
+
+#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK			0x80
+#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_SHIFT		7
+#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK			0x40
+#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_SHIFT			6
+#define TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK			0x20
+#define TPS65910_INT_MSK_HOTDIE_IT_MSK_SHIFT			5
+#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK			0x10
+#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_SHIFT			4
+#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK			0x08
+#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_SHIFT			3
+#define TPS65910_INT_MSK_PWRON_IT_MSK_MASK			0x04
+#define TPS65910_INT_MSK_PWRON_IT_MSK_SHIFT			2
+#define TPS65910_INT_MSK_VMBHI_IT_MSK_MASK			0x02
+#define TPS65910_INT_MSK_VMBHI_IT_MSK_SHIFT			1
+#define TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK			0x01
+#define TPS65910_INT_MSK_VMBDCH_IT_MSK_SHIFT			0
+
+#define TPS65910_INT_STS2_GPIO0_F_IT_SHIFT			2
+#define TPS65910_INT_STS2_GPIO0_F_IT_MASK			0x02
+#define TPS65910_INT_STS2_GPIO0_R_IT_SHIFT			1
+#define TPS65910_INT_STS2_GPIO0_R_IT_MASK			0x01
+
+#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_SHIFT			2
+#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK			0x02
+#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_SHIFT			1
+#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK			0x01
 
 /*Register INT_STS  (0x80) register.RegisterDescription */
 #define INT_STS_RTC_PERIOD_IT_MASK			0x80
@@ -580,16 +623,16 @@
 #define INT_STS_RTC_ALARM_IT_SHIFT			6
 #define INT_STS_HOTDIE_IT_MASK				0x20
 #define INT_STS_HOTDIE_IT_SHIFT				5
-#define INT_STS_PWRHOLD_IT_MASK				0x10
-#define INT_STS_PWRHOLD_IT_SHIFT			4
+#define INT_STS_PWRHOLD_R_IT_MASK			0x10
+#define INT_STS_PWRHOLD_R_IT_SHIFT			4
 #define INT_STS_PWRON_LP_IT_MASK			0x08
 #define INT_STS_PWRON_LP_IT_SHIFT			3
 #define INT_STS_PWRON_IT_MASK				0x04
 #define INT_STS_PWRON_IT_SHIFT				2
 #define INT_STS_VMBHI_IT_MASK				0x02
 #define INT_STS_VMBHI_IT_SHIFT				1
-#define INT_STS_VMBDCH_IT_MASK				0x01
-#define INT_STS_VMBDCH_IT_SHIFT				0
+#define INT_STS_PWRHOLD_F_IT_MASK			0x01
+#define INT_STS_PWRHOLD_F_IT_SHIFT			0
 
 
 /*Register INT_MSK  (0x80) register.RegisterDescription */
@@ -599,16 +642,16 @@
 #define INT_MSK_RTC_ALARM_IT_MSK_SHIFT			6
 #define INT_MSK_HOTDIE_IT_MSK_MASK			0x20
 #define INT_MSK_HOTDIE_IT_MSK_SHIFT			5
-#define INT_MSK_PWRHOLD_IT_MSK_MASK			0x10
-#define INT_MSK_PWRHOLD_IT_MSK_SHIFT			4
+#define INT_MSK_PWRHOLD_R_IT_MSK_MASK			0x10
+#define INT_MSK_PWRHOLD_R_IT_MSK_SHIFT			4
 #define INT_MSK_PWRON_LP_IT_MSK_MASK			0x08
 #define INT_MSK_PWRON_LP_IT_MSK_SHIFT			3
 #define INT_MSK_PWRON_IT_MSK_MASK			0x04
 #define INT_MSK_PWRON_IT_MSK_SHIFT			2
 #define INT_MSK_VMBHI_IT_MSK_MASK			0x02
 #define INT_MSK_VMBHI_IT_MSK_SHIFT			1
-#define INT_MSK_VMBDCH_IT_MSK_MASK			0x01
-#define INT_MSK_VMBDCH_IT_MSK_SHIFT			0
+#define INT_MSK_PWRHOLD_F_IT_MSK_MASK			0x01
+#define INT_MSK_PWRHOLD_F_IT_MSK_SHIFT			0
 
 
 /*Register INT_STS2  (0x80) register.RegisterDescription */
@@ -650,6 +693,14 @@
 
 
 /*Register INT_STS3  (0x80) register.RegisterDescription */
+#define INT_STS3_PWRDN_IT_MASK				0x80
+#define INT_STS3_PWRDN_IT_SHIFT				7
+#define INT_STS3_VMBCH2_L_IT_MASK			0x40
+#define INT_STS3_VMBCH2_L_IT_SHIFT			6
+#define INT_STS3_VMBCH2_H_IT_MASK			0x20
+#define INT_STS3_VMBCH2_H_IT_SHIFT			5
+#define INT_STS3_WTCHDG_IT_MASK				0x10
+#define INT_STS3_WTCHDG_IT_SHIFT			4
 #define INT_STS3_GPIO5_F_IT_MASK			0x08
 #define INT_STS3_GPIO5_F_IT_SHIFT			3
 #define INT_STS3_GPIO5_R_IT_MASK			0x04
@@ -661,6 +712,14 @@
 
 
 /*Register INT_MSK3  (0x80) register.RegisterDescription */
+#define INT_MSK3_PWRDN_IT_MSK_MASK			0x80
+#define INT_MSK3_PWRDN_IT_MSK_SHIFT			7
+#define INT_MSK3_VMBCH2_L_IT_MSK_MASK			0x40
+#define INT_MSK3_VMBCH2_L_IT_MSK_SHIFT			6
+#define INT_MSK3_VMBCH2_H_IT_MSK_MASK			0x20
+#define INT_MSK3_VMBCH2_H_IT_MSK_SHIFT			5
+#define INT_MSK3_WTCHDG_IT_MSK_MASK			0x10
+#define INT_MSK3_WTCHDG_IT_MSK_SHIFT			4
 #define INT_MSK3_GPIO5_F_IT_MSK_MASK			0x08
 #define INT_MSK3_GPIO5_F_IT_MSK_SHIFT			3
 #define INT_MSK3_GPIO5_R_IT_MSK_MASK			0x04
@@ -721,34 +780,32 @@
 #define TPS65910_IRQ_GPIO_F				9
 #define TPS65910_NUM_IRQ				10
 
-#define TPS65911_IRQ_VBAT_VMBDCH			0
-#define TPS65911_IRQ_VBAT_VMBDCH2L			1
-#define TPS65911_IRQ_VBAT_VMBDCH2H			2
-#define TPS65911_IRQ_VBAT_VMHI				3
-#define TPS65911_IRQ_PWRON				4
-#define TPS65911_IRQ_PWRON_LP				5
-#define TPS65911_IRQ_PWRHOLD_F				6
-#define TPS65911_IRQ_PWRHOLD_R				7
-#define TPS65911_IRQ_HOTDIE				8
-#define TPS65911_IRQ_RTC_ALARM				9
-#define TPS65911_IRQ_RTC_PERIOD				10
-#define TPS65911_IRQ_GPIO0_R				11
-#define TPS65911_IRQ_GPIO0_F				12
-#define TPS65911_IRQ_GPIO1_R				13
-#define TPS65911_IRQ_GPIO1_F				14
-#define TPS65911_IRQ_GPIO2_R				15
-#define TPS65911_IRQ_GPIO2_F				16
-#define TPS65911_IRQ_GPIO3_R				17
-#define TPS65911_IRQ_GPIO3_F				18
-#define TPS65911_IRQ_GPIO4_R				19
-#define TPS65911_IRQ_GPIO4_F				20
-#define TPS65911_IRQ_GPIO5_R				21
-#define TPS65911_IRQ_GPIO5_F				22
-#define TPS65911_IRQ_WTCHDG				23
-#define TPS65911_IRQ_PWRDN				24
-
-#define TPS65911_NUM_IRQ				25
-
+#define TPS65911_IRQ_PWRHOLD_F				0
+#define TPS65911_IRQ_VBAT_VMHI				1
+#define TPS65911_IRQ_PWRON				2
+#define TPS65911_IRQ_PWRON_LP				3
+#define TPS65911_IRQ_PWRHOLD_R				4
+#define TPS65911_IRQ_HOTDIE				5
+#define TPS65911_IRQ_RTC_ALARM				6
+#define TPS65911_IRQ_RTC_PERIOD				7
+#define TPS65911_IRQ_GPIO0_R				8
+#define TPS65911_IRQ_GPIO0_F				9
+#define TPS65911_IRQ_GPIO1_R				10
+#define TPS65911_IRQ_GPIO1_F				11
+#define TPS65911_IRQ_GPIO2_R				12
+#define TPS65911_IRQ_GPIO2_F				13
+#define TPS65911_IRQ_GPIO3_R				14
+#define TPS65911_IRQ_GPIO3_F				15
+#define TPS65911_IRQ_GPIO4_R				16
+#define TPS65911_IRQ_GPIO4_F				17
+#define TPS65911_IRQ_GPIO5_R				18
+#define TPS65911_IRQ_GPIO5_F				19
+#define TPS65911_IRQ_WTCHDG				20
+#define TPS65911_IRQ_VMBCH2_H				21
+#define TPS65911_IRQ_VMBCH2_L				22
+#define TPS65911_IRQ_PWRDN				23
+
+#define TPS65911_NUM_IRQ				24
 
 /* GPIO Register Definitions */
 #define TPS65910_GPIO_DEB				BIT(2)
@@ -848,11 +905,8 @@ struct tps65910 {
 	struct tps65910_board *of_plat_data;
 
 	/* IRQ Handling */
-	struct mutex irq_lock;
 	int chip_irq;
-	int irq_base;
-	int irq_num;
-	u32 irq_mask;
+	struct regmap_irq_chip_data *irq_data;
 	struct irq_domain *domain;
 };
 
@@ -900,4 +954,9 @@ static inline int tps65910_reg_update_bits(struct tps65910 *tps65910, u8 reg,
 	return regmap_update_bits(tps65910->regmap, reg, mask, val);
 }
 
+static inline int tps65910_irq_get_virq(struct tps65910 *tps65910, int irq)
+{
+	return regmap_irq_get_virq(tps65910->irq_data, irq);
+}
+
 #endif /*  __LINUX_MFD_TPS65910_H */
-- 
cgit v1.2.3-55-g7522


From 4aab3fadad32ff4df05832beff7c16fd6ad938aa Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 13 Nov 2012 19:33:58 +0530
Subject: mfd: tps65910: Move interrupt implementation code to mfd file

In place of implementing the irq support in separate file,
moving implementation to main mfd file.
The irq files only contains the table and init steps only
and does not need extra file to have this only for this
purpose.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile         |   2 +-
 drivers/mfd/tps65910-irq.c   | 243 -------------------------------------------
 drivers/mfd/tps65910.c       | 216 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps65910.h |   4 -
 4 files changed, 217 insertions(+), 248 deletions(-)
 delete mode 100644 drivers/mfd/tps65910-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8a68fc7ea870..a30c49ecdd95 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -56,7 +56,7 @@ obj-$(CONFIG_TPS6105X)		+= tps6105x.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_TPS6507X)		+= tps6507x.o
 obj-$(CONFIG_MFD_TPS65217)	+= tps65217.o
-obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o tps65910-irq.o
+obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o
 tps65912-objs                   := tps65912-core.o tps65912-irq.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
deleted file mode 100644
index 554543a584a1..000000000000
--- a/drivers/mfd/tps65910-irq.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * tps65910-irq.c  --  TI TPS6591x
- *
- * Copyright 2010 Texas Instruments Inc.
- *
- * Author: Graeme Gregory <gg@slimlogic.co.uk>
- * Author: Jorge Eduardo Candelaria <jedu@slimlogic.co.uk>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bug.h>
-#include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/gpio.h>
-#include <linux/mfd/tps65910.h>
-
-
-static const struct regmap_irq tps65911_irqs[] = {
-	/* INT_STS */
-	[TPS65911_IRQ_PWRHOLD_F] = {
-		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_VBAT_VMHI] = {
-		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_PWRON] = {
-		.mask = INT_MSK_PWRON_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_PWRON_LP] = {
-		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_PWRHOLD_R] = {
-		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_HOTDIE] = {
-		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_RTC_ALARM] = {
-		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_RTC_PERIOD] = {
-		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-
-	/* INT_STS2 */
-	[TPS65911_IRQ_GPIO0_R] = {
-		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO0_F] = {
-		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO1_R] = {
-		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO1_F] = {
-		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO2_R] = {
-		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO2_F] = {
-		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO3_R] = {
-		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO3_F] = {
-		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-
-	/* INT_STS3 */
-	[TPS65911_IRQ_GPIO4_R] = {
-		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_GPIO4_F] = {
-		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_GPIO5_R] = {
-		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_GPIO5_F] = {
-		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_WTCHDG] = {
-		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_VMBCH2_H] = {
-		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_VMBCH2_L] = {
-		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_PWRDN] = {
-		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-};
-
-static const struct regmap_irq tps65910_irqs[] = {
-	/* INT_STS */
-	[TPS65910_IRQ_VBAT_VMBDCH] = {
-		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_VBAT_VMHI] = {
-		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_PWRON] = {
-		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_PWRON_LP] = {
-		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_PWRHOLD] = {
-		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_HOTDIE] = {
-		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_RTC_ALARM] = {
-		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_RTC_PERIOD] = {
-		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-
-	/* INT_STS2 */
-	[TPS65910_IRQ_GPIO_R] = {
-		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65910_IRQ_GPIO_F] = {
-		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-};
-
-static struct regmap_irq_chip tps65911_irq_chip = {
-	.name = "tps65910",
-	.irqs = tps65911_irqs,
-	.num_irqs = ARRAY_SIZE(tps65911_irqs),
-	.num_regs = 3,
-	.irq_reg_stride = 2,
-	.status_base = TPS65910_INT_STS,
-	.mask_base = TPS65910_INT_MSK,
-	.ack_base = TPS65910_INT_MSK,
-};
-
-static struct regmap_irq_chip tps65910_irq_chip = {
-	.name = "tps65910",
-	.irqs = tps65910_irqs,
-	.num_irqs = ARRAY_SIZE(tps65910_irqs),
-	.num_regs = 2,
-	.irq_reg_stride = 2,
-	.status_base = TPS65910_INT_STS,
-	.mask_base = TPS65910_INT_MSK,
-	.ack_base = TPS65910_INT_MSK,
-};
-
-int tps65910_irq_init(struct tps65910 *tps65910, int irq,
-		    struct tps65910_platform_data *pdata)
-{
-	int ret = 0;
-	static struct regmap_irq_chip *tps6591x_irqs_chip;
-
-	if (!irq) {
-		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
-		return -EINVAL;
-	}
-
-	if (!pdata) {
-		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
-		return -EINVAL;
-	}
-
-
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65910:
-		tps6591x_irqs_chip = &tps65910_irq_chip;
-		break;
-	case TPS65911:
-		tps6591x_irqs_chip = &tps65911_irq_chip;
-		break;
-	}
-
-	tps65910->chip_irq = irq;
-	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
-		IRQF_ONESHOT, pdata->irq_base,
-		tps6591x_irqs_chip, &tps65910->irq_data);
-	if (ret < 0) {
-		dev_warn(tps65910->dev,
-				"Failed to add irq_chip %d\n", ret);
-		return ret;
-	}
-	return ret;
-}
-
-int tps65910_irq_exit(struct tps65910 *tps65910)
-{
-	if (tps65910->chip_irq > 0)
-		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
-	return 0;
-}
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 27fbbe510101..d4d4eb5b5b6f 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -19,6 +19,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/mfd/core.h>
 #include <linux/regmap.h>
 #include <linux/mfd/tps65910.h>
@@ -50,6 +53,219 @@ static struct mfd_cell tps65910s[] = {
 };
 
 
+static const struct regmap_irq tps65911_irqs[] = {
+	/* INT_STS */
+	[TPS65911_IRQ_PWRHOLD_F] = {
+		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_VBAT_VMHI] = {
+		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON] = {
+		.mask = INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON_LP] = {
+		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRHOLD_R] = {
+		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_HOTDIE] = {
+		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_ALARM] = {
+		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_PERIOD] = {
+		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO0_R] = {
+		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO0_F] = {
+		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_R] = {
+		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_F] = {
+		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_R] = {
+		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_F] = {
+		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_R] = {
+		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_F] = {
+		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO4_R] = {
+		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO4_F] = {
+		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_R] = {
+		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_F] = {
+		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_WTCHDG] = {
+		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_H] = {
+		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_L] = {
+		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_PWRDN] = {
+		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+};
+
+static const struct regmap_irq tps65910_irqs[] = {
+	/* INT_STS */
+	[TPS65910_IRQ_VBAT_VMBDCH] = {
+		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_VBAT_VMHI] = {
+		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON] = {
+		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON_LP] = {
+		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRHOLD] = {
+		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_HOTDIE] = {
+		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_ALARM] = {
+		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_PERIOD] = {
+		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65910_IRQ_GPIO_R] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65910_IRQ_GPIO_F] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+};
+
+static struct regmap_irq_chip tps65911_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65911_irqs,
+	.num_irqs = ARRAY_SIZE(tps65911_irqs),
+	.num_regs = 3,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
+};
+
+static struct regmap_irq_chip tps65910_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65910_irqs,
+	.num_irqs = ARRAY_SIZE(tps65910_irqs),
+	.num_regs = 2,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
+};
+
+static int tps65910_irq_init(struct tps65910 *tps65910, int irq,
+		    struct tps65910_platform_data *pdata)
+{
+	int ret = 0;
+	static struct regmap_irq_chip *tps6591x_irqs_chip;
+
+	if (!irq) {
+		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
+		return -EINVAL;
+	}
+
+	if (!pdata) {
+		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
+		return -EINVAL;
+	}
+
+	switch (tps65910_chip_id(tps65910)) {
+	case TPS65910:
+		tps6591x_irqs_chip = &tps65910_irq_chip;
+		break;
+	case TPS65911:
+		tps6591x_irqs_chip = &tps65911_irq_chip;
+		break;
+	}
+
+	tps65910->chip_irq = irq;
+	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
+		IRQF_ONESHOT, pdata->irq_base,
+		tps6591x_irqs_chip, &tps65910->irq_data);
+	if (ret < 0)
+		dev_warn(tps65910->dev, "Failed to add irq_chip %d\n", ret);
+	return ret;
+}
+
+static int tps65910_irq_exit(struct tps65910 *tps65910)
+{
+	if (tps65910->chip_irq > 0)
+		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
+	return 0;
+}
+
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
 	struct tps65910 *tps65910 = dev_get_drvdata(dev);
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index b564ac29590a..0b16903f7e88 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -915,10 +915,6 @@ struct tps65910_platform_data {
 	int irq_base;
 };
 
-int tps65910_irq_init(struct tps65910 *tps65910, int irq,
-		struct tps65910_platform_data *pdata);
-int tps65910_irq_exit(struct tps65910 *tps65910);
-
 static inline int tps65910_chip_id(struct tps65910 *tps65910)
 {
 	return tps65910->id;
-- 
cgit v1.2.3-55-g7522


From 8bad1abd6303476d6f77878aa8ea737d5d1b625c Mon Sep 17 00:00:00 2001
From: Fabio Estevam
Date: Thu, 4 Oct 2012 00:15:05 -0300
Subject: mfd: da9052: Introduce da9052-irq.c

Create a da9052-irq.c file so that it can handle interrupt related functions.

This is useful for allowing the da9052 drivers to use such functions
when dealing with da9052 interrupts.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile              |   1 +
 drivers/mfd/da9052-core.c         | 271 ++---------------------------------
 drivers/mfd/da9052-irq.c          | 288 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/da9052/da9052.h |  10 ++
 4 files changed, 307 insertions(+), 263 deletions(-)
 create mode 100644 drivers/mfd/da9052-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index a30c49ecdd95..632cce09e407 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -90,6 +90,7 @@ obj-$(CONFIG_UCB1400_CORE)	+= ucb1400_core.o
 
 obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
 
+obj-$(CONFIG_PMIC_DA9052)	+= da9052-irq.o
 obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
 obj-$(CONFIG_MFD_DA9052_SPI)	+= da9052-spi.o
 obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index c96cdbc0daff..2153f9bba9ef 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -15,7 +15,6 @@
 #include <linux/delay.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -24,16 +23,6 @@
 #include <linux/mfd/da9052/pdata.h>
 #include <linux/mfd/da9052/reg.h>
 
-#define DA9052_NUM_IRQ_REGS		4
-#define DA9052_IRQ_MASK_POS_1		0x01
-#define DA9052_IRQ_MASK_POS_2		0x02
-#define DA9052_IRQ_MASK_POS_3		0x04
-#define DA9052_IRQ_MASK_POS_4		0x08
-#define DA9052_IRQ_MASK_POS_5		0x10
-#define DA9052_IRQ_MASK_POS_6		0x20
-#define DA9052_IRQ_MASK_POS_7		0x40
-#define DA9052_IRQ_MASK_POS_8		0x80
-
 static bool da9052_reg_readable(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -425,15 +414,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(da9052_adc_manual_read);
 
-static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
-{
-	struct da9052 *da9052 = irq_data;
-
-	complete(&da9052->done);
-
-	return IRQ_HANDLED;
-}
-
 int da9052_adc_read_temp(struct da9052 *da9052)
 {
 	int tbat;
@@ -447,74 +427,6 @@ int da9052_adc_read_temp(struct da9052 *da9052)
 }
 EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
 
-static struct resource da9052_rtc_resource = {
-	.name = "ALM",
-	.start = DA9052_IRQ_ALARM,
-	.end   = DA9052_IRQ_ALARM,
-	.flags = IORESOURCE_IRQ,
-};
-
-static struct resource da9052_onkey_resource = {
-	.name = "ONKEY",
-	.start = DA9052_IRQ_NONKEY,
-	.end   = DA9052_IRQ_NONKEY,
-	.flags = IORESOURCE_IRQ,
-};
-
-static struct resource da9052_bat_resources[] = {
-	{
-		.name = "BATT TEMP",
-		.start = DA9052_IRQ_TBAT,
-		.end   = DA9052_IRQ_TBAT,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "DCIN DET",
-		.start = DA9052_IRQ_DCIN,
-		.end   = DA9052_IRQ_DCIN,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "DCIN REM",
-		.start = DA9052_IRQ_DCINREM,
-		.end   = DA9052_IRQ_DCINREM,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "VBUS DET",
-		.start = DA9052_IRQ_VBUS,
-		.end   = DA9052_IRQ_VBUS,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "VBUS REM",
-		.start = DA9052_IRQ_VBUSREM,
-		.end   = DA9052_IRQ_VBUSREM,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "CHG END",
-		.start = DA9052_IRQ_CHGEND,
-		.end   = DA9052_IRQ_CHGEND,
-		.flags = IORESOURCE_IRQ,
-	},
-};
-
-static struct resource da9052_tsi_resources[] = {
-	{
-		.name = "PENDWN",
-		.start = DA9052_IRQ_PENDOWN,
-		.end   = DA9052_IRQ_PENDOWN,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "TSIRDY",
-		.start = DA9052_IRQ_TSIREADY,
-		.end   = DA9052_IRQ_TSIREADY,
-		.flags = IORESOURCE_IRQ,
-	},
-};
-
 static struct mfd_cell __devinitdata da9052_subdev_info[] = {
 	{
 		.name = "da9052-regulator",
@@ -574,13 +486,9 @@ static struct mfd_cell __devinitdata da9052_subdev_info[] = {
 	},
 	{
 		.name = "da9052-onkey",
-		.resources = &da9052_onkey_resource,
-		.num_resources = 1,
 	},
 	{
 		.name = "da9052-rtc",
-		.resources = &da9052_rtc_resource,
-		.num_resources = 1,
 	},
 	{
 		.name = "da9052-gpio",
@@ -602,160 +510,15 @@ static struct mfd_cell __devinitdata da9052_subdev_info[] = {
 	},
 	{
 		.name = "da9052-tsi",
-		.resources = da9052_tsi_resources,
-		.num_resources = ARRAY_SIZE(da9052_tsi_resources),
 	},
 	{
 		.name = "da9052-bat",
-		.resources = da9052_bat_resources,
-		.num_resources = ARRAY_SIZE(da9052_bat_resources),
 	},
 	{
 		.name = "da9052-watchdog",
 	},
 };
 
-static struct regmap_irq da9052_irqs[] = {
-	[DA9052_IRQ_DCIN] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_VBUS] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_DCINREM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_VBUSREM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_VDDLOW] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_ALARM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_SEQRDY] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_COMP1V2] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_NONKEY] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_IDFLOAT] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_IDGND] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_CHGEND] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_TBAT] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_ADC_EOM] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_PENDOWN] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_TSIREADY] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_GPI0] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_GPI1] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_GPI2] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_GPI3] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_GPI4] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_GPI5] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_GPI6] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_GPI7] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_GPI8] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_GPI9] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_GPI10] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_GPI11] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_GPI12] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_GPI13] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_GPI14] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_GPI15] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-};
-
-static struct regmap_irq_chip da9052_regmap_irq_chip = {
-	.name = "da9052_irq",
-	.status_base = DA9052_EVENT_A_REG,
-	.mask_base = DA9052_IRQ_MASK_A_REG,
-	.ack_base = DA9052_EVENT_A_REG,
-	.num_regs = DA9052_NUM_IRQ_REGS,
-	.irqs = da9052_irqs,
-	.num_irqs = ARRAY_SIZE(da9052_irqs),
-};
-
 struct regmap_config da9052_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -769,15 +532,10 @@ struct regmap_config da9052_regmap_config = {
 };
 EXPORT_SYMBOL_GPL(da9052_regmap_config);
 
-static int da9052_map_irq(struct da9052 *da9052, int irq)
-{
-	return regmap_irq_get_virq(da9052->irq_data, irq);
-}
-
 int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 {
 	struct da9052_pdata *pdata = da9052->dev->platform_data;
-	int ret, i;
+	int ret;
 
 	mutex_init(&da9052->auxadc_lock);
 	init_completion(&da9052->done);
@@ -787,22 +545,12 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 
 	da9052->chip_id = chip_id;
 
-	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
-				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				  -1, &da9052_regmap_irq_chip,
-				  &da9052->irq_data);
-	if (ret < 0) {
-		dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret);
-		goto regmap_err;
+	ret = da9052_irq_init(da9052);
+	if (ret != 0) {
+		dev_err(da9052->dev, "da9052_irq_init failed: %d\n", ret);
+		return ret;
 	}
 
-	i = da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM);
-	ret = request_threaded_irq(i, NULL, da9052_auxadc_irq,
-				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				   "adc-irq", da9052);
-	if (ret != 0)
-		dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret);
-
 	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
 			      ARRAY_SIZE(da9052_subdev_info), NULL, 0, NULL);
 	if (ret) {
@@ -813,18 +561,15 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 	return 0;
 
 err:
-	free_irq(da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM), da9052);
-	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
-	mfd_remove_devices(da9052->dev);
-regmap_err:
+	da9052_irq_exit(da9052);
+
 	return ret;
 }
 
 void da9052_device_exit(struct da9052 *da9052)
 {
-	free_irq(da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM), da9052);
-	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
 	mfd_remove_devices(da9052->dev);
+	da9052_irq_exit(da9052);
 }
 
 MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
diff --git a/drivers/mfd/da9052-irq.c b/drivers/mfd/da9052-irq.c
new file mode 100644
index 000000000000..57ae7841f536
--- /dev/null
+++ b/drivers/mfd/da9052-irq.c
@@ -0,0 +1,288 @@
+/*
+ * DA9052 interrupt support
+ *
+ * Author: Fabio Estevam <fabio.estevam@freescale.com>
+ * Based on arizona-irq.c, which is:
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/mfd/da9052/da9052.h>
+#include <linux/mfd/da9052/reg.h>
+
+#define DA9052_NUM_IRQ_REGS		4
+#define DA9052_IRQ_MASK_POS_1		0x01
+#define DA9052_IRQ_MASK_POS_2		0x02
+#define DA9052_IRQ_MASK_POS_3		0x04
+#define DA9052_IRQ_MASK_POS_4		0x08
+#define DA9052_IRQ_MASK_POS_5		0x10
+#define DA9052_IRQ_MASK_POS_6		0x20
+#define DA9052_IRQ_MASK_POS_7		0x40
+#define DA9052_IRQ_MASK_POS_8		0x80
+
+static struct regmap_irq da9052_irqs[] = {
+	[DA9052_IRQ_DCIN] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_VBUS] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_DCINREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_VBUSREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_VDDLOW] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ALARM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_SEQRDY] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_COMP1V2] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_NONKEY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_IDFLOAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_IDGND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_CHGEND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_TBAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ADC_EOM] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_PENDOWN] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_TSIREADY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI0] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI1] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI2] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI3] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI4] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI5] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI6] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI7] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI8] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI9] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI10] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI11] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI12] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI13] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI14] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI15] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+};
+
+static struct regmap_irq_chip da9052_regmap_irq_chip = {
+	.name = "da9052_irq",
+	.status_base = DA9052_EVENT_A_REG,
+	.mask_base = DA9052_IRQ_MASK_A_REG,
+	.ack_base = DA9052_EVENT_A_REG,
+	.num_regs = DA9052_NUM_IRQ_REGS,
+	.irqs = da9052_irqs,
+	.num_irqs = ARRAY_SIZE(da9052_irqs),
+};
+
+static int da9052_map_irq(struct da9052 *da9052, int irq)
+{
+	return regmap_irq_get_virq(da9052->irq_data, irq);
+}
+
+int da9052_enable_irq(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	enable_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_enable_irq);
+
+int da9052_disable_irq(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	disable_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_disable_irq);
+
+int da9052_disable_irq_nosync(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	disable_irq_nosync(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_disable_irq_nosync);
+
+int da9052_request_irq(struct da9052 *da9052, int irq, char *name,
+			   irq_handler_t handler, void *data)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	return request_threaded_irq(irq, NULL, handler,
+				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				     name, data);
+}
+EXPORT_SYMBOL_GPL(da9052_request_irq);
+
+void da9052_free_irq(struct da9052 *da9052, int irq, void *data)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return;
+
+	free_irq(irq, data);
+}
+EXPORT_SYMBOL_GPL(da9052_free_irq);
+
+static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
+{
+	struct da9052 *da9052 = irq_data;
+
+	complete(&da9052->done);
+
+	return IRQ_HANDLED;
+}
+
+int da9052_irq_init(struct da9052 *da9052)
+{
+	int ret;
+
+	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
+				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				  -1, &da9052_regmap_irq_chip,
+				  &da9052->irq_data);
+	if (ret < 0) {
+		dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret);
+		goto regmap_err;
+	}
+
+	ret = da9052_request_irq(da9052, DA9052_IRQ_ADC_EOM, "adc-irq",
+			    da9052_auxadc_irq, da9052);
+
+	if (ret != 0) {
+		dev_err(da9052->dev, "DA9052_IRQ_ADC_EOM failed: %d\n", ret);
+		goto request_irq_err;
+	}
+
+	return 0;
+
+request_irq_err:
+	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
+regmap_err:
+	return ret;
+
+}
+
+int da9052_irq_exit(struct da9052 *da9052)
+{
+	da9052_free_irq(da9052, DA9052_IRQ_ADC_EOM , da9052);
+	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
+
+	return 0;
+}
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 0507c4c21a7d..86dd93de6ff2 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -146,4 +146,14 @@ void da9052_device_exit(struct da9052 *da9052);
 
 extern struct regmap_config da9052_regmap_config;
 
+int da9052_irq_init(struct da9052 *da9052);
+int da9052_irq_exit(struct da9052 *da9052);
+int da9052_request_irq(struct da9052 *da9052, int irq, char *name,
+			   irq_handler_t handler, void *data);
+void da9052_free_irq(struct da9052 *da9052, int irq, void *data);
+
+int da9052_enable_irq(struct da9052 *da9052, int irq);
+int da9052_disable_irq(struct da9052 *da9052, int irq);
+int da9052_disable_irq_nosync(struct da9052 *da9052, int irq);
+
 #endif /* __MFD_DA9052_DA9052_H */
-- 
cgit v1.2.3-55-g7522


From f01312d846016dbd38cc9865e580298fb61f2aa7 Mon Sep 17 00:00:00 2001
From: Lars Poeschel
Date: Mon, 5 Nov 2012 15:48:23 +0100
Subject: mfd: Add viperboard driver

Add mfd driver for Nano River Technologies viperboard.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig            |  14 +++++
 drivers/mfd/Makefile           |   1 +
 drivers/mfd/viperboard.c       | 129 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/viperboard.h | 105 +++++++++++++++++++++++++++++++++
 4 files changed, 249 insertions(+)
 create mode 100644 drivers/mfd/viperboard.c
 create mode 100644 include/linux/mfd/viperboard.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 34242cada125..b280639a7634 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1065,6 +1065,20 @@ config MFD_PALMAS
 	  If you say yes here you get support for the Palmas
 	  series of PMIC chips from Texas Instruments.
 
+config MFD_VIPERBOARD
+        tristate "Support for Nano River Technologies Viperboard"
+	select MFD_CORE
+	depends on USB
+	default n
+	help
+	  Say yes here if you want support for Nano River Technologies
+	  Viperboard.
+	  There are mfd cell drivers available for i2c master, adc and
+	  both gpios found on the board. The spi part does not yet
+	  have a driver.
+	  You need to select the mfd cell drivers separately.
+	  The drivers do not support all features the board exposes.
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 632cce09e407..1c3ee7c76906 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -139,6 +139,7 @@ obj-$(CONFIG_MFD_TPS65090)	+= tps65090.o
 obj-$(CONFIG_MFD_AAT2870_CORE)	+= aat2870-core.o
 obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_PALMAS)	+= palmas.o
+obj-$(CONFIG_MFD_VIPERBOARD)    += viperboard.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
new file mode 100644
index 000000000000..cd1e2fe95647
--- /dev/null
+++ b/drivers/mfd/viperboard.c
@@ -0,0 +1,129 @@
+/*
+ *  Nano River Technologies viperboard driver
+ *
+ *  This is the core driver for the viperboard. There are cell drivers
+ *  available for I2C, ADC and both GPIOs. SPI is not yet supported.
+ *  The drivers do not support all features the board exposes. See user
+ *  manual of the viperboard.
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#include <linux/mfd/core.h>
+#include <linux/mfd/viperboard.h>
+
+#include <linux/usb.h>
+
+
+static const struct usb_device_id vprbrd_table[] = {
+	{ USB_DEVICE(0x2058, 0x1005) },   /* Nano River Technologies */
+	{ }                               /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, vprbrd_table);
+
+static struct mfd_cell vprbrd_devs[] = {
+};
+
+static int vprbrd_probe(struct usb_interface *interface,
+			      const struct usb_device_id *id)
+{
+	struct vprbrd *vb;
+
+	u16 version = 0;
+	int pipe, ret;
+	unsigned char buf[1];
+
+	/* allocate memory for our device state and initialize it */
+	vb = kzalloc(sizeof(*vb), GFP_KERNEL);
+	if (vb == NULL) {
+		dev_err(&interface->dev, "Out of memory\n");
+		return -ENOMEM;
+	}
+
+	mutex_init(&vb->lock);
+
+	vb->usb_dev = usb_get_dev(interface_to_usbdev(interface));
+
+	/* save our data pointer in this interface device */
+	usb_set_intfdata(interface, vb);
+	dev_set_drvdata(&vb->pdev.dev, vb);
+
+	/* get version information, major first, minor then */
+	pipe = usb_rcvctrlpipe(vb->usb_dev, 0);
+	ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MAJOR,
+		VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, buf, 1,
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret == 1)
+		version = buf[0];
+
+	ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MINOR,
+		VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, buf, 1,
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret == 1) {
+		version <<= 8;
+		version = version | buf[0];
+	}
+
+	dev_info(&interface->dev,
+		 "version %x.%02x found at bus %03d address %03d\n",
+		 version >> 8, version & 0xff,
+		 vb->usb_dev->bus->busnum, vb->usb_dev->devnum);
+
+	ret = mfd_add_devices(&interface->dev, -1, vprbrd_devs,
+				ARRAY_SIZE(vprbrd_devs), NULL, 0, NULL);
+	if (ret != 0) {
+		dev_err(&interface->dev, "Failed to add mfd devices to core.");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (vb) {
+		usb_put_dev(vb->usb_dev);
+		kfree(vb);
+	}
+
+	return ret;
+}
+
+static void vprbrd_disconnect(struct usb_interface *interface)
+{
+	struct vprbrd *vb = usb_get_intfdata(interface);
+
+	mfd_remove_devices(&interface->dev);
+	usb_set_intfdata(interface, NULL);
+	usb_put_dev(vb->usb_dev);
+	kfree(vb);
+
+	dev_dbg(&interface->dev, "disconnected\n");
+}
+
+static struct usb_driver vprbrd_driver = {
+	.name		= "viperboard",
+	.probe		= vprbrd_probe,
+	.disconnect	= vprbrd_disconnect,
+	.id_table	= vprbrd_table,
+};
+
+module_usb_driver(vprbrd_driver);
+
+MODULE_DESCRIPTION("Nano River Technologies viperboard mfd core driver");
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
new file mode 100644
index 000000000000..0d1342466db5
--- /dev/null
+++ b/include/linux/mfd/viperboard.h
@@ -0,0 +1,105 @@
+/*
+ *  include/linux/mfd/viperboard.h
+ *
+ *  Nano River Technologies viperboard definitions
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_VIPERBOARD_H__
+#define __MFD_VIPERBOARD_H__
+
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#define VPRBRD_EP_OUT               0x02
+#define VPRBRD_EP_IN                0x86
+
+#define VPRBRD_I2C_MSG_LEN          512 /* max length of a msg on USB level */
+
+#define VPRBRD_I2C_FREQ_6MHZ        1                        /*   6 MBit/s */
+#define VPRBRD_I2C_FREQ_3MHZ        2                        /*   3 MBit/s */
+#define VPRBRD_I2C_FREQ_1MHZ        3                        /*   1 MBit/s */
+#define VPRBRD_I2C_FREQ_FAST        4                        /* 400 kbit/s */
+#define VPRBRD_I2C_FREQ_400KHZ      VPRBRD_I2C_FREQ_FAST
+#define VPRBRD_I2C_FREQ_200KHZ      5                        /* 200 kbit/s */
+#define VPRBRD_I2C_FREQ_STD         6                        /* 100 kbit/s */
+#define VPRBRD_I2C_FREQ_100KHZ      VPRBRD_I2C_FREQ_STD
+#define VPRBRD_I2C_FREQ_10KHZ       7                        /*  10 kbit/s */
+
+#define VPRBRD_I2C_CMD_WRITE        0x00
+#define VPRBRD_I2C_CMD_READ         0x01
+#define VPRBRD_I2C_CMD_ADDR         0x02
+
+#define VPRBRD_USB_TYPE_OUT	    0x40
+#define VPRBRD_USB_TYPE_IN	    0xc0
+#define VPRBRD_USB_TIMEOUT_MS       100
+#define VPRBRD_USB_REQUEST_MAJOR    0xea
+#define VPRBRD_USB_REQUEST_MINOR    0xeb
+
+struct vprbrd_i2c_write_hdr {
+	u8 cmd;
+	u16 addr;
+	u8 len1;
+	u8 len2;
+	u8 last;
+	u8 chan;
+	u16 spi;
+} __packed;
+
+struct vprbrd_i2c_read_hdr {
+	u8 cmd;
+	u16 addr;
+	u8 len0;
+	u8 len1;
+	u8 len2;
+	u8 len3;
+	u8 len4;
+	u8 len5;
+	u16 tf1;                        /* transfer 1 length */
+	u16 tf2;                        /* transfer 2 length */
+} __packed;
+
+struct vprbrd_i2c_status {
+	u8 unknown[11];
+	u8 status;
+} __packed;
+
+struct vprbrd_i2c_write_msg {
+	struct vprbrd_i2c_write_hdr header;
+	u8 data[VPRBRD_I2C_MSG_LEN
+		- sizeof(struct vprbrd_i2c_write_hdr)];
+} __packed;
+
+struct vprbrd_i2c_read_msg {
+	struct vprbrd_i2c_read_hdr header;
+	u8 data[VPRBRD_I2C_MSG_LEN
+		- sizeof(struct vprbrd_i2c_read_hdr)];
+} __packed;
+
+struct vprbrd_i2c_addr_msg {
+	u8 cmd;
+	u8 addr;
+	u8 unknown1;
+	u16 len;
+	u8 unknown2;
+	u8 unknown3;
+} __packed;
+
+/* Structure to hold all device specific stuff */
+struct vprbrd {
+	struct usb_device *usb_dev; /* the usb device for this device */
+	struct mutex lock;
+	u8 buf[sizeof(struct vprbrd_i2c_write_msg)];
+	struct platform_device pdev;
+};
+
+#endif /* __MFD_VIPERBOARD_H__ */
-- 
cgit v1.2.3-55-g7522


From 9d5b72de0d1627b130fa69c5edf58b5b2df4ca50 Mon Sep 17 00:00:00 2001
From: Lars Poeschel
Date: Mon, 5 Nov 2012 15:48:24 +0100
Subject: gpio: Add viperboard gpio driver

This adds the mfd cell to use the gpio a and gpio b part
of the Nano River Technologies viperboard.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/Kconfig           |  13 ++
 drivers/gpio/Makefile          |   1 +
 drivers/gpio/gpio-viperboard.c | 517 +++++++++++++++++++++++++++++++++++++++++
 drivers/mfd/viperboard.c       |   3 +
 include/linux/mfd/viperboard.h |   2 +
 5 files changed, 536 insertions(+)
 create mode 100644 drivers/gpio/gpio-viperboard.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index d055cee36942..c3bbd08bcdc3 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -649,4 +649,17 @@ config GPIO_MSIC
 	  Enable support for GPIO on intel MSIC controllers found in
 	  intel MID devices
 
+comment "USB GPIO expanders:"
+
+config GPIO_VIPERBOARD
+	tristate "Viperboard GPIO a & b support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the GPIO signals of Nano River
+	  Technologies Viperboard. There are two GPIO chips on the
+	  board: gpioa and gpiob.
+          See viperboard API specification and Nano
+          River Tech's viperboard.h for detailed meaning
+          of the module parameters.
+
 endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 9aeed6707326..16a1385226e9 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_GPIO_TPS65912)	+= gpio-tps65912.o
 obj-$(CONFIG_GPIO_TWL4030)	+= gpio-twl4030.o
 obj-$(CONFIG_GPIO_TWL6040)	+= gpio-twl6040.o
 obj-$(CONFIG_GPIO_UCB1400)	+= gpio-ucb1400.o
+obj-$(CONFIG_GPIO_VIPERBOARD)	+= gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)	+= gpio-vr41xx.o
 obj-$(CONFIG_GPIO_VT8500)	+= gpio-vt8500.o
 obj-$(CONFIG_GPIO_VX855)	+= gpio-vx855.o
diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c
new file mode 100644
index 000000000000..13772996cf24
--- /dev/null
+++ b/drivers/gpio/gpio-viperboard.c
@@ -0,0 +1,517 @@
+/*
+ *  Nano River Technologies viperboard GPIO lib driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/gpio.h>
+
+#include <linux/mfd/viperboard.h>
+
+#define VPRBRD_GPIOA_CLK_1MHZ		0
+#define VPRBRD_GPIOA_CLK_100KHZ		1
+#define VPRBRD_GPIOA_CLK_10KHZ		2
+#define VPRBRD_GPIOA_CLK_1KHZ		3
+#define VPRBRD_GPIOA_CLK_100HZ		4
+#define VPRBRD_GPIOA_CLK_10HZ		5
+
+#define VPRBRD_GPIOA_FREQ_DEFAULT	1000
+
+#define VPRBRD_GPIOA_CMD_CONT		0x00
+#define VPRBRD_GPIOA_CMD_PULSE		0x01
+#define VPRBRD_GPIOA_CMD_PWM		0x02
+#define VPRBRD_GPIOA_CMD_SETOUT		0x03
+#define VPRBRD_GPIOA_CMD_SETIN		0x04
+#define VPRBRD_GPIOA_CMD_SETINT		0x05
+#define VPRBRD_GPIOA_CMD_GETIN		0x06
+
+#define VPRBRD_GPIOB_CMD_SETDIR		0x00
+#define VPRBRD_GPIOB_CMD_SETVAL		0x01
+
+struct vprbrd_gpioa_msg {
+	u8 cmd;
+	u8 clk;
+	u8 offset;
+	u8 t1;
+	u8 t2;
+	u8 invert;
+	u8 pwmlevel;
+	u8 outval;
+	u8 risefall;
+	u8 answer;
+	u8 __fill;
+} __packed;
+
+struct vprbrd_gpiob_msg {
+	u8 cmd;
+	u16 val;
+	u16 mask;
+} __packed;
+
+struct vprbrd_gpio {
+	struct gpio_chip gpioa; /* gpio a related things */
+	u32 gpioa_out;
+	u32 gpioa_val;
+	struct gpio_chip gpiob; /* gpio b related things */
+	u32 gpiob_out;
+	u32 gpiob_val;
+	struct vprbrd *vb;
+};
+
+/* gpioa sampling clock module parameter */
+static unsigned char gpioa_clk;
+static unsigned int gpioa_freq = VPRBRD_GPIOA_FREQ_DEFAULT;
+module_param(gpioa_freq, uint, 0);
+MODULE_PARM_DESC(gpioa_freq,
+	"gpio-a sampling freq in Hz (default is 1000Hz) valid values: 10, 100, 1000, 10000, 100000, 1000000");
+
+/* ----- begin of gipo a chip -------------------------------------------- */
+
+static int vprbrd_gpioa_get(struct gpio_chip *chip,
+		unsigned offset)
+{
+	int ret, answer, error = 0;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	/* if io is set to output, just return the saved value */
+	if (gpio->gpioa_out & (1 << offset))
+		return gpio->gpioa_val & (1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_GETIN;
+	gamsg->clk = 0x00;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = 0x00;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		error = -EREMOTEIO;
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_IN, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	answer = gamsg->answer & 0x01;
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		error = -EREMOTEIO;
+
+	if (error)
+		return error;
+
+	return answer;
+}
+
+static void vprbrd_gpioa_set(struct gpio_chip *chip,
+		unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	if (gpio->gpioa_out & (1 << offset)) {
+		if (value)
+			gpio->gpioa_val |= (1 << offset);
+		else
+			gpio->gpioa_val &= ~(1 << offset);
+
+		mutex_lock(&vb->lock);
+
+		gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT;
+		gamsg->clk = 0x00;
+		gamsg->offset = offset;
+		gamsg->t1 = 0x00;
+		gamsg->t2 = 0x00;
+		gamsg->invert = 0x00;
+		gamsg->pwmlevel = 0x00;
+		gamsg->outval = value;
+		gamsg->risefall = 0x00;
+		gamsg->answer = 0x00;
+		gamsg->__fill = 0x00;
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0),
+			VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT,
+			0x0000,	0x0000, gamsg,
+			sizeof(struct vprbrd_gpioa_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_gpioa_msg))
+			dev_err(chip->dev, "usb error setting pin value\n");
+	}
+}
+
+static int vprbrd_gpioa_direction_input(struct gpio_chip *chip,
+			unsigned offset)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	gpio->gpioa_out &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_SETIN;
+	gamsg->clk = gpioa_clk;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = 0x00;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int vprbrd_gpioa_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	gpio->gpioa_out |= (1 << offset);
+	if (value)
+		gpio->gpioa_val |= (1 << offset);
+	else
+		gpio->gpioa_val &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT;
+	gamsg->clk = 0x00;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = value;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+/* ----- end of gpio a chip ---------------------------------------------- */
+
+/* ----- begin of gipo b chip -------------------------------------------- */
+
+static int vprbrd_gpiob_setdir(struct vprbrd *vb, unsigned offset,
+	unsigned dir)
+{
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+	int ret;
+
+	gbmsg->cmd = VPRBRD_GPIOB_CMD_SETDIR;
+	gbmsg->val = cpu_to_be16(dir << offset);
+	gbmsg->mask = cpu_to_be16(0x0001 << offset);
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gbmsg, sizeof(struct vprbrd_gpiob_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if (ret != sizeof(struct vprbrd_gpiob_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int vprbrd_gpiob_get(struct gpio_chip *chip,
+		unsigned offset)
+{
+	int ret;
+	u16 val;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+
+	/* if io is set to output, just return the saved value */
+	if (gpio->gpiob_out & (1 << offset))
+		return gpio->gpiob_val & (1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_IN, 0x0000,
+		0x0000, gbmsg,	sizeof(struct vprbrd_gpiob_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	val = gbmsg->val;
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpiob_msg))
+		return ret;
+
+	/* cache the read values */
+	gpio->gpiob_val = be16_to_cpu(val);
+
+	return (gpio->gpiob_val >> offset) & 0x1;
+}
+
+static void vprbrd_gpiob_set(struct gpio_chip *chip,
+		unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+
+	if (gpio->gpiob_out & (1 << offset)) {
+		if (value)
+			gpio->gpiob_val |= (1 << offset);
+		else
+			gpio->gpiob_val &= ~(1 << offset);
+
+		mutex_lock(&vb->lock);
+
+		gbmsg->cmd = VPRBRD_GPIOB_CMD_SETVAL;
+		gbmsg->val = cpu_to_be16(value << offset);
+		gbmsg->mask = cpu_to_be16(0x0001 << offset);
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0),
+			VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT,
+			0x0000,	0x0000, gbmsg,
+			sizeof(struct vprbrd_gpiob_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_gpiob_msg))
+			dev_err(chip->dev, "usb error setting pin value\n");
+	}
+}
+
+static int vprbrd_gpiob_direction_input(struct gpio_chip *chip,
+			unsigned offset)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+
+	gpio->gpiob_out &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = vprbrd_gpiob_setdir(vb, offset, 0);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret)
+		dev_err(chip->dev, "usb error setting pin to input\n");
+
+	return ret;
+}
+
+static int vprbrd_gpiob_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+
+	gpio->gpiob_out |= (1 << offset);
+	if (value)
+		gpio->gpiob_val |= (1 << offset);
+	else
+		gpio->gpiob_val &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = vprbrd_gpiob_setdir(vb, offset, 1);
+	if (ret)
+		dev_err(chip->dev, "usb error setting pin to output\n");
+
+	mutex_unlock(&vb->lock);
+
+	vprbrd_gpiob_set(chip, offset, value);
+
+	return ret;
+}
+
+/* ----- end of gpio b chip ---------------------------------------------- */
+
+static int __devinit vprbrd_gpio_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_gpio *vb_gpio;
+	int ret;
+
+	vb_gpio = devm_kzalloc(&pdev->dev, sizeof(*vb_gpio), GFP_KERNEL);
+	if (vb_gpio == NULL)
+		return -ENOMEM;
+
+	vb_gpio->vb = vb;
+	/* registering gpio a */
+	vb_gpio->gpioa.label = "viperboard gpio a";
+	vb_gpio->gpioa.dev = &pdev->dev;
+	vb_gpio->gpioa.owner = THIS_MODULE;
+	vb_gpio->gpioa.base = -1;
+	vb_gpio->gpioa.ngpio = 16;
+	vb_gpio->gpioa.can_sleep = 1;
+	vb_gpio->gpioa.set = vprbrd_gpioa_set;
+	vb_gpio->gpioa.get = vprbrd_gpioa_get;
+	vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input;
+	vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output;
+	ret = gpiochip_add(&vb_gpio->gpioa);
+	if (ret < 0) {
+		dev_err(vb_gpio->gpioa.dev, "could not add gpio a");
+		goto err_gpioa;
+	}
+
+	/* registering gpio b */
+	vb_gpio->gpiob.label = "viperboard gpio b";
+	vb_gpio->gpiob.dev = &pdev->dev;
+	vb_gpio->gpiob.owner = THIS_MODULE;
+	vb_gpio->gpiob.base = -1;
+	vb_gpio->gpiob.ngpio = 16;
+	vb_gpio->gpiob.can_sleep = 1;
+	vb_gpio->gpiob.set = vprbrd_gpiob_set;
+	vb_gpio->gpiob.get = vprbrd_gpiob_get;
+	vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input;
+	vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output;
+	ret = gpiochip_add(&vb_gpio->gpiob);
+	if (ret < 0) {
+		dev_err(vb_gpio->gpiob.dev, "could not add gpio b");
+		goto err_gpiob;
+	}
+
+	platform_set_drvdata(pdev, vb_gpio);
+
+	return ret;
+
+err_gpiob:
+	ret = gpiochip_remove(&vb_gpio->gpioa);
+
+err_gpioa:
+	return ret;
+}
+
+static int __devexit vprbrd_gpio_remove(struct platform_device *pdev)
+{
+	struct vprbrd_gpio *vb_gpio = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = gpiochip_remove(&vb_gpio->gpiob);
+	if (ret == 0)
+		ret = gpiochip_remove(&vb_gpio->gpioa);
+
+	return ret;
+}
+
+static struct platform_driver vprbrd_gpio_driver = {
+	.driver.name	= "viperboard-gpio",
+	.driver.owner	= THIS_MODULE,
+	.probe		= vprbrd_gpio_probe,
+	.remove		= __devexit_p(vprbrd_gpio_remove),
+};
+
+static int __init vprbrd_gpio_init(void)
+{
+	switch (gpioa_freq) {
+	case 1000000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_1MHZ;
+		break;
+	case 100000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_100KHZ;
+		break;
+	case 10000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_10KHZ;
+		break;
+	case 1000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ;
+		break;
+	case 100:
+		gpioa_clk = VPRBRD_GPIOA_CLK_100HZ;
+		break;
+	case 10:
+		gpioa_clk = VPRBRD_GPIOA_CLK_10HZ;
+		break;
+	default:
+		pr_warn("invalid gpioa_freq (%d)\n", gpioa_freq);
+		gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ;
+	}
+
+	return platform_driver_register(&vprbrd_gpio_driver);
+}
+subsys_initcall(vprbrd_gpio_init);
+
+static void __exit vprbrd_gpio_exit(void)
+{
+	platform_driver_unregister(&vprbrd_gpio_driver);
+}
+module_exit(vprbrd_gpio_exit);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("GPIO driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-gpio");
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
index cd1e2fe95647..c75ac08f639c 100644
--- a/drivers/mfd/viperboard.c
+++ b/drivers/mfd/viperboard.c
@@ -38,6 +38,9 @@ static const struct usb_device_id vprbrd_table[] = {
 MODULE_DEVICE_TABLE(usb, vprbrd_table);
 
 static struct mfd_cell vprbrd_devs[] = {
+	{
+		.name = "viperboard-gpio",
+	},
 };
 
 static int vprbrd_probe(struct usb_interface *interface,
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
index 0d1342466db5..42d339fac3e5 100644
--- a/include/linux/mfd/viperboard.h
+++ b/include/linux/mfd/viperboard.h
@@ -44,6 +44,8 @@
 #define VPRBRD_USB_TIMEOUT_MS       100
 #define VPRBRD_USB_REQUEST_MAJOR    0xea
 #define VPRBRD_USB_REQUEST_MINOR    0xeb
+#define VPRBRD_USB_REQUEST_GPIOA    0xed
+#define VPRBRD_USB_REQUEST_GPIOB    0xdd
 
 struct vprbrd_i2c_write_hdr {
 	u8 cmd;
-- 
cgit v1.2.3-55-g7522


From 174a13aa8669331605b138aaf61569dd7189e453 Mon Sep 17 00:00:00 2001
From: Lars Poeschel
Date: Mon, 19 Nov 2012 16:36:04 +0100
Subject: i2c: Add viperboard i2c master driver

This adds the mfd cell to use the i2c part of the Nano River Technologies
viperboard as i2c master.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/i2c/busses/Kconfig          |  10 +
 drivers/i2c/busses/Makefile         |   1 +
 drivers/i2c/busses/i2c-viperboard.c | 480 ++++++++++++++++++++++++++++++++++++
 drivers/mfd/viperboard.c            |   3 +
 include/linux/mfd/viperboard.h      |   2 +
 5 files changed, 496 insertions(+)
 create mode 100644 drivers/i2c/busses/i2c-viperboard.c

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e9df4612b7eb..c7bff51fe524 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -818,6 +818,16 @@ config I2C_TINY_USB
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-tiny-usb.
 
+config I2C_VIPERBOARD
+	tristate "Viperboard I2C master support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the I2C part of the Nano River
+	  Technologies Viperboard as I2C master.
+          See viperboard API specification and Nano
+          River Tech's viperboard.h for detailed meaning
+          of the module parameters.
+
 comment "Other I2C/SMBus bus drivers"
 
 config I2C_ACORN
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 395b516ffa08..e5cb209d276c 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_I2C_PARPORT)	+= i2c-parport.o
 obj-$(CONFIG_I2C_PARPORT_LIGHT)	+= i2c-parport-light.o
 obj-$(CONFIG_I2C_TAOS_EVM)	+= i2c-taos-evm.o
 obj-$(CONFIG_I2C_TINY_USB)	+= i2c-tiny-usb.o
+obj-$(CONFIG_I2C_VIPERBOARD)	+= i2c-viperboard.o
 
 # Other I2C/SMBus bus drivers
 obj-$(CONFIG_I2C_ACORN)		+= i2c-acorn.o
diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c
new file mode 100644
index 000000000000..f5fa20dea906
--- /dev/null
+++ b/drivers/i2c/busses/i2c-viperboard.c
@@ -0,0 +1,480 @@
+/*
+ *  Nano River Technologies viperboard i2c master driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/i2c.h>
+
+#include <linux/mfd/viperboard.h>
+
+struct vprbrd_i2c {
+	struct i2c_adapter i2c;
+	u8 bus_freq_param;
+};
+
+/* i2c bus frequency module parameter */
+static u8 i2c_bus_param;
+static unsigned int i2c_bus_freq = 100;
+module_param(i2c_bus_freq, int, 0);
+MODULE_PARM_DESC(i2c_bus_freq,
+	"i2c bus frequency in khz (default is 100) valid values: 10, 100, 200, 400, 1000, 3000, 6000");
+
+static int vprbrd_i2c_status(struct i2c_adapter *i2c,
+	struct vprbrd_i2c_status *status, bool prev_error)
+{
+	u16 bytes_xfer;
+	int ret;
+	struct vprbrd *vb = (struct vprbrd *)i2c->algo_data;
+
+	/* check for protocol error */
+	bytes_xfer = sizeof(struct vprbrd_i2c_status);
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_I2C, VPRBRD_USB_TYPE_IN, 0x0000, 0x0000,
+		status, bytes_xfer, VPRBRD_USB_TIMEOUT_MS);
+
+	if (ret != bytes_xfer)
+		prev_error = true;
+
+	if (prev_error) {
+		dev_err(&i2c->dev, "failure in usb communication\n");
+		return -EREMOTEIO;
+	}
+
+	dev_dbg(&i2c->dev, "  status = %d\n", status->status);
+	if (status->status != 0x00) {
+		dev_err(&i2c->dev, "failure: i2c protocol error\n");
+		return -EPROTO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_receive(struct usb_device *usb_dev,
+	struct vprbrd_i2c_read_msg *rmsg, int bytes_xfer)
+{
+	int ret, bytes_actual;
+	int error = 0;
+
+	/* send the read request */
+	ret = usb_bulk_msg(usb_dev,
+		usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), rmsg,
+		sizeof(struct vprbrd_i2c_read_hdr), &bytes_actual,
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0)
+		|| (bytes_actual != sizeof(struct vprbrd_i2c_read_hdr))) {
+		dev_err(&usb_dev->dev, "failure transmitting usb\n");
+		error = -EREMOTEIO;
+	}
+
+	/* read the actual data */
+	ret = usb_bulk_msg(usb_dev,
+		usb_rcvbulkpipe(usb_dev, VPRBRD_EP_IN), rmsg,
+		bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) || (bytes_xfer != bytes_actual)) {
+		dev_err(&usb_dev->dev, "failure receiving usb\n");
+		error = -EREMOTEIO;
+	}
+	return error;
+}
+
+static int vprbrd_i2c_addr(struct usb_device *usb_dev,
+	struct vprbrd_i2c_addr_msg *amsg)
+{
+	int ret, bytes_actual;
+
+	ret = usb_bulk_msg(usb_dev,
+		usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), amsg,
+		sizeof(struct vprbrd_i2c_addr_msg), &bytes_actual,
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) ||
+			(sizeof(struct vprbrd_i2c_addr_msg) != bytes_actual)) {
+		dev_err(&usb_dev->dev, "failure transmitting usb\n");
+		return -EREMOTEIO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_read(struct vprbrd *vb, struct i2c_msg *msg)
+{
+	int ret;
+	u16 remain_len, bytes_xfer, len1, len2,
+		start = 0x0000;
+	struct vprbrd_i2c_read_msg *rmsg =
+		(struct vprbrd_i2c_read_msg *)vb->buf;
+
+	remain_len = msg->len;
+	rmsg->header.cmd = VPRBRD_I2C_CMD_READ;
+	while (remain_len > 0) {
+		rmsg->header.addr = cpu_to_le16(start + 0x4000);
+		if (remain_len <= 255) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len;
+			rmsg->header.len1 = 0x00;
+			rmsg->header.len2 = 0x00;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 510) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len - 255;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0x00;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 512) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len - 510;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 767) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 512;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			bytes_xfer = remain_len;
+			remain_len = 0;
+		} else if (remain_len <= 1022) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 767;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 1024) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 1022;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0xff;
+			remain_len = 0;
+		} else {
+			len1 = 512;
+			len2 = 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = 0x02;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0xff;
+			remain_len -= 1024;
+			start += 1024;
+		}
+		rmsg->header.tf1 = cpu_to_le16(len1);
+		rmsg->header.tf2 = cpu_to_le16(len2);
+
+		/* first read transfer */
+		ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len1);
+		if (ret < 0)
+			return ret;
+		/* copy the received data */
+		memcpy(msg->buf + start, rmsg, len1);
+
+		/* second read transfer if neccessary */
+		if (len2 > 0) {
+			ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len2);
+			if (ret < 0)
+				return ret;
+			/* copy the received data */
+			memcpy(msg->buf + start + 512, rmsg, len2);
+		}
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_write(struct vprbrd *vb, struct i2c_msg *msg)
+{
+	int ret, bytes_actual;
+	u16 remain_len, bytes_xfer,
+		start = 0x0000;
+	struct vprbrd_i2c_write_msg *wmsg =
+		(struct vprbrd_i2c_write_msg *)vb->buf;
+
+	remain_len = msg->len;
+	wmsg->header.cmd = VPRBRD_I2C_CMD_WRITE;
+	wmsg->header.last = 0x00;
+	wmsg->header.chan = 0x00;
+	wmsg->header.spi = 0x0000;
+	while (remain_len > 0) {
+		wmsg->header.addr = cpu_to_le16(start + 0x4000);
+		if (remain_len > 503) {
+			wmsg->header.len1 = 0xff;
+			wmsg->header.len2 = 0xf8;
+			remain_len -= 503;
+			bytes_xfer = 503 + sizeof(struct vprbrd_i2c_write_hdr);
+			start += 503;
+		} else if (remain_len > 255) {
+			wmsg->header.len1 = 0xff;
+			wmsg->header.len2 = (remain_len - 255);
+			bytes_xfer = remain_len +
+				sizeof(struct vprbrd_i2c_write_hdr);
+			remain_len = 0;
+		} else {
+			wmsg->header.len1 = remain_len;
+			wmsg->header.len2 = 0x00;
+			bytes_xfer = remain_len +
+				sizeof(struct vprbrd_i2c_write_hdr);
+			remain_len = 0;
+		}
+		memcpy(wmsg->data, msg->buf + start,
+			bytes_xfer - sizeof(struct vprbrd_i2c_write_hdr));
+
+		ret = usb_bulk_msg(vb->usb_dev,
+			usb_sndbulkpipe(vb->usb_dev,
+			VPRBRD_EP_OUT), wmsg,
+			bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS);
+		if ((ret < 0) || (bytes_xfer != bytes_actual))
+			return -EREMOTEIO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs,
+		int num)
+{
+	struct i2c_msg *pmsg;
+	int i, ret,
+		error = 0;
+	struct vprbrd *vb = (struct vprbrd *)i2c->algo_data;
+	struct vprbrd_i2c_addr_msg *amsg =
+		(struct vprbrd_i2c_addr_msg *)vb->buf;
+	struct vprbrd_i2c_status *smsg = (struct vprbrd_i2c_status *)vb->buf;
+
+	dev_dbg(&i2c->dev, "master xfer %d messages:\n", num);
+
+	for (i = 0 ; i < num ; i++) {
+		pmsg = &msgs[i];
+
+		dev_dbg(&i2c->dev,
+			"  %d: %s (flags %d) %d bytes to 0x%02x\n",
+			i, pmsg->flags & I2C_M_RD ? "read" : "write",
+			pmsg->flags, pmsg->len, pmsg->addr);
+
+		/* msgs longer than 2048 bytes are not supported by adapter */
+		if (pmsg->len > 2048)
+			return -EINVAL;
+
+		mutex_lock(&vb->lock);
+		/* directly send the message */
+		if (pmsg->flags & I2C_M_RD) {
+			/* read data */
+			amsg->cmd = VPRBRD_I2C_CMD_ADDR;
+			amsg->unknown2 = 0x00;
+			amsg->unknown3 = 0x00;
+			amsg->addr = pmsg->addr;
+			amsg->unknown1 = 0x01;
+			amsg->len = cpu_to_le16(pmsg->len);
+			/* send the addr and len, we're interested to board */
+			ret = vprbrd_i2c_addr(vb->usb_dev, amsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_read(vb, pmsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_status(i2c, smsg, error);
+			if (ret < 0)
+				error = ret;
+			/* in case of protocol error, return the error */
+			if (error < 0)
+				goto error;
+		} else {
+			/* write data */
+			ret = vprbrd_i2c_write(vb, pmsg);
+
+			amsg->cmd = VPRBRD_I2C_CMD_ADDR;
+			amsg->unknown2 = 0x00;
+			amsg->unknown3 = 0x00;
+			amsg->addr = pmsg->addr;
+			amsg->unknown1 = 0x00;
+			amsg->len = cpu_to_le16(pmsg->len);
+			/* send the addr, the data goes to to board */
+			ret = vprbrd_i2c_addr(vb->usb_dev, amsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_status(i2c, smsg, error);
+			if (ret < 0)
+				error = ret;
+
+			if (error < 0)
+				goto error;
+		}
+		mutex_unlock(&vb->lock);
+	}
+	return 0;
+error:
+	mutex_unlock(&vb->lock);
+	return error;
+}
+
+static u32 vprbrd_i2c_func(struct i2c_adapter *i2c)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+/* This is the actual algorithm we define */
+static const struct i2c_algorithm vprbrd_algorithm = {
+	.master_xfer	= vprbrd_i2c_xfer,
+	.functionality	= vprbrd_i2c_func,
+};
+
+static int __devinit vprbrd_i2c_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_i2c *vb_i2c;
+	int ret;
+	int pipe;
+
+	vb_i2c = kzalloc(sizeof(*vb_i2c), GFP_KERNEL);
+	if (vb_i2c == NULL)
+		return -ENOMEM;
+
+	/* setup i2c adapter description */
+	vb_i2c->i2c.owner = THIS_MODULE;
+	vb_i2c->i2c.class = I2C_CLASS_HWMON;
+	vb_i2c->i2c.algo = &vprbrd_algorithm;
+	vb_i2c->i2c.algo_data = vb;
+	/* save the param in usb capabable memory */
+	vb_i2c->bus_freq_param = i2c_bus_param;
+
+	snprintf(vb_i2c->i2c.name, sizeof(vb_i2c->i2c.name),
+		 "viperboard at bus %03d device %03d",
+		 vb->usb_dev->bus->busnum, vb->usb_dev->devnum);
+
+	/* setting the bus frequency */
+	if ((i2c_bus_param <= VPRBRD_I2C_FREQ_10KHZ)
+		&& (i2c_bus_param >= VPRBRD_I2C_FREQ_6MHZ)) {
+		pipe = usb_sndctrlpipe(vb->usb_dev, 0);
+		ret = usb_control_msg(vb->usb_dev, pipe,
+			VPRBRD_USB_REQUEST_I2C_FREQ, VPRBRD_USB_TYPE_OUT,
+			0x0000, 0x0000, &vb_i2c->bus_freq_param, 1,
+			VPRBRD_USB_TIMEOUT_MS);
+	    if (ret != 1) {
+		dev_err(&pdev->dev,
+			"failure setting i2c_bus_freq to %d\n", i2c_bus_freq);
+		ret = -EIO;
+		goto error;
+	    }
+	} else {
+		dev_err(&pdev->dev,
+			"invalid i2c_bus_freq setting:%d\n", i2c_bus_freq);
+		ret = -EIO;
+		goto error;
+	}
+
+	vb_i2c->i2c.dev.parent = &pdev->dev;
+
+	/* attach to i2c layer */
+	i2c_add_adapter(&vb_i2c->i2c);
+
+	platform_set_drvdata(pdev, vb_i2c);
+
+	return 0;
+
+error:
+	kfree(vb_i2c);
+	return ret;
+}
+
+static int __devexit vprbrd_i2c_remove(struct platform_device *pdev)
+{
+	struct vprbrd_i2c *vb_i2c = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = i2c_del_adapter(&vb_i2c->i2c);
+
+	return ret;
+}
+
+static struct platform_driver vprbrd_i2c_driver = {
+	.driver.name	= "viperboard-i2c",
+	.driver.owner	= THIS_MODULE,
+	.probe		= vprbrd_i2c_probe,
+	.remove		= __devexit_p(vprbrd_i2c_remove),
+};
+
+static int __init vprbrd_i2c_init(void)
+{
+	switch (i2c_bus_freq) {
+	case 6000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_6MHZ;
+		break;
+	case 3000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_3MHZ;
+		break;
+	case 1000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_1MHZ;
+		break;
+	case 400:
+		i2c_bus_param = VPRBRD_I2C_FREQ_400KHZ;
+		break;
+	case 200:
+		i2c_bus_param = VPRBRD_I2C_FREQ_200KHZ;
+		break;
+	case 100:
+		i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ;
+		break;
+	case 10:
+		i2c_bus_param = VPRBRD_I2C_FREQ_10KHZ;
+		break;
+	default:
+		pr_warn("invalid i2c_bus_freq (%d)\n", i2c_bus_freq);
+		i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ;
+	}
+
+	return platform_driver_register(&vprbrd_i2c_driver);
+}
+subsys_initcall(vprbrd_i2c_init);
+
+static void __exit vprbrd_i2c_exit(void)
+{
+	platform_driver_unregister(&vprbrd_i2c_driver);
+}
+module_exit(vprbrd_i2c_exit);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("I2C master driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-i2c");
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
index c75ac08f639c..4f74ec868a52 100644
--- a/drivers/mfd/viperboard.c
+++ b/drivers/mfd/viperboard.c
@@ -41,6 +41,9 @@ static struct mfd_cell vprbrd_devs[] = {
 	{
 		.name = "viperboard-gpio",
 	},
+	{
+		.name = "viperboard-i2c",
+	},
 };
 
 static int vprbrd_probe(struct usb_interface *interface,
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
index 42d339fac3e5..ef7851430d86 100644
--- a/include/linux/mfd/viperboard.h
+++ b/include/linux/mfd/viperboard.h
@@ -42,6 +42,8 @@
 #define VPRBRD_USB_TYPE_OUT	    0x40
 #define VPRBRD_USB_TYPE_IN	    0xc0
 #define VPRBRD_USB_TIMEOUT_MS       100
+#define VPRBRD_USB_REQUEST_I2C_FREQ 0xe6
+#define VPRBRD_USB_REQUEST_I2C      0xe9
 #define VPRBRD_USB_REQUEST_MAJOR    0xea
 #define VPRBRD_USB_REQUEST_MINOR    0xeb
 #define VPRBRD_USB_REQUEST_GPIOA    0xed
-- 
cgit v1.2.3-55-g7522


From ffd8a6e7a77802dd09559be78dea63956aa8f1d5 Mon Sep 17 00:00:00 2001
From: Lars Poeschel
Date: Mon, 5 Nov 2012 15:48:26 +0100
Subject: iio: adc: Add viperboard adc driver

This adds the mfd cell to use the adc part of the Nano River Technologies
viperboard.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/iio/adc/Kconfig          |   7 ++
 drivers/iio/adc/Makefile         |   1 +
 drivers/iio/adc/viperboard_adc.c | 181 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/viperboard.c         |   3 +
 include/linux/mfd/viperboard.h   |   1 +
 5 files changed, 193 insertions(+)
 create mode 100644 drivers/iio/adc/viperboard_adc.c

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 1401ed1af39f..bb3053739c80 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -67,4 +67,11 @@ config TI_AM335X_ADC
 	  Say yes here to build support for Texas Instruments ADC
 	  driver which is also a MFD client.
 
+config VIPERBOARD_ADC
+	tristate "Viperboard ADC support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the ADC part of the Nano River
+	  Technologies Viperboard.
+
 endmenu
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 4410a90fc84b..4268fa987fe6 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_AD7791) += ad7791.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
 obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o
+obj-$(CONFIG_VIPERBOARD_ADC) += viperboard_adc.o
\ No newline at end of file
diff --git a/drivers/iio/adc/viperboard_adc.c b/drivers/iio/adc/viperboard_adc.c
new file mode 100644
index 000000000000..10136a8b20d4
--- /dev/null
+++ b/drivers/iio/adc/viperboard_adc.c
@@ -0,0 +1,181 @@
+/*
+ *  Nano River Technologies viperboard IIO ADC driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/iio/iio.h>
+
+#include <linux/mfd/viperboard.h>
+
+#define VPRBRD_ADC_CMD_GET		0x00
+
+struct vprbrd_adc_msg {
+	u8 cmd;
+	u8 chan;
+	u8 val;
+} __packed;
+
+struct vprbrd_adc {
+	struct vprbrd *vb;
+};
+
+#define VPRBRD_ADC_CHANNEL(_index) {			\
+	.type = IIO_VOLTAGE,				\
+	.indexed = 1,					\
+	.channel = _index,				\
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT,	\
+	.scan_index = _index,				\
+	.scan_type = {					\
+		.sign = 'u',				\
+		.realbits = 8,				\
+		.storagebits = 8,			\
+	},						\
+}
+
+static struct iio_chan_spec const vprbrd_adc_iio_channels[] = {
+	VPRBRD_ADC_CHANNEL(0),
+	VPRBRD_ADC_CHANNEL(1),
+	VPRBRD_ADC_CHANNEL(2),
+	VPRBRD_ADC_CHANNEL(3),
+};
+
+static int vprbrd_iio_read_raw(struct iio_dev *iio_dev,
+				struct iio_chan_spec const *chan,
+				int *val,
+				int *val2,
+				long info)
+{
+	int ret, error = 0;
+	struct vprbrd_adc *adc = iio_priv(iio_dev);
+	struct vprbrd *vb = adc->vb;
+	struct vprbrd_adc_msg *admsg = (struct vprbrd_adc_msg *)vb->buf;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		mutex_lock(&vb->lock);
+
+		admsg->cmd = VPRBRD_ADC_CMD_GET;
+		admsg->chan = chan->scan_index;
+		admsg->val = 0x00;
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC,
+			VPRBRD_USB_TYPE_OUT, 0x0000, 0x0000, admsg,
+			sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS);
+		if (ret != sizeof(struct vprbrd_adc_msg)) {
+			dev_err(&iio_dev->dev, "usb send error on adc read\n");
+			error = -EREMOTEIO;
+		}
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_rcvctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC,
+			VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, admsg,
+			sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		*val = admsg->val;
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_adc_msg)) {
+			dev_err(&iio_dev->dev, "usb recv error on adc read\n");
+			error = -EREMOTEIO;
+		}
+
+		if (error)
+			goto error;
+
+		return IIO_VAL_INT;
+	default:
+		error = -EINVAL;
+		break;
+	}
+error:
+	return error;
+}
+
+static const struct iio_info vprbrd_adc_iio_info = {
+	.read_raw = &vprbrd_iio_read_raw,
+	.driver_module = THIS_MODULE,
+};
+
+static int __devinit vprbrd_adc_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_adc *adc;
+	struct iio_dev *indio_dev;
+	int ret;
+
+	/* registering iio */
+	indio_dev = iio_device_alloc(sizeof(*adc));
+	if (!indio_dev) {
+		dev_err(&pdev->dev, "failed allocating iio device\n");
+		return -ENOMEM;
+	}
+
+	adc = iio_priv(indio_dev);
+	adc->vb = vb;
+	indio_dev->name = "viperboard adc";
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->info = &vprbrd_adc_iio_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = vprbrd_adc_iio_channels;
+	indio_dev->num_channels = ARRAY_SIZE(vprbrd_adc_iio_channels);
+
+	ret = iio_device_register(indio_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register iio (adc)");
+		goto error;
+	}
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	return 0;
+
+error:
+	iio_device_free(indio_dev);
+	return ret;
+}
+
+static int __devexit vprbrd_adc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static struct platform_driver vprbrd_adc_driver = {
+	.driver = {
+		.name	= "viperboard-adc",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= vprbrd_adc_probe,
+	.remove		= __devexit_p(vprbrd_adc_remove),
+};
+
+module_platform_driver(vprbrd_adc_driver);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("IIO ADC driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-adc");
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
index 4f74ec868a52..276122b37199 100644
--- a/drivers/mfd/viperboard.c
+++ b/drivers/mfd/viperboard.c
@@ -44,6 +44,9 @@ static struct mfd_cell vprbrd_devs[] = {
 	{
 		.name = "viperboard-i2c",
 	},
+	{
+		.name = "viperboard-adc",
+	},
 };
 
 static int vprbrd_probe(struct usb_interface *interface,
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
index ef7851430d86..193452848c04 100644
--- a/include/linux/mfd/viperboard.h
+++ b/include/linux/mfd/viperboard.h
@@ -46,6 +46,7 @@
 #define VPRBRD_USB_REQUEST_I2C      0xe9
 #define VPRBRD_USB_REQUEST_MAJOR    0xea
 #define VPRBRD_USB_REQUEST_MINOR    0xeb
+#define VPRBRD_USB_REQUEST_ADC      0xec
 #define VPRBRD_USB_REQUEST_GPIOA    0xed
 #define VPRBRD_USB_REQUEST_GPIOB    0xdd
 
-- 
cgit v1.2.3-55-g7522


From 1950c7164646bfeeb82c34bc299d82119706afb5 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Fri, 9 Nov 2012 15:19:52 +0100
Subject: mfd: sta2x11-mfd: Add apb-soc regs driver and factor out common code

A driver for the apb-soc registers is needed by the clock
infrastructure code to configure and control clocks on the sta2x11
chip.

Since some of the functions in sta2x11-mfd.c were almost identical
for the two existing platform devices, the following changes
have been performed to avoid further code duplication while
adding the apb-soc-regs driver:

* The sctl_regs and apbreg_regs fields in struct sta2x11_mfd
have been turned into just one array of pointers accessed by
device index.
* Platform probe methods have become one-liners invoking a
common probe with the device's index as second parameter.
* For loops have been inserted where the same operations
were performed for each of the two bars of a pci device.
* The apbreg_mask and sctl_mask functions were almost identical,
so they were turned into inline functions invoking a common
__sta2x11_mfd_mask() with the platform device's index as last
parameter. To do this, enum sta2x11_mfd_plat_dev has been declared in
sta2x11-mfd.h and more device types have been added to it.

Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 350 +++++++++++++++++++++++++++-------------
 include/linux/mfd/sta2x11-mfd.h | 156 +++++++++++++++++-
 2 files changed, 395 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index d35da6820bea..9e01b84aa8bc 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -40,8 +40,7 @@ struct sta2x11_mfd {
 	struct sta2x11_instance *instance;
 	spinlock_t lock;
 	struct list_head list;
-	void __iomem *sctl_regs;
-	void __iomem *apbreg_regs;
+	void __iomem *regs[sta2x11_n_mfd_plat_devs];
 };
 
 static LIST_HEAD(sta2x11_mfd_list);
@@ -100,56 +99,33 @@ static int __devexit mfd_remove(struct pci_dev *pdev)
 	return 0;
 }
 
-/* These two functions are exported and are not expected to fail */
-u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+/* This function is exported and is not expected to fail */
+u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
+		       enum sta2x11_mfd_plat_dev index)
 {
 	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
 	u32 r;
 	unsigned long flags;
+	void __iomem *regs = mfd->regs[index];
 
 	if (!mfd) {
 		dev_warn(&pdev->dev, ": can't access sctl regs\n");
 		return 0;
 	}
-	if (!mfd->sctl_regs) {
+	if (!regs) {
 		dev_warn(&pdev->dev, ": system ctl not initialized\n");
 		return 0;
 	}
 	spin_lock_irqsave(&mfd->lock, flags);
-	r = readl(mfd->sctl_regs + reg);
+	r = readl(regs + reg);
 	r &= ~mask;
 	r |= val;
 	if (mask)
-		writel(r, mfd->sctl_regs + reg);
+		writel(r, regs + reg);
 	spin_unlock_irqrestore(&mfd->lock, flags);
 	return r;
 }
-EXPORT_SYMBOL(sta2x11_sctl_mask);
-
-u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
-{
-	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
-	u32 r;
-	unsigned long flags;
-
-	if (!mfd) {
-		dev_warn(&pdev->dev, ": can't access apb regs\n");
-		return 0;
-	}
-	if (!mfd->apbreg_regs) {
-		dev_warn(&pdev->dev, ": apb bridge not initialized\n");
-		return 0;
-	}
-	spin_lock_irqsave(&mfd->lock, flags);
-	r = readl(mfd->apbreg_regs + reg);
-	r &= ~mask;
-	r |= val;
-	if (mask)
-		writel(r, mfd->apbreg_regs + reg);
-	spin_unlock_irqrestore(&mfd->lock, flags);
-	return r;
-}
-EXPORT_SYMBOL(sta2x11_apbreg_mask);
+EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
 /* Two debugfs files, for our registers (FIXME: one instance only) */
 #define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
@@ -180,51 +156,103 @@ static struct debugfs_regset32 apbreg_regset = {
 	.nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
 };
 
-static struct dentry *sta2x11_sctl_debugfs;
-static struct dentry *sta2x11_apbreg_debugfs;
+#define REG(regname) {.name = #regname, .offset = regname}
+static struct debugfs_reg32 sta2x11_apb_soc_regs_regs[] = {
+	REG(PCIE_EP1_FUNC3_0_INTR_REG), REG(PCIE_EP1_FUNC7_4_INTR_REG),
+	REG(PCIE_EP2_FUNC3_0_INTR_REG), REG(PCIE_EP2_FUNC7_4_INTR_REG),
+	REG(PCIE_EP3_FUNC3_0_INTR_REG), REG(PCIE_EP3_FUNC7_4_INTR_REG),
+	REG(PCIE_EP4_FUNC3_0_INTR_REG), REG(PCIE_EP4_FUNC7_4_INTR_REG),
+	REG(PCIE_INTR_ENABLE0_REG), REG(PCIE_INTR_ENABLE1_REG),
+	REG(PCIE_EP1_FUNC_TC_REG), REG(PCIE_EP2_FUNC_TC_REG),
+	REG(PCIE_EP3_FUNC_TC_REG), REG(PCIE_EP4_FUNC_TC_REG),
+	REG(PCIE_EP1_FUNC_F_REG), REG(PCIE_EP2_FUNC_F_REG),
+	REG(PCIE_EP3_FUNC_F_REG), REG(PCIE_EP4_FUNC_F_REG),
+	REG(PCIE_PAB_AMBA_SW_RST_REG), REG(PCIE_PM_STATUS_0_PORT_0_4),
+	REG(PCIE_PM_STATUS_7_0_EP1), REG(PCIE_PM_STATUS_7_0_EP2),
+	REG(PCIE_PM_STATUS_7_0_EP3), REG(PCIE_PM_STATUS_7_0_EP4),
+	REG(PCIE_DEV_ID_0_EP1_REG), REG(PCIE_CC_REV_ID_0_EP1_REG),
+	REG(PCIE_DEV_ID_1_EP1_REG), REG(PCIE_CC_REV_ID_1_EP1_REG),
+	REG(PCIE_DEV_ID_2_EP1_REG), REG(PCIE_CC_REV_ID_2_EP1_REG),
+	REG(PCIE_DEV_ID_3_EP1_REG), REG(PCIE_CC_REV_ID_3_EP1_REG),
+	REG(PCIE_DEV_ID_4_EP1_REG), REG(PCIE_CC_REV_ID_4_EP1_REG),
+	REG(PCIE_DEV_ID_5_EP1_REG), REG(PCIE_CC_REV_ID_5_EP1_REG),
+	REG(PCIE_DEV_ID_6_EP1_REG), REG(PCIE_CC_REV_ID_6_EP1_REG),
+	REG(PCIE_DEV_ID_7_EP1_REG), REG(PCIE_CC_REV_ID_7_EP1_REG),
+	REG(PCIE_DEV_ID_0_EP2_REG), REG(PCIE_CC_REV_ID_0_EP2_REG),
+	REG(PCIE_DEV_ID_1_EP2_REG), REG(PCIE_CC_REV_ID_1_EP2_REG),
+	REG(PCIE_DEV_ID_2_EP2_REG), REG(PCIE_CC_REV_ID_2_EP2_REG),
+	REG(PCIE_DEV_ID_3_EP2_REG), REG(PCIE_CC_REV_ID_3_EP2_REG),
+	REG(PCIE_DEV_ID_4_EP2_REG), REG(PCIE_CC_REV_ID_4_EP2_REG),
+	REG(PCIE_DEV_ID_5_EP2_REG), REG(PCIE_CC_REV_ID_5_EP2_REG),
+	REG(PCIE_DEV_ID_6_EP2_REG), REG(PCIE_CC_REV_ID_6_EP2_REG),
+	REG(PCIE_DEV_ID_7_EP2_REG), REG(PCIE_CC_REV_ID_7_EP2_REG),
+	REG(PCIE_DEV_ID_0_EP3_REG), REG(PCIE_CC_REV_ID_0_EP3_REG),
+	REG(PCIE_DEV_ID_1_EP3_REG), REG(PCIE_CC_REV_ID_1_EP3_REG),
+	REG(PCIE_DEV_ID_2_EP3_REG), REG(PCIE_CC_REV_ID_2_EP3_REG),
+	REG(PCIE_DEV_ID_3_EP3_REG), REG(PCIE_CC_REV_ID_3_EP3_REG),
+	REG(PCIE_DEV_ID_4_EP3_REG), REG(PCIE_CC_REV_ID_4_EP3_REG),
+	REG(PCIE_DEV_ID_5_EP3_REG), REG(PCIE_CC_REV_ID_5_EP3_REG),
+	REG(PCIE_DEV_ID_6_EP3_REG), REG(PCIE_CC_REV_ID_6_EP3_REG),
+	REG(PCIE_DEV_ID_7_EP3_REG), REG(PCIE_CC_REV_ID_7_EP3_REG),
+	REG(PCIE_DEV_ID_0_EP4_REG), REG(PCIE_CC_REV_ID_0_EP4_REG),
+	REG(PCIE_DEV_ID_1_EP4_REG), REG(PCIE_CC_REV_ID_1_EP4_REG),
+	REG(PCIE_DEV_ID_2_EP4_REG), REG(PCIE_CC_REV_ID_2_EP4_REG),
+	REG(PCIE_DEV_ID_3_EP4_REG), REG(PCIE_CC_REV_ID_3_EP4_REG),
+	REG(PCIE_DEV_ID_4_EP4_REG), REG(PCIE_CC_REV_ID_4_EP4_REG),
+	REG(PCIE_DEV_ID_5_EP4_REG), REG(PCIE_CC_REV_ID_5_EP4_REG),
+	REG(PCIE_DEV_ID_6_EP4_REG), REG(PCIE_CC_REV_ID_6_EP4_REG),
+	REG(PCIE_DEV_ID_7_EP4_REG), REG(PCIE_CC_REV_ID_7_EP4_REG),
+	REG(PCIE_SUBSYS_VEN_ID_REG), REG(PCIE_COMMON_CLOCK_CONFIG_0_4_0),
+	REG(PCIE_MIPHYP_SSC_EN_REG), REG(PCIE_MIPHYP_ADDR_REG),
+	REG(PCIE_L1_ASPM_READY_REG), REG(PCIE_EXT_CFG_RDY_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS0_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS1_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS2_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS3_REG),
+	REG(DMA_IP_CTRL_REG), REG(DISP_BRIDGE_PU_PD_CTRL_REG),
+	REG(VIP_PU_PD_CTRL_REG), REG(USB_MLB_PU_PD_CTRL_REG),
+	REG(SDIO_PU_PD_MISCFUNC_CTRL_REG1), REG(SDIO_PU_PD_MISCFUNC_CTRL_REG2),
+	REG(UART_PU_PD_CTRL_REG), REG(ARM_Lock), REG(SYS_IO_CHAR_REG1),
+	REG(SYS_IO_CHAR_REG2), REG(SATA_CORE_ID_REG), REG(SATA_CTRL_REG),
+	REG(I2C_HSFIX_MISC_REG), REG(SPARE2_RESERVED), REG(SPARE3_RESERVED),
+	REG(MASTER_LOCK_REG), REG(SYSTEM_CONFIG_STATUS_REG),
+	REG(MSP_CLK_CTRL_REG), REG(COMPENSATION_REG1), REG(COMPENSATION_REG2),
+	REG(COMPENSATION_REG3), REG(TEST_CTL_REG),
+};
+#undef REG
 
-/* Probe for the two platform devices */
-static int sta2x11_sctl_probe(struct platform_device *dev)
-{
-	struct pci_dev **pdev;
-	struct sta2x11_mfd *mfd;
-	struct resource *res;
+static struct debugfs_regset32 apb_soc_regs_regset = {
+	.regs = sta2x11_apb_soc_regs_regs,
+	.nregs = ARRAY_SIZE(sta2x11_apb_soc_regs_regs),
+};
 
-	pdev = dev->dev.platform_data;
-	mfd = sta2x11_mfd_find(*pdev);
-	if (!mfd)
-		return -ENODEV;
 
-	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENOMEM;
+static struct dentry *sta2x11_mfd_debugfs[sta2x11_n_mfd_plat_devs];
 
-	if (!request_mem_region(res->start, resource_size(res),
-				"sta2x11-sctl"))
-		return -EBUSY;
+static struct debugfs_regset32 *sta2x11_mfd_regset[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = &sctl_regset,
+	[sta2x11_apbreg] = &apbreg_regset,
+	[sta2x11_apb_soc_regs] = &apb_soc_regs_regset,
+};
 
-	mfd->sctl_regs = ioremap(res->start, resource_size(res));
-	if (!mfd->sctl_regs) {
-		release_mem_region(res->start, resource_size(res));
-		return -ENOMEM;
-	}
-	sctl_regset.base = mfd->sctl_regs;
-	sta2x11_sctl_debugfs = debugfs_create_regset32("sta2x11-sctl",
-						  S_IFREG | S_IRUGO,
-						  NULL, &sctl_regset);
-	return 0;
-}
+static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = "sta2x11-sctl",
+	[sta2x11_apbreg] = "sta2x11-apbreg",
+	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
+};
 
-static int sta2x11_apbreg_probe(struct platform_device *dev)
+/* Probe for the three platform devices */
+
+static int sta2x11_mfd_platform_probe(struct platform_device *dev,
+				      enum sta2x11_mfd_plat_dev index)
 {
 	struct pci_dev **pdev;
 	struct sta2x11_mfd *mfd;
 	struct resource *res;
+	const char *name = sta2x11_mfd_names[index];
+	struct debugfs_regset32 *regset = sta2x11_mfd_regset[index];
 
 	pdev = dev->dev.platform_data;
-	dev_dbg(&dev->dev, "%s: pdata is %p\n", __func__, pdev);
-	dev_dbg(&dev->dev, "%s: *pdata is %p\n", __func__, *pdev);
-
 	mfd = sta2x11_mfd_find(*pdev);
 	if (!mfd)
 		return -ENODEV;
@@ -233,25 +261,37 @@ static int sta2x11_apbreg_probe(struct platform_device *dev)
 	if (!res)
 		return -ENOMEM;
 
-	if (!request_mem_region(res->start, resource_size(res),
-				"sta2x11-apbreg"))
+	if (!request_mem_region(res->start, resource_size(res), name))
 		return -EBUSY;
 
-	mfd->apbreg_regs = ioremap(res->start, resource_size(res));
-	if (!mfd->apbreg_regs) {
+	mfd->regs[index] = ioremap(res->start, resource_size(res));
+	if (!mfd->regs[index]) {
 		release_mem_region(res->start, resource_size(res));
 		return -ENOMEM;
 	}
-	dev_dbg(&dev->dev, "%s: regbase %p\n", __func__, mfd->apbreg_regs);
-
-	apbreg_regset.base = mfd->apbreg_regs;
-	sta2x11_apbreg_debugfs = debugfs_create_regset32("sta2x11-apbreg",
-						  S_IFREG | S_IRUGO,
-						  NULL, &apbreg_regset);
+	regset->base = mfd->regs[index];
+	sta2x11_mfd_debugfs[index] = debugfs_create_regset32(name,
+							     S_IFREG | S_IRUGO,
+							     NULL, regset);
 	return 0;
 }
 
-/* The two platform drivers */
+static int sta2x11_sctl_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_sctl);
+}
+
+static int sta2x11_apbreg_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_apbreg);
+}
+
+static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs);
+}
+
+/* The three platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
 		.name	= "sta2x11-sctl",
@@ -280,13 +320,29 @@ static int __init sta2x11_apbreg_init(void)
 	return platform_driver_register(&sta2x11_platform_driver);
 }
 
+static struct platform_driver sta2x11_apb_soc_regs_platform_driver = {
+	.driver = {
+		.name	= "sta2x11-apb-soc-regs",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sta2x11_apb_soc_regs_probe,
+};
+
+static int __init sta2x11_apb_soc_regs_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver);
+}
+
 /*
- * What follows is the PCI device that hosts the above two pdevs.
+ * What follows are the PCI devices that host the above pdevs.
  * Each logic block is 4kB and they are all consecutive: we use this info.
  */
 
-/* Bar 0 */
-enum bar0_cells {
+/* Mfd 0 device */
+
+/* Mfd 0, Bar 0 */
+enum mfd0_bar0_cells {
 	STA2X11_GPIO_0 = 0,
 	STA2X11_GPIO_1,
 	STA2X11_GPIO_2,
@@ -295,8 +351,8 @@ enum bar0_cells {
 	STA2X11_SCR,
 	STA2X11_TIME,
 };
-/* Bar 1 */
-enum bar1_cells {
+/* Mfd 0 , Bar 1 */
+enum mfd0_bar1_cells {
 	STA2X11_APBREG = 0,
 };
 #define CELL_4K(_name, _cell) { \
@@ -330,17 +386,46 @@ static const __devinitconst struct resource apbreg_resources[] = {
 #define DEV(_name, _r) \
 	{ .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
 
-static __devinitdata struct mfd_cell sta2x11_mfd_bar0[] = {
+static __devinitdata struct mfd_cell sta2x11_mfd0_bar0[] = {
 	DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
 	DEV("sta2x11-sctl", sctl_resources),
 	DEV("sta2x11-scr", scr_resources),
 	DEV("sta2x11-time", time_resources),
 };
 
-static __devinitdata struct mfd_cell sta2x11_mfd_bar1[] = {
+static __devinitdata struct mfd_cell sta2x11_mfd0_bar1[] = {
 	DEV("sta2x11-apbreg", apbreg_resources),
 };
 
+/* Mfd 1 devices */
+
+/* Mfd 1, Bar 0 */
+enum mfd1_bar0_cells {
+	STA2X11_VIC = 0,
+};
+
+/* Mfd 1, Bar 1 */
+enum mfd1_bar1_cells {
+	STA2X11_APB_SOC_REGS = 0,
+};
+
+static const __devinitconst struct resource vic_resources[] = {
+	CELL_4K("sta2x11-vic", STA2X11_VIC),
+};
+
+static const __devinitconst struct resource apb_soc_regs_resources[] = {
+	CELL_4K("sta2x11-apb-soc-regs", STA2X11_APB_SOC_REGS),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = {
+	DEV("sta2x11-vic", vic_resources),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = {
+	DEV("sta2x11-apb-soc-regs", apb_soc_regs_resources),
+};
+
+
 static int sta2x11_mfd_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	pci_save_state(pdev);
@@ -363,10 +448,63 @@ static int sta2x11_mfd_resume(struct pci_dev *pdev)
 	return 0;
 }
 
+struct sta2x11_mfd_bar_setup_data {
+	struct mfd_cell *cells;
+	int ncells;
+};
+
+struct sta2x11_mfd_setup_data {
+	struct sta2x11_mfd_bar_setup_data bars[2];
+};
+
+#define STA2X11_MFD0 0
+#define STA2X11_MFD1 1
+
+static struct sta2x11_mfd_setup_data mfd_setup_data[] = {
+	/* Mfd 0: gpio, sctl, scr, timers / apbregs */
+	[STA2X11_MFD0] = {
+		.bars = {
+			[0] = {
+				.cells = sta2x11_mfd0_bar0,
+				.ncells = ARRAY_SIZE(sta2x11_mfd0_bar0),
+			},
+			[1] = {
+				.cells = sta2x11_mfd0_bar1,
+				.ncells = ARRAY_SIZE(sta2x11_mfd0_bar1),
+			},
+		},
+	},
+	/* Mfd 1: vic / apb-soc-regs */
+	[STA2X11_MFD1] = {
+		.bars = {
+			[0] = {
+				.cells = sta2x11_mfd1_bar0,
+				.ncells = ARRAY_SIZE(sta2x11_mfd1_bar0),
+			},
+			[1] = {
+				.cells = sta2x11_mfd1_bar1,
+				.ncells = ARRAY_SIZE(sta2x11_mfd1_bar1),
+			},
+		},
+	},
+};
+
+static void __devinit sta2x11_mfd_setup(struct pci_dev *pdev,
+					struct sta2x11_mfd_setup_data *sd)
+{
+	int i, j;
+	for (i = 0; i < ARRAY_SIZE(sd->bars); i++)
+		for (j = 0; j < sd->bars[i].ncells; j++) {
+			sd->bars[i].cells[j].pdata_size = sizeof(pdev);
+			sd->bars[i].cells[j].platform_data = &pdev;
+		}
+}
+
 static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 				       const struct pci_device_id *pci_id)
 {
 	int err, i;
+	struct sta2x11_mfd_setup_data *setup_data;
 	struct sta2x11_gpio_pdata *gpio_data;
 
 	dev_info(&pdev->dev, "%s\n", __func__);
@@ -381,6 +519,10 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 	if (err)
 		dev_info(&pdev->dev, "Enable msi failed\n");
 
+	setup_data = pci_id->device == PCI_DEVICE_ID_STMICRO_GPIO ?
+		&mfd_setup_data[STA2X11_MFD0] :
+		&mfd_setup_data[STA2X11_MFD1];
+
 	/* Read gpio config data as pci device's platform data */
 	gpio_data = dev_get_platdata(&pdev->dev);
 	if (!gpio_data)
@@ -392,35 +534,23 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 		pdev, &pdev);
 
 	/* platform data is the pci device for all of them */
-	for (i = 0; i < ARRAY_SIZE(sta2x11_mfd_bar0); i++) {
-		sta2x11_mfd_bar0[i].pdata_size = sizeof(pdev);
-		sta2x11_mfd_bar0[i].platform_data = &pdev;
-	}
-	sta2x11_mfd_bar1[0].pdata_size = sizeof(pdev);
-	sta2x11_mfd_bar1[0].platform_data = &pdev;
+	sta2x11_mfd_setup(pdev, setup_data);
 
 	/* Record this pdev before mfd_add_devices: their probe looks for it */
 	sta2x11_mfd_add(pdev, GFP_ATOMIC);
 
-
-	err = mfd_add_devices(&pdev->dev, -1,
-			      sta2x11_mfd_bar0,
-			      ARRAY_SIZE(sta2x11_mfd_bar0),
-			      &pdev->resource[0],
-			      0, NULL);
-	if (err) {
-		dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err);
-		goto err_disable;
-	}
-
-	err = mfd_add_devices(&pdev->dev, -1,
-			      sta2x11_mfd_bar1,
-			      ARRAY_SIZE(sta2x11_mfd_bar1),
-			      &pdev->resource[1],
-			      0, NULL);
-	if (err) {
-		dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err);
-		goto err_disable;
+	/* Just 2 bars for all mfd's at present */
+	for (i = 0; i < 2; i++) {
+		err = mfd_add_devices(&pdev->dev, -1,
+				      setup_data->bars[i].cells,
+				      setup_data->bars[i].ncells,
+				      &pdev->resource[i],
+				      0, NULL);
+		if (err) {
+			dev_err(&pdev->dev,
+				"mfd_add_devices[%d] failed: %d\n", i, err);
+			goto err_disable;
+		}
 	}
 
 	return 0;
@@ -434,6 +564,7 @@ err_disable:
 
 static DEFINE_PCI_DEVICE_TABLE(sta2x11_mfd_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_GPIO)},
+	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_VIC)},
 	{0,},
 };
 
@@ -459,6 +590,7 @@ static int __init sta2x11_mfd_init(void)
  */
 subsys_initcall(sta2x11_apbreg_init);
 subsys_initcall(sta2x11_sctl_init);
+subsys_initcall(sta2x11_apb_soc_regs_init);
 rootfs_initcall(sta2x11_mfd_init);
 
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index d179227e866f..4d858797d7e2 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -26,6 +26,20 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 
+enum sta2x11_mfd_plat_dev {
+	sta2x11_sctl = 0,
+	sta2x11_gpio,
+	sta2x11_scr,
+	sta2x11_time,
+	sta2x11_apbreg,
+	sta2x11_apb_soc_regs,
+	sta2x11_vic,
+	sta2x11_n_mfd_plat_devs,
+};
+
+extern u32
+__sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev);
+
 /*
  * The MFD PCI block includes the GPIO peripherals and other register blocks.
  * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".)
@@ -182,7 +196,11 @@ struct sta2x11_gpio_pdata {
  * The APB bridge has its own registers, needed by our users as well.
  * They are accessed with the following read/mask/write function.
  */
-u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+static inline u32
+sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apbreg);
+}
 
 /* CAN and MLB */
 #define APBREG_BSR	0x00	/* Bridge Status Reg */
@@ -211,7 +229,11 @@ u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
  * The system controller has its own registers. Some of these are accessed
  * by out users as well, using the following read/mask/write/function
  */
-u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+static inline
+u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_sctl);
+}
 
 #define SCTL_SCCTL		0x00	/* System controller control register */
 #define SCTL_ARMCFG		0x04	/* ARM configuration register */
@@ -321,4 +343,134 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
 #define SCTL_SCPEREN1_I2C3		(1 << 16)
 #define SCTL_SCPEREN1_USB_PHY		(1 << 17)
 
+/*
+ * APB-SOC registers
+ */
+static inline
+u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apb_soc_regs);
+}
+
+#define PCIE_EP1_FUNC3_0_INTR_REG	0x000
+#define PCIE_EP1_FUNC7_4_INTR_REG	0x004
+#define PCIE_EP2_FUNC3_0_INTR_REG	0x008
+#define PCIE_EP2_FUNC7_4_INTR_REG	0x00c
+#define PCIE_EP3_FUNC3_0_INTR_REG	0x010
+#define PCIE_EP3_FUNC7_4_INTR_REG	0x014
+#define PCIE_EP4_FUNC3_0_INTR_REG	0x018
+#define PCIE_EP4_FUNC7_4_INTR_REG	0x01c
+#define PCIE_INTR_ENABLE0_REG		0x020
+#define PCIE_INTR_ENABLE1_REG		0x024
+#define PCIE_EP1_FUNC_TC_REG		0x028
+#define PCIE_EP2_FUNC_TC_REG		0x02c
+#define PCIE_EP3_FUNC_TC_REG		0x030
+#define PCIE_EP4_FUNC_TC_REG		0x034
+#define PCIE_EP1_FUNC_F_REG		0x038
+#define PCIE_EP2_FUNC_F_REG		0x03c
+#define PCIE_EP3_FUNC_F_REG		0x040
+#define PCIE_EP4_FUNC_F_REG		0x044
+#define PCIE_PAB_AMBA_SW_RST_REG	0x048
+#define PCIE_PM_STATUS_0_PORT_0_4	0x04c
+#define PCIE_PM_STATUS_7_0_EP1		0x050
+#define PCIE_PM_STATUS_7_0_EP2		0x054
+#define PCIE_PM_STATUS_7_0_EP3		0x058
+#define PCIE_PM_STATUS_7_0_EP4		0x05c
+#define PCIE_DEV_ID_0_EP1_REG		0x060
+#define PCIE_CC_REV_ID_0_EP1_REG	0x064
+#define PCIE_DEV_ID_1_EP1_REG		0x068
+#define PCIE_CC_REV_ID_1_EP1_REG	0x06c
+#define PCIE_DEV_ID_2_EP1_REG		0x070
+#define PCIE_CC_REV_ID_2_EP1_REG	0x074
+#define PCIE_DEV_ID_3_EP1_REG		0x078
+#define PCIE_CC_REV_ID_3_EP1_REG	0x07c
+#define PCIE_DEV_ID_4_EP1_REG		0x080
+#define PCIE_CC_REV_ID_4_EP1_REG	0x084
+#define PCIE_DEV_ID_5_EP1_REG		0x088
+#define PCIE_CC_REV_ID_5_EP1_REG	0x08c
+#define PCIE_DEV_ID_6_EP1_REG		0x090
+#define PCIE_CC_REV_ID_6_EP1_REG	0x094
+#define PCIE_DEV_ID_7_EP1_REG		0x098
+#define PCIE_CC_REV_ID_7_EP1_REG	0x09c
+#define PCIE_DEV_ID_0_EP2_REG		0x0a0
+#define PCIE_CC_REV_ID_0_EP2_REG	0x0a4
+#define PCIE_DEV_ID_1_EP2_REG		0x0a8
+#define PCIE_CC_REV_ID_1_EP2_REG	0x0ac
+#define PCIE_DEV_ID_2_EP2_REG		0x0b0
+#define PCIE_CC_REV_ID_2_EP2_REG	0x0b4
+#define PCIE_DEV_ID_3_EP2_REG		0x0b8
+#define PCIE_CC_REV_ID_3_EP2_REG	0x0bc
+#define PCIE_DEV_ID_4_EP2_REG		0x0c0
+#define PCIE_CC_REV_ID_4_EP2_REG	0x0c4
+#define PCIE_DEV_ID_5_EP2_REG		0x0c8
+#define PCIE_CC_REV_ID_5_EP2_REG	0x0cc
+#define PCIE_DEV_ID_6_EP2_REG		0x0d0
+#define PCIE_CC_REV_ID_6_EP2_REG	0x0d4
+#define PCIE_DEV_ID_7_EP2_REG		0x0d8
+#define PCIE_CC_REV_ID_7_EP2_REG	0x0dC
+#define PCIE_DEV_ID_0_EP3_REG		0x0e0
+#define PCIE_CC_REV_ID_0_EP3_REG	0x0e4
+#define PCIE_DEV_ID_1_EP3_REG		0x0e8
+#define PCIE_CC_REV_ID_1_EP3_REG	0x0ec
+#define PCIE_DEV_ID_2_EP3_REG		0x0f0
+#define PCIE_CC_REV_ID_2_EP3_REG	0x0f4
+#define PCIE_DEV_ID_3_EP3_REG		0x0f8
+#define PCIE_CC_REV_ID_3_EP3_REG	0x0fc
+#define PCIE_DEV_ID_4_EP3_REG		0x100
+#define PCIE_CC_REV_ID_4_EP3_REG	0x104
+#define PCIE_DEV_ID_5_EP3_REG		0x108
+#define PCIE_CC_REV_ID_5_EP3_REG	0x10c
+#define PCIE_DEV_ID_6_EP3_REG		0x110
+#define PCIE_CC_REV_ID_6_EP3_REG	0x114
+#define PCIE_DEV_ID_7_EP3_REG		0x118
+#define PCIE_CC_REV_ID_7_EP3_REG	0x11c
+#define PCIE_DEV_ID_0_EP4_REG		0x120
+#define PCIE_CC_REV_ID_0_EP4_REG	0x124
+#define PCIE_DEV_ID_1_EP4_REG		0x128
+#define PCIE_CC_REV_ID_1_EP4_REG	0x12c
+#define PCIE_DEV_ID_2_EP4_REG		0x130
+#define PCIE_CC_REV_ID_2_EP4_REG	0x134
+#define PCIE_DEV_ID_3_EP4_REG		0x138
+#define PCIE_CC_REV_ID_3_EP4_REG	0x13c
+#define PCIE_DEV_ID_4_EP4_REG		0x140
+#define PCIE_CC_REV_ID_4_EP4_REG	0x144
+#define PCIE_DEV_ID_5_EP4_REG		0x148
+#define PCIE_CC_REV_ID_5_EP4_REG	0x14c
+#define PCIE_DEV_ID_6_EP4_REG		0x150
+#define PCIE_CC_REV_ID_6_EP4_REG	0x154
+#define PCIE_DEV_ID_7_EP4_REG		0x158
+#define PCIE_CC_REV_ID_7_EP4_REG	0x15c
+#define PCIE_SUBSYS_VEN_ID_REG		0x160
+#define PCIE_COMMON_CLOCK_CONFIG_0_4_0	0x164
+#define PCIE_MIPHYP_SSC_EN_REG		0x168
+#define PCIE_MIPHYP_ADDR_REG		0x16c
+#define PCIE_L1_ASPM_READY_REG		0x170
+#define PCIE_EXT_CFG_RDY_REG		0x174
+#define PCIE_SoC_INT_ROUTER_STATUS0_REG 0x178
+#define PCIE_SoC_INT_ROUTER_STATUS1_REG 0x17c
+#define PCIE_SoC_INT_ROUTER_STATUS2_REG 0x180
+#define PCIE_SoC_INT_ROUTER_STATUS3_REG 0x184
+#define DMA_IP_CTRL_REG			0x324
+#define DISP_BRIDGE_PU_PD_CTRL_REG	0x328
+#define VIP_PU_PD_CTRL_REG		0x32c
+#define USB_MLB_PU_PD_CTRL_REG		0x330
+#define SDIO_PU_PD_MISCFUNC_CTRL_REG1	0x334
+#define SDIO_PU_PD_MISCFUNC_CTRL_REG2	0x338
+#define UART_PU_PD_CTRL_REG		0x33c
+#define ARM_Lock			0x340
+#define SYS_IO_CHAR_REG1		0x344
+#define SYS_IO_CHAR_REG2		0x348
+#define SATA_CORE_ID_REG		0x34c
+#define SATA_CTRL_REG			0x350
+#define I2C_HSFIX_MISC_REG		0x354
+#define SPARE2_RESERVED			0x358
+#define SPARE3_RESERVED			0x35c
+#define MASTER_LOCK_REG			0x368
+#define SYSTEM_CONFIG_STATUS_REG	0x36c
+#define MSP_CLK_CTRL_REG		0x39c
+#define COMPENSATION_REG1		0x3c4
+#define COMPENSATION_REG2		0x3c8
+#define COMPENSATION_REG3		0x3cc
+#define TEST_CTL_REG			0x3d0
+
 #endif /* __STA2X11_MFD_H */
-- 
cgit v1.2.3-55-g7522


From d94e25535a7979a6c81922496f475a5dd0e006b4 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Fri, 9 Nov 2012 15:19:53 +0100
Subject: mfd: sta2x11-mfd: Add regmap support

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |   1 +
 drivers/mfd/sta2x11-mfd.c       | 234 ++++++++++++++++++++++------------------
 include/linux/mfd/sta2x11-mfd.h |   1 +
 3 files changed, 131 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b280639a7634..59359a7a2493 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1046,6 +1046,7 @@ config MFD_STA2X11
 	bool "STA2X11 multi function device support"
 	depends on STA2X11
 	select MFD_CORE
+	select REGMAP_MMIO
 
 config MFD_SYSCON
 	bool "System Controller Register R/W Based on Regmap"
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 9e01b84aa8bc..0cac2013bbf6 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -27,17 +27,25 @@
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
-#include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/sta2x11-mfd.h>
+#include <linux/regmap.h>
 
 #include <asm/sta2x11.h>
 
+static inline int __reg_within_range(unsigned int r,
+				     unsigned int start,
+				     unsigned int end)
+{
+	return ((r >= start) && (r <= end));
+}
+
 /* This describes STA2X11 MFD chip for us, we may have several */
 struct sta2x11_mfd {
 	struct sta2x11_instance *instance;
+	struct regmap *regmap[sta2x11_n_mfd_plat_devs];
 	spinlock_t lock;
 	struct list_head list;
 	void __iomem *regs[sta2x11_n_mfd_plat_devs];
@@ -127,118 +135,126 @@ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
 }
 EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
-/* Two debugfs files, for our registers (FIXME: one instance only) */
-#define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
-static struct debugfs_reg32 sta2x11_sctl_regs[] = {
-	REG(SCCTL), REG(ARMCFG), REG(SCPLLCTL), REG(SCPLLFCTRL),
-	REG(SCRESFRACT), REG(SCRESCTRL1), REG(SCRESXTRL2), REG(SCPEREN0),
-	REG(SCPEREN1), REG(SCPEREN2), REG(SCGRST), REG(SCPCIPMCR1),
-	REG(SCPCIPMCR2), REG(SCPCIPMSR1), REG(SCPCIPMSR2), REG(SCPCIPMSR3),
-	REG(SCINTREN), REG(SCRISR), REG(SCCLKSTAT0), REG(SCCLKSTAT1),
-	REG(SCCLKSTAT2), REG(SCRSTSTA),
-};
-#undef REG
+/*
+ * Special sta2x11-mfd regmap lock/unlock functions
+ */
 
-static struct debugfs_regset32 sctl_regset = {
-	.regs = sta2x11_sctl_regs,
-	.nregs = ARRAY_SIZE(sta2x11_sctl_regs),
-};
+static void sta2x11_regmap_lock(void *__lock)
+{
+	spinlock_t *lock = __lock;
+	spin_lock(lock);
+}
 
-#define REG(regname) {.name = #regname, .offset = regname}
-static struct debugfs_reg32 sta2x11_apbreg_regs[] = {
-	REG(APBREG_BSR), REG(APBREG_PAER), REG(APBREG_PWAC), REG(APBREG_PRAC),
-	REG(APBREG_PCG), REG(APBREG_PUR), REG(APBREG_EMU_PCG),
-};
-#undef REG
+static void sta2x11_regmap_unlock(void *__lock)
+{
+	spinlock_t *lock = __lock;
+	spin_unlock(lock);
+}
 
-static struct debugfs_regset32 apbreg_regset = {
-	.regs = sta2x11_apbreg_regs,
-	.nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
+static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = "sta2x11-sctl",
+	[sta2x11_apbreg] = "sta2x11-apbreg",
+	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
 };
 
-#define REG(regname) {.name = #regname, .offset = regname}
-static struct debugfs_reg32 sta2x11_apb_soc_regs_regs[] = {
-	REG(PCIE_EP1_FUNC3_0_INTR_REG), REG(PCIE_EP1_FUNC7_4_INTR_REG),
-	REG(PCIE_EP2_FUNC3_0_INTR_REG), REG(PCIE_EP2_FUNC7_4_INTR_REG),
-	REG(PCIE_EP3_FUNC3_0_INTR_REG), REG(PCIE_EP3_FUNC7_4_INTR_REG),
-	REG(PCIE_EP4_FUNC3_0_INTR_REG), REG(PCIE_EP4_FUNC7_4_INTR_REG),
-	REG(PCIE_INTR_ENABLE0_REG), REG(PCIE_INTR_ENABLE1_REG),
-	REG(PCIE_EP1_FUNC_TC_REG), REG(PCIE_EP2_FUNC_TC_REG),
-	REG(PCIE_EP3_FUNC_TC_REG), REG(PCIE_EP4_FUNC_TC_REG),
-	REG(PCIE_EP1_FUNC_F_REG), REG(PCIE_EP2_FUNC_F_REG),
-	REG(PCIE_EP3_FUNC_F_REG), REG(PCIE_EP4_FUNC_F_REG),
-	REG(PCIE_PAB_AMBA_SW_RST_REG), REG(PCIE_PM_STATUS_0_PORT_0_4),
-	REG(PCIE_PM_STATUS_7_0_EP1), REG(PCIE_PM_STATUS_7_0_EP2),
-	REG(PCIE_PM_STATUS_7_0_EP3), REG(PCIE_PM_STATUS_7_0_EP4),
-	REG(PCIE_DEV_ID_0_EP1_REG), REG(PCIE_CC_REV_ID_0_EP1_REG),
-	REG(PCIE_DEV_ID_1_EP1_REG), REG(PCIE_CC_REV_ID_1_EP1_REG),
-	REG(PCIE_DEV_ID_2_EP1_REG), REG(PCIE_CC_REV_ID_2_EP1_REG),
-	REG(PCIE_DEV_ID_3_EP1_REG), REG(PCIE_CC_REV_ID_3_EP1_REG),
-	REG(PCIE_DEV_ID_4_EP1_REG), REG(PCIE_CC_REV_ID_4_EP1_REG),
-	REG(PCIE_DEV_ID_5_EP1_REG), REG(PCIE_CC_REV_ID_5_EP1_REG),
-	REG(PCIE_DEV_ID_6_EP1_REG), REG(PCIE_CC_REV_ID_6_EP1_REG),
-	REG(PCIE_DEV_ID_7_EP1_REG), REG(PCIE_CC_REV_ID_7_EP1_REG),
-	REG(PCIE_DEV_ID_0_EP2_REG), REG(PCIE_CC_REV_ID_0_EP2_REG),
-	REG(PCIE_DEV_ID_1_EP2_REG), REG(PCIE_CC_REV_ID_1_EP2_REG),
-	REG(PCIE_DEV_ID_2_EP2_REG), REG(PCIE_CC_REV_ID_2_EP2_REG),
-	REG(PCIE_DEV_ID_3_EP2_REG), REG(PCIE_CC_REV_ID_3_EP2_REG),
-	REG(PCIE_DEV_ID_4_EP2_REG), REG(PCIE_CC_REV_ID_4_EP2_REG),
-	REG(PCIE_DEV_ID_5_EP2_REG), REG(PCIE_CC_REV_ID_5_EP2_REG),
-	REG(PCIE_DEV_ID_6_EP2_REG), REG(PCIE_CC_REV_ID_6_EP2_REG),
-	REG(PCIE_DEV_ID_7_EP2_REG), REG(PCIE_CC_REV_ID_7_EP2_REG),
-	REG(PCIE_DEV_ID_0_EP3_REG), REG(PCIE_CC_REV_ID_0_EP3_REG),
-	REG(PCIE_DEV_ID_1_EP3_REG), REG(PCIE_CC_REV_ID_1_EP3_REG),
-	REG(PCIE_DEV_ID_2_EP3_REG), REG(PCIE_CC_REV_ID_2_EP3_REG),
-	REG(PCIE_DEV_ID_3_EP3_REG), REG(PCIE_CC_REV_ID_3_EP3_REG),
-	REG(PCIE_DEV_ID_4_EP3_REG), REG(PCIE_CC_REV_ID_4_EP3_REG),
-	REG(PCIE_DEV_ID_5_EP3_REG), REG(PCIE_CC_REV_ID_5_EP3_REG),
-	REG(PCIE_DEV_ID_6_EP3_REG), REG(PCIE_CC_REV_ID_6_EP3_REG),
-	REG(PCIE_DEV_ID_7_EP3_REG), REG(PCIE_CC_REV_ID_7_EP3_REG),
-	REG(PCIE_DEV_ID_0_EP4_REG), REG(PCIE_CC_REV_ID_0_EP4_REG),
-	REG(PCIE_DEV_ID_1_EP4_REG), REG(PCIE_CC_REV_ID_1_EP4_REG),
-	REG(PCIE_DEV_ID_2_EP4_REG), REG(PCIE_CC_REV_ID_2_EP4_REG),
-	REG(PCIE_DEV_ID_3_EP4_REG), REG(PCIE_CC_REV_ID_3_EP4_REG),
-	REG(PCIE_DEV_ID_4_EP4_REG), REG(PCIE_CC_REV_ID_4_EP4_REG),
-	REG(PCIE_DEV_ID_5_EP4_REG), REG(PCIE_CC_REV_ID_5_EP4_REG),
-	REG(PCIE_DEV_ID_6_EP4_REG), REG(PCIE_CC_REV_ID_6_EP4_REG),
-	REG(PCIE_DEV_ID_7_EP4_REG), REG(PCIE_CC_REV_ID_7_EP4_REG),
-	REG(PCIE_SUBSYS_VEN_ID_REG), REG(PCIE_COMMON_CLOCK_CONFIG_0_4_0),
-	REG(PCIE_MIPHYP_SSC_EN_REG), REG(PCIE_MIPHYP_ADDR_REG),
-	REG(PCIE_L1_ASPM_READY_REG), REG(PCIE_EXT_CFG_RDY_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS0_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS1_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS2_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS3_REG),
-	REG(DMA_IP_CTRL_REG), REG(DISP_BRIDGE_PU_PD_CTRL_REG),
-	REG(VIP_PU_PD_CTRL_REG), REG(USB_MLB_PU_PD_CTRL_REG),
-	REG(SDIO_PU_PD_MISCFUNC_CTRL_REG1), REG(SDIO_PU_PD_MISCFUNC_CTRL_REG2),
-	REG(UART_PU_PD_CTRL_REG), REG(ARM_Lock), REG(SYS_IO_CHAR_REG1),
-	REG(SYS_IO_CHAR_REG2), REG(SATA_CORE_ID_REG), REG(SATA_CTRL_REG),
-	REG(I2C_HSFIX_MISC_REG), REG(SPARE2_RESERVED), REG(SPARE3_RESERVED),
-	REG(MASTER_LOCK_REG), REG(SYSTEM_CONFIG_STATUS_REG),
-	REG(MSP_CLK_CTRL_REG), REG(COMPENSATION_REG1), REG(COMPENSATION_REG2),
-	REG(COMPENSATION_REG3), REG(TEST_CTL_REG),
+static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return !__reg_within_range(reg, SCTL_SCPCIECSBRST, SCTL_SCRSTSTA);
+}
+
+static struct regmap_config sta2x11_sctl_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = SCTL_SCRSTSTA,
+	.writeable_reg = sta2x11_sctl_writeable_reg,
 };
-#undef REG
 
-static struct debugfs_regset32 apb_soc_regs_regset = {
-	.regs = sta2x11_apb_soc_regs_regs,
-	.nregs = ARRAY_SIZE(sta2x11_apb_soc_regs_regs),
+static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg)
+{
+	/* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */
+	if (reg >= APBREG_BSR_SARAC)
+		reg -= APBREG_BSR_SARAC;
+	switch (reg) {
+	case APBREG_BSR:
+	case APBREG_PAER:
+	case APBREG_PWAC:
+	case APBREG_PRAC:
+	case APBREG_PCG:
+	case APBREG_PUR:
+	case APBREG_EMU_PCG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool sta2x11_apbreg_writeable_reg(struct device *dev, unsigned int reg)
+{
+	if (reg >= APBREG_BSR_SARAC)
+		reg -= APBREG_BSR_SARAC;
+	if (!sta2x11_apbreg_readable_reg(dev, reg))
+		return false;
+	return reg != APBREG_PAER;
+}
+
+static struct regmap_config sta2x11_apbreg_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = APBREG_EMU_PCG_SARAC,
+	.readable_reg = sta2x11_apbreg_readable_reg,
+	.writeable_reg = sta2x11_apbreg_writeable_reg,
 };
 
+static bool sta2x11_apb_soc_regs_readable_reg(struct device *dev,
+					      unsigned int reg)
+{
+	return reg <= PCIE_SoC_INT_ROUTER_STATUS3_REG ||
+		__reg_within_range(reg, DMA_IP_CTRL_REG, SPARE3_RESERVED) ||
+		__reg_within_range(reg, MASTER_LOCK_REG,
+				   SYSTEM_CONFIG_STATUS_REG) ||
+		reg == MSP_CLK_CTRL_REG ||
+		__reg_within_range(reg, COMPENSATION_REG1, TEST_CTL_REG);
+}
 
-static struct dentry *sta2x11_mfd_debugfs[sta2x11_n_mfd_plat_devs];
+static bool sta2x11_apb_soc_regs_writeable_reg(struct device *dev,
+					       unsigned int reg)
+{
+	if (!sta2x11_apb_soc_regs_readable_reg(dev, reg))
+		return false;
+	switch (reg) {
+	case PCIE_COMMON_CLOCK_CONFIG_0_4_0:
+	case SYSTEM_CONFIG_STATUS_REG:
+	case COMPENSATION_REG1:
+	case PCIE_SoC_INT_ROUTER_STATUS0_REG...PCIE_SoC_INT_ROUTER_STATUS3_REG:
+	case PCIE_PM_STATUS_0_PORT_0_4...PCIE_PM_STATUS_7_0_EP4:
+		return false;
+	default:
+		return true;
+	}
+}
 
-static struct debugfs_regset32 *sta2x11_mfd_regset[sta2x11_n_mfd_plat_devs] = {
-	[sta2x11_sctl] = &sctl_regset,
-	[sta2x11_apbreg] = &apbreg_regset,
-	[sta2x11_apb_soc_regs] = &apb_soc_regs_regset,
+static struct regmap_config sta2x11_apb_soc_regs_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = TEST_CTL_REG,
+	.readable_reg = sta2x11_apb_soc_regs_readable_reg,
+	.writeable_reg = sta2x11_apb_soc_regs_writeable_reg,
 };
 
-static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
-	[sta2x11_sctl] = "sta2x11-sctl",
-	[sta2x11_apbreg] = "sta2x11-apbreg",
-	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
+static struct regmap_config *
+sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = &sta2x11_sctl_regmap_config,
+	[sta2x11_apbreg] = &sta2x11_apbreg_regmap_config,
+	[sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config,
 };
 
 /* Probe for the three platform devices */
@@ -250,12 +266,14 @@ static int sta2x11_mfd_platform_probe(struct platform_device *dev,
 	struct sta2x11_mfd *mfd;
 	struct resource *res;
 	const char *name = sta2x11_mfd_names[index];
-	struct debugfs_regset32 *regset = sta2x11_mfd_regset[index];
+	struct regmap_config *regmap_config = sta2x11_mfd_regmap_configs[index];
 
 	pdev = dev->dev.platform_data;
 	mfd = sta2x11_mfd_find(*pdev);
 	if (!mfd)
 		return -ENODEV;
+	if (!regmap_config)
+		return -ENODEV;
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!res)
@@ -269,10 +287,16 @@ static int sta2x11_mfd_platform_probe(struct platform_device *dev,
 		release_mem_region(res->start, resource_size(res));
 		return -ENOMEM;
 	}
-	regset->base = mfd->regs[index];
-	sta2x11_mfd_debugfs[index] = debugfs_create_regset32(name,
-							     S_IFREG | S_IRUGO,
-							     NULL, regset);
+	regmap_config->lock_arg = &mfd->lock;
+	/*
+	   No caching, registers could be reached both via regmap and via
+	   void __iomem *
+	*/
+	regmap_config->cache_type = REGCACHE_NONE;
+	mfd->regmap[index] = devm_regmap_init_mmio(&dev->dev, mfd->regs[index],
+						   regmap_config);
+	WARN_ON(!mfd->regmap[index]);
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 4d858797d7e2..83344304f2cf 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -246,6 +246,7 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define SCTL_SCPEREN1		0x20	/* Peripheral clock enable register 1 */
 #define SCTL_SCPEREN2		0x24	/* Peripheral clock enable register 2 */
 #define SCTL_SCGRST		0x28	/* Peripheral global reset */
+#define SCTL_SCPCIECSBRST       0x2c    /* PCIe PAB CSB reset status register */
 #define SCTL_SCPCIPMCR1		0x30	/* PCI power management control 1 */
 #define SCTL_SCPCIPMCR2		0x34	/* PCI power management control 2 */
 #define SCTL_SCPCIPMSR1		0x38	/* PCI power management status 1 */
-- 
cgit v1.2.3-55-g7522


From 29f5b5a326b44c55e81b15308255ba695fecb323 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Fri, 9 Nov 2012 15:19:54 +0100
Subject: mfd: sta2x11-mfd: Add sta2x11_mfd_get_regs_data() function

A couple of predefined clocks (mux and gated) need to be
initialized with the virtual address of the clock's controlling
register and the address of a spinlock used to protect against
races.

This function exports such data for all the mfd cells.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 22 ++++++++++++++++++++++
 include/linux/mfd/sta2x11-mfd.h |  5 +++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 0cac2013bbf6..6d12ab42aba3 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -135,6 +135,28 @@ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
 }
 EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
+int sta2x11_mfd_get_regs_data(struct platform_device *dev,
+			      enum sta2x11_mfd_plat_dev index,
+			      void __iomem **regs,
+			      spinlock_t **lock)
+{
+	struct pci_dev *pdev = *(struct pci_dev **)(dev->dev.platform_data);
+	struct sta2x11_mfd *mfd;
+
+	if (!pdev)
+		return -ENODEV;
+	mfd = sta2x11_mfd_find(pdev);
+	if (!mfd)
+		return -ENODEV;
+	if (index >= sta2x11_n_mfd_plat_devs)
+		return -ENODEV;
+	*regs = mfd->regs[index];
+	*lock = &mfd->lock[index];
+	pr_debug("%s %d *regs = %p\n", __func__, __LINE__, *regs);
+	return *regs ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL(sta2x11_mfd_get_regs_data);
+
 /*
  * Special sta2x11-mfd regmap lock/unlock functions
  */
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 83344304f2cf..e813e5efbe4e 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -474,4 +474,9 @@ u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define COMPENSATION_REG3		0x3cc
 #define TEST_CTL_REG			0x3d0
 
+extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev,
+				     enum sta2x11_mfd_plat_dev index,
+				     void __iomem **regs,
+				     spinlock_t **lock);
+
 #endif /* __STA2X11_MFD_H */
-- 
cgit v1.2.3-55-g7522


From b18adafccd497245a6bc5b867bf9cba7e01f8729 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Fri, 9 Nov 2012 15:19:55 +0100
Subject: mfd: sta2x11-mfd: Use defines for platform devices' names

Since there are now many sta2x11-mfd platform devices, using defines
for their names looks like a better solution.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 42 +++++++++++++++++++++--------------------
 include/linux/mfd/sta2x11-mfd.h |  8 ++++++++
 2 files changed, 30 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 6d12ab42aba3..8d38ef264722 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -174,9 +174,9 @@ static void sta2x11_regmap_unlock(void *__lock)
 }
 
 static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
-	[sta2x11_sctl] = "sta2x11-sctl",
-	[sta2x11_apbreg] = "sta2x11-apbreg",
-	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
+	[sta2x11_sctl] = STA2X11_MFD_SCTL_NAME,
+	[sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME,
+	[sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME,
 };
 
 static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
@@ -340,7 +340,7 @@ static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
 /* The three platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-sctl",
+		.name	= STA2X11_MFD_SCTL_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_sctl_probe,
@@ -354,7 +354,7 @@ static int __init sta2x11_sctl_init(void)
 
 static struct platform_driver sta2x11_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-apbreg",
+		.name	= STA2X11_MFD_APBREG_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_apbreg_probe,
@@ -368,7 +368,7 @@ static int __init sta2x11_apbreg_init(void)
 
 static struct platform_driver sta2x11_apb_soc_regs_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-apb-soc-regs",
+		.name	= STA2X11_MFD_APB_SOC_REGS_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_apb_soc_regs_probe,
@@ -409,38 +409,40 @@ enum mfd0_bar1_cells {
 
 static const __devinitconst struct resource gpio_resources[] = {
 	{
-		.name = "sta2x11_gpio", /* 4 consecutive cells, 1 driver */
+		/* 4 consecutive cells, 1 driver */
+		.name = STA2X11_MFD_GPIO_NAME,
 		.start = 0,
 		.end = (4 * 4096) - 1,
 		.flags = IORESOURCE_MEM,
 	}
 };
 static const __devinitconst struct resource sctl_resources[] = {
-	CELL_4K("sta2x11-sctl", STA2X11_SCTL),
+	CELL_4K(STA2X11_MFD_SCTL_NAME, STA2X11_SCTL),
 };
 static const __devinitconst struct resource scr_resources[] = {
-	CELL_4K("sta2x11-scr", STA2X11_SCR),
+	CELL_4K(STA2X11_MFD_SCR_NAME, STA2X11_SCR),
 };
 static const __devinitconst struct resource time_resources[] = {
-	CELL_4K("sta2x11-time", STA2X11_TIME),
+	CELL_4K(STA2X11_MFD_TIME_NAME, STA2X11_TIME),
 };
 
 static const __devinitconst struct resource apbreg_resources[] = {
-	CELL_4K("sta2x11-apbreg", STA2X11_APBREG),
+	CELL_4K(STA2X11_MFD_APBREG_NAME, STA2X11_APBREG),
 };
 
 #define DEV(_name, _r) \
 	{ .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
 
 static __devinitdata struct mfd_cell sta2x11_mfd0_bar0[] = {
-	DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
-	DEV("sta2x11-sctl", sctl_resources),
-	DEV("sta2x11-scr", scr_resources),
-	DEV("sta2x11-time", time_resources),
+	/* offset 0: we add pdata later */
+	DEV(STA2X11_MFD_GPIO_NAME, gpio_resources),
+	DEV(STA2X11_MFD_SCTL_NAME, sctl_resources),
+	DEV(STA2X11_MFD_SCR_NAME,  scr_resources),
+	DEV(STA2X11_MFD_TIME_NAME, time_resources),
 };
 
 static __devinitdata struct mfd_cell sta2x11_mfd0_bar1[] = {
-	DEV("sta2x11-apbreg", apbreg_resources),
+	DEV(STA2X11_MFD_APBREG_NAME, apbreg_resources),
 };
 
 /* Mfd 1 devices */
@@ -456,19 +458,19 @@ enum mfd1_bar1_cells {
 };
 
 static const __devinitconst struct resource vic_resources[] = {
-	CELL_4K("sta2x11-vic", STA2X11_VIC),
+	CELL_4K(STA2X11_MFD_VIC_NAME, STA2X11_VIC),
 };
 
 static const __devinitconst struct resource apb_soc_regs_resources[] = {
-	CELL_4K("sta2x11-apb-soc-regs", STA2X11_APB_SOC_REGS),
+	CELL_4K(STA2X11_MFD_APB_SOC_REGS_NAME, STA2X11_APB_SOC_REGS),
 };
 
 static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = {
-	DEV("sta2x11-vic", vic_resources),
+	DEV(STA2X11_MFD_VIC_NAME, vic_resources),
 };
 
 static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = {
-	DEV("sta2x11-apb-soc-regs", apb_soc_regs_resources),
+	DEV(STA2X11_MFD_APB_SOC_REGS_NAME, apb_soc_regs_resources),
 };
 
 
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index e813e5efbe4e..058de2bacf76 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -37,6 +37,14 @@ enum sta2x11_mfd_plat_dev {
 	sta2x11_n_mfd_plat_devs,
 };
 
+#define STA2X11_MFD_SCTL_NAME	       "sta2x11-sctl"
+#define STA2X11_MFD_GPIO_NAME	       "sta2x11-gpio"
+#define STA2X11_MFD_SCR_NAME	       "sta2x11-scr"
+#define STA2X11_MFD_TIME_NAME	       "sta2x11-time"
+#define STA2X11_MFD_APBREG_NAME	       "sta2x11-apbreg"
+#define STA2X11_MFD_APB_SOC_REGS_NAME  "sta2x11-apb-soc-regs"
+#define STA2X11_MFD_VIC_NAME	       "sta2x11-vic"
+
 extern u32
 __sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev);
 
-- 
cgit v1.2.3-55-g7522


From dba6c1aeea4dd0e251e41c3f585abf4a06a4f057 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Fri, 9 Nov 2012 15:19:59 +0100
Subject: mfd: sta2x11-mfd: Add scr (otp registers) platform driver

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 52 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/mfd/sta2x11-mfd.h |  7 ++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index da65839f1d86..7365f0f89df2 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -175,10 +175,16 @@ static void sta2x11_regmap_unlock(void *__lock)
 	spin_unlock(lock);
 }
 
+/* OTP (one time programmable registers do not require locking */
+static void sta2x11_regmap_nolock(void *__lock)
+{
+}
+
 static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
 	[sta2x11_sctl] = STA2X11_MFD_SCTL_NAME,
 	[sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME,
 	[sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME,
+	[sta2x11_scr] = STA2X11_MFD_SCR_NAME,
 };
 
 static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
@@ -196,6 +202,28 @@ static struct regmap_config sta2x11_sctl_regmap_config = {
 	.writeable_reg = sta2x11_sctl_writeable_reg,
 };
 
+static bool sta2x11_scr_readable_reg(struct device *dev, unsigned int reg)
+{
+	return (reg == STA2X11_SECR_CR) ||
+		__reg_within_range(reg, STA2X11_SECR_FVR0, STA2X11_SECR_FVR1);
+}
+
+static bool sta2x11_scr_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return false;
+}
+
+static struct regmap_config sta2x11_scr_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_nolock,
+	.unlock = sta2x11_regmap_nolock,
+	.max_register = STA2X11_SECR_FVR1,
+	.readable_reg = sta2x11_scr_readable_reg,
+	.writeable_reg = sta2x11_scr_writeable_reg,
+};
+
 static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg)
 {
 	/* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */
@@ -279,9 +307,10 @@ sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = {
 	[sta2x11_sctl] = &sta2x11_sctl_regmap_config,
 	[sta2x11_apbreg] = &sta2x11_apbreg_regmap_config,
 	[sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config,
+	[sta2x11_scr] = &sta2x11_scr_regmap_config,
 };
 
-/* Probe for the three platform devices */
+/* Probe for the four platform devices */
 
 static int sta2x11_mfd_platform_probe(struct platform_device *dev,
 				      enum sta2x11_mfd_plat_dev index)
@@ -339,6 +368,11 @@ static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
 	return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs);
 }
 
+static int sta2x11_scr_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_scr);
+}
+
 /* The three platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
@@ -382,6 +416,21 @@ static int __init sta2x11_apb_soc_regs_init(void)
 	return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver);
 }
 
+static struct platform_driver sta2x11_scr_platform_driver = {
+	.driver = {
+		.name = STA2X11_MFD_SCR_NAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = sta2x11_scr_probe,
+};
+
+static int __init sta2x11_scr_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_scr_platform_driver);
+}
+
+
 /*
  * What follows are the PCI devices that host the above pdevs.
  * Each logic block is 4kB and they are all consecutive: we use this info.
@@ -631,6 +680,7 @@ static int __init sta2x11_mfd_init(void)
 subsys_initcall(sta2x11_apbreg_init);
 subsys_initcall(sta2x11_sctl_init);
 subsys_initcall(sta2x11_apb_soc_regs_init);
+subsys_initcall(sta2x11_scr_init);
 rootfs_initcall(sta2x11_mfd_init);
 
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 058de2bacf76..08cad95758c6 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -482,6 +482,13 @@ u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define COMPENSATION_REG3		0x3cc
 #define TEST_CTL_REG			0x3d0
 
+/*
+ * SECR (OTP) registers
+ */
+#define STA2X11_SECR_CR			0x00
+#define STA2X11_SECR_FVR0		0x10
+#define STA2X11_SECR_FVR1		0x14
+
 extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev,
 				     enum sta2x11_mfd_plat_dev index,
 				     void __iomem **regs,
-- 
cgit v1.2.3-55-g7522


From 818b5c2528b9e31101bb39018fd211dcf159a696 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Fri, 9 Nov 2012 15:20:00 +0100
Subject: mfd: sta2x11-mfd: Add defines for some sta2x11 sctl registers

These are required for the clock infrastructure code to properly configure
and control the sta2x11 PLLs.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/sta2x11-mfd.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 08cad95758c6..9a855ac11cbf 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -246,8 +246,29 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define SCTL_SCCTL		0x00	/* System controller control register */
 #define SCTL_ARMCFG		0x04	/* ARM configuration register */
 #define SCTL_SCPLLCTL		0x08	/* PLL control status register */
+
+#define SCTL_SCPLLCTL_AUDIO_PLL_PD	     BIT(1)
+#define SCTL_SCPLLCTL_FRAC_CONTROL	     BIT(3)
+#define SCTL_SCPLLCTL_STRB_BYPASS	     BIT(6)
+#define SCTL_SCPLLCTL_STRB_INPUT	     BIT(8)
+
 #define SCTL_SCPLLFCTRL		0x0c	/* PLL frequency control register */
+
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_MASK	0xff
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_SHIFT	  10
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_MASK	   7
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_SHIFT	  21
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_MASK	   7
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_SHIFT	  18
+#define SCTL_SCPLLFCTRL_DITHER_DISABLE_MASK     0x03
+#define SCTL_SCPLLFCTRL_DITHER_DISABLE_SHIFT       4
+
+
 #define SCTL_SCRESFRACT		0x10	/* PLL fractional input register */
+
+#define SCTL_SCRESFRACT_MASK	0x0000ffff
+
+
 #define SCTL_SCRESCTRL1		0x14	/* Peripheral reset control 1 */
 #define SCTL_SCRESXTRL2		0x18	/* Peripheral reset control 2 */
 #define SCTL_SCPEREN0		0x1c	/* Peripheral clock enable register 0 */
-- 
cgit v1.2.3-55-g7522


From bcf58e725ddc45d31addbc6627d4f0edccc824c1 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 26 Jul 2012 04:02:49 -0700
Subject: userns: Make create_new_namespaces take a user_ns parameter

Modify create_new_namespaces to explicitly take a user namespace
parameter, instead of implicitly through the task_struct.

This allows an implementation of unshare(CLONE_NEWUSER) where
the new user namespace is not stored onto the current task_struct
until after all of the namespaces are created.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ipc_namespace.h |  7 ++++---
 include/linux/utsname.h       |  6 +++---
 ipc/namespace.c               | 10 ++++------
 kernel/nsproxy.c              | 22 +++++++++++++---------
 kernel/utsname.c              |  9 ++++-----
 5 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 5499c92a9153..f03af702a39d 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -133,7 +133,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 
 #if defined(CONFIG_IPC_NS)
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
-				       struct task_struct *tsk);
+	struct user_namespace *user_ns, struct ipc_namespace *ns);
+
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
 	if (ns)
@@ -144,12 +145,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
-					      struct task_struct *tsk)
+	struct user_namespace *user_ns, struct ipc_namespace *ns)
 {
 	if (flags & CLONE_NEWIPC)
 		return ERR_PTR(-EINVAL);
 
-	return tsk->nsproxy->ipc_ns;
+	return ns;
 }
 
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b345206722a..221f4a0a7502 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -33,7 +33,7 @@ static inline void get_uts_ns(struct uts_namespace *ns)
 }
 
 extern struct uts_namespace *copy_utsname(unsigned long flags,
-					  struct task_struct *tsk);
+	struct user_namespace *user_ns, struct uts_namespace *old_ns);
 extern void free_uts_ns(struct kref *kref);
 
 static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +50,12 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 }
 
 static inline struct uts_namespace *copy_utsname(unsigned long flags,
-						 struct task_struct *tsk)
+	struct user_namespace *user_ns, struct uts_namespace *old_ns)
 {
 	if (flags & CLONE_NEWUTS)
 		return ERR_PTR(-EINVAL);
 
-	return tsk->nsproxy->uts_ns;
+	return old_ns;
 }
 #endif
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 6ed33c05cb66..72c868277793 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
 
 #include "util.h"
 
-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
+static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 					   struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
@@ -46,19 +46,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
 	ipcns_notify(IPCNS_CREATED);
 	register_ipcns_notifier(ns);
 
-	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+	ns->user_ns = get_user_ns(user_ns);
 
 	return ns;
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags,
-				struct task_struct *tsk)
+	struct user_namespace *user_ns, struct ipc_namespace *ns)
 {
-	struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
-
 	if (!(flags & CLONE_NEWIPC))
 		return get_ipc_ns(ns);
-	return create_ipc_ns(tsk, ns);
+	return create_ipc_ns(user_ns, ns);
 }
 
 /*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 4357a0a7d17d..2ddd81657a2a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void)
  * leave it to the caller to do proper locking and attach it to task.
  */
 static struct nsproxy *create_new_namespaces(unsigned long flags,
-			struct task_struct *tsk, struct fs_struct *new_fs)
+	struct task_struct *tsk, struct user_namespace *user_ns,
+	struct fs_struct *new_fs)
 {
 	struct nsproxy *new_nsp;
 	int err;
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	if (!new_nsp)
 		return ERR_PTR(-ENOMEM);
 
-	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs);
+	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
 	if (IS_ERR(new_nsp->mnt_ns)) {
 		err = PTR_ERR(new_nsp->mnt_ns);
 		goto out_ns;
 	}
 
-	new_nsp->uts_ns = copy_utsname(flags, tsk);
+	new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
 	if (IS_ERR(new_nsp->uts_ns)) {
 		err = PTR_ERR(new_nsp->uts_ns);
 		goto out_uts;
 	}
 
-	new_nsp->ipc_ns = copy_ipcs(flags, tsk);
+	new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
 	if (IS_ERR(new_nsp->ipc_ns)) {
 		err = PTR_ERR(new_nsp->ipc_ns);
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->pid_ns);
+	new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
 	}
 
-	new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns);
+	new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 	if (IS_ERR(new_nsp->net_ns)) {
 		err = PTR_ERR(new_nsp->net_ns);
 		goto out_net;
@@ -152,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 		goto out;
 	}
 
-	new_ns = create_new_namespaces(flags, tsk, tsk->fs);
+	new_ns = create_new_namespaces(flags, tsk,
+				       task_cred_xxx(tsk, user_ns), tsk->fs);
 	if (IS_ERR(new_ns)) {
 		err = PTR_ERR(new_ns);
 		goto out;
@@ -186,6 +188,7 @@ void free_nsproxy(struct nsproxy *ns)
 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 		struct nsproxy **new_nsp, struct fs_struct *new_fs)
 {
+	struct user_namespace *user_ns;
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
@@ -195,7 +198,8 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	if (!nsown_capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	*new_nsp = create_new_namespaces(unshare_flags, current,
+	user_ns = current_user_ns();
+	*new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
 				new_fs ? new_fs : current->fs);
 	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
@@ -252,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 	if (nstype && (ops->type != nstype))
 		goto out;
 
-	new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+	new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
 	if (IS_ERR(new_nsproxy)) {
 		err = PTR_ERR(new_nsproxy);
 		goto out;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 4a9362f9325d..fdc619eb61ef 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -32,7 +32,7 @@ static struct uts_namespace *create_uts_ns(void)
  * @old_ns: namespace to clone
  * Return NULL on error (failure to kmalloc), new ns otherwise
  */
-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
+static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
 {
 	struct uts_namespace *ns;
@@ -43,7 +43,7 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
 
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
-	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+	ns->user_ns = get_user_ns(user_ns);
 	up_read(&uts_sem);
 	return ns;
 }
@@ -55,9 +55,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
  * versa.
  */
 struct uts_namespace *copy_utsname(unsigned long flags,
-				   struct task_struct *tsk)
+	struct user_namespace *user_ns, struct uts_namespace *old_ns)
 {
-	struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
 	struct uts_namespace *new_ns;
 
 	BUG_ON(!old_ns);
@@ -66,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags,
 	if (!(flags & CLONE_NEWUTS))
 		return old_ns;
 
-	new_ns = clone_uts_ns(tsk, old_ns);
+	new_ns = clone_uts_ns(user_ns, old_ns);
 
 	put_uts_ns(old_ns);
 	return new_ns;
-- 
cgit v1.2.3-55-g7522


From 4c44aaafa8108f584831850ab48a975e971db2de Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 26 Jul 2012 05:05:21 -0700
Subject: userns: Kill task_user_ns

The task_user_ns function hides the fact that it is getting the user
namespace from struct cred on the task.  struct cred may go away as
soon as the rcu lock is released.  This leads to a race where we
can dereference a stale user namespace pointer.

To make it obvious a struct cred is involved kill task_user_ns.

To kill the race modify the users of task_user_ns to only
reference the user namespace while the rcu lock is held.

Cc: Kees Cook <keescook@chromium.org>
Cc: James Morris <james.l.morris@oracle.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/cred.h     |  2 --
 kernel/ptrace.c          | 10 ++++++++--
 kernel/sched/core.c      | 10 ++++++++--
 security/yama/yama_lsm.c | 12 +++++++++---
 4 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index ebbed2ce6637..856d2622d832 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -357,10 +357,8 @@ static inline void put_cred(const struct cred *_cred)
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
 #define current_user_ns()	(current_cred_xxx(user_ns))
-#define task_user_ns(task)	(task_cred_xxx((task), user_ns))
 #else
 #define current_user_ns()	(&init_user_ns)
-#define task_user_ns(task)	(&init_user_ns)
 #endif
 
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55dda955..7b09b88862cc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -215,8 +215,12 @@ ok:
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable  && !ptrace_has_cap(task_user_ns(task), mode))
+	rcu_read_lock();
+	if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 
 	return security_ptrace_access_check(task, mode);
 }
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	if (seize)
 		flags |= PT_SEIZED;
-	if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+	rcu_read_lock();
+	if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
 		flags |= PT_PTRACE_CAP;
+	rcu_read_unlock();
 	task->ptrace = flags;
 
 	__ptrace_link(task, current);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..2f5eb1838b3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4029,8 +4029,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 		goto out_free_cpus_allowed;
 	}
 	retval = -EPERM;
-	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
-		goto out_unlock;
+	if (!check_same_owner(p)) {
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+			rcu_read_unlock();
+			goto out_unlock;
+		}
+		rcu_read_unlock();
+	}
 
 	retval = security_task_setscheduler(p);
 	if (retval)
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index b4c29848b49d..0e72239aeb05 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -262,14 +262,18 @@ int yama_ptrace_access_check(struct task_struct *child,
 			/* No additional restrictions. */
 			break;
 		case YAMA_SCOPE_RELATIONAL:
+			rcu_read_lock();
 			if (!task_is_descendant(current, child) &&
 			    !ptracer_exception_found(current, child) &&
-			    !ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+			    !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
 				rc = -EPERM;
+			rcu_read_unlock();
 			break;
 		case YAMA_SCOPE_CAPABILITY:
-			if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+			rcu_read_lock();
+			if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
 				rc = -EPERM;
+			rcu_read_unlock();
 			break;
 		case YAMA_SCOPE_NO_ATTACH:
 		default:
@@ -307,8 +311,10 @@ int yama_ptrace_traceme(struct task_struct *parent)
 	/* Only disallow PTRACE_TRACEME on more aggressive settings. */
 	switch (ptrace_scope) {
 	case YAMA_SCOPE_CAPABILITY:
-		if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE))
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
 			rc = -EPERM;
+		rcu_read_unlock();
 		break;
 	case YAMA_SCOPE_NO_ATTACH:
 		rc = -EPERM;
-- 
cgit v1.2.3-55-g7522


From cde1975bc242f3e1072bde623ef378e547b73f91 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 26 Jul 2012 06:24:06 -0700
Subject: userns: Implent proc namespace operations

This allows entering a user namespace, and the ability
to store a reference to a user namespace with a bind
mount.

Addition of missing userns_ns_put in userns_install
from Gao feng <gaofeng@cn.fujitsu.com>

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/namespaces.c    |  4 +++
 include/linux/proc_fs.h |  1 +
 kernel/user_namespace.c | 90 +++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 78 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 2a17fd9ae6a9..030250c27d70 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include "internal.h"
 
 
@@ -26,6 +27,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 #endif
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
+#endif
+#ifdef CONFIG_USER_NS
+	&userns_operations,
 #endif
 	&mntns_operations,
 };
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9014c041e752..31447819bc55 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -258,6 +258,7 @@ extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
 
 union proc_op {
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 49096d559e08..a9460774e77d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 #include <linux/highuid.h>
 #include <linux/cred.h>
 #include <linux/securebits.h>
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly;
 static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
+static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
+{
+	/* Start with the same capabilities as init but useless for doing
+	 * anything as the capabilities are bound to the new user namespace.
+	 */
+	cred->securebits = SECUREBITS_DEFAULT;
+	cred->cap_inheritable = CAP_EMPTY_SET;
+	cred->cap_permitted = CAP_FULL_SET;
+	cred->cap_effective = CAP_FULL_SET;
+	cred->cap_bset = CAP_FULL_SET;
+#ifdef CONFIG_KEYS
+	key_put(cred->request_key_auth);
+	cred->request_key_auth = NULL;
+#endif
+	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
+	cred->user_ns = user_ns;
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -53,27 +72,12 @@ int create_user_ns(struct cred *new)
 		return -ENOMEM;
 
 	kref_init(&ns->kref);
+	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
 	ns->owner = owner;
 	ns->group = group;
 
-	/* Start with the same capabilities as init but useless for doing
-	 * anything as the capabilities are bound to the new user namespace.
-	 */
-	new->securebits = SECUREBITS_DEFAULT;
-	new->cap_inheritable = CAP_EMPTY_SET;
-	new->cap_permitted = CAP_FULL_SET;
-	new->cap_effective = CAP_FULL_SET;
-	new->cap_bset = CAP_FULL_SET;
-#ifdef CONFIG_KEYS
-	key_put(new->request_key_auth);
-	new->request_key_auth = NULL;
-#endif
-	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
-
-	/* Leave the new->user_ns reference with the new user namespace. */
-	/* Leave the reference to our user_ns with the new cred. */
-	new->user_ns = ns;
+	set_cred_user_ns(new, ns);
 
 	return 0;
 }
@@ -737,6 +741,58 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 	return false;
 }
 
+static void *userns_get(struct task_struct *task)
+{
+	struct user_namespace *user_ns;
+
+	rcu_read_lock();
+	user_ns = get_user_ns(__task_cred(task)->user_ns);
+	rcu_read_unlock();
+
+	return user_ns;
+}
+
+static void userns_put(void *ns)
+{
+	put_user_ns(ns);
+}
+
+static int userns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct user_namespace *user_ns = ns;
+	struct cred *cred;
+
+	/* Don't allow gaining capabilities by reentering
+	 * the same user namespace.
+	 */
+	if (user_ns == current_user_ns())
+		return -EINVAL;
+
+	/* Threaded many not enter a different user namespace */
+	if (atomic_read(&current->mm->mm_users) > 1)
+		return -EINVAL;
+
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	cred = prepare_creds();
+	if (!cred)
+		return -ENOMEM;
+
+	put_user_ns(cred->user_ns);
+	set_cred_user_ns(cred, get_user_ns(user_ns));
+
+	return commit_creds(cred);
+}
+
+const struct proc_ns_operations userns_operations = {
+	.name		= "user",
+	.type		= CLONE_NEWUSER,
+	.get		= userns_get,
+	.put		= userns_put,
+	.install	= userns_install,
+};
+
 static __init int user_namespaces_init(void)
 {
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
-- 
cgit v1.2.3-55-g7522


From b2e0d98705e60e45bbb3c0032c48824ad7ae0704 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 26 Jul 2012 05:15:35 -0700
Subject: userns: Implement unshare of the user namespace

- Add CLONE_THREAD to the unshare flags if CLONE_NEWUSER is selected
  As changing user namespaces is only valid if all there is only
  a single thread.
- Restore the code to add CLONE_VM if CLONE_THREAD is selected and
  the code to addCLONE_SIGHAND if CLONE_VM is selected.
  Making the constraints in the code clear.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/nsproxy.h        |  2 +-
 include/linux/user_namespace.h |  9 +++++++++
 kernel/fork.c                  | 25 ++++++++++++++++++++++---
 kernel/nsproxy.c               |  8 ++++----
 kernel/user_namespace.c        | 15 +++++++++++++++
 5 files changed, 51 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55ad004..10e5947491c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk);
 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
-	struct fs_struct *);
+	struct cred *, struct fs_struct *);
 int __init nsproxy_cache_init(void);
 
 static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 95142cae446a..17651f08d67f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -39,6 +39,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 }
 
 extern int create_user_ns(struct cred *new);
+extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
 extern void free_user_ns(struct kref *kref);
 
 static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +67,14 @@ static inline int create_user_ns(struct cred *new)
 	return -EINVAL;
 }
 
+static inline int unshare_userns(unsigned long unshare_flags,
+				 struct cred **new_cred)
+{
+	if (unshare_flags & CLONE_NEWUSER)
+		return -EINVAL;
+	return 0;
+}
+
 static inline void put_user_ns(struct user_namespace *ns)
 {
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c29abb19014..38e53b87402c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1687,7 +1687,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-				CLONE_NEWPID))
+				CLONE_NEWUSER|CLONE_NEWPID))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing to
@@ -1754,10 +1754,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
 	struct fs_struct *fs, *new_fs = NULL;
 	struct files_struct *fd, *new_fd = NULL;
+	struct cred *new_cred = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
 	int err;
 
+	/*
+	 * If unsharing a user namespace must also unshare the thread.
+	 */
+	if (unshare_flags & CLONE_NEWUSER)
+		unshare_flags |= CLONE_THREAD;
 	/*
 	 * If unsharing a pid namespace must also unshare the thread.
 	 */
@@ -1795,11 +1801,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	err = unshare_fd(unshare_flags, &new_fd);
 	if (err)
 		goto bad_unshare_cleanup_fs;
-	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+	err = unshare_userns(unshare_flags, &new_cred);
 	if (err)
 		goto bad_unshare_cleanup_fd;
+	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+					 new_cred, new_fs);
+	if (err)
+		goto bad_unshare_cleanup_cred;
 
-	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1832,11 +1842,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 
 		task_unlock(current);
+
+		if (new_cred) {
+			/* Install the new user namespace */
+			commit_creds(new_cred);
+			new_cred = NULL;
+		}
 	}
 
 	if (new_nsproxy)
 		put_nsproxy(new_nsproxy);
 
+bad_unshare_cleanup_cred:
+	if (new_cred)
+		put_cred(new_cred);
 bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 2ddd81657a2a..78e2ecb20165 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -186,7 +186,7 @@ void free_nsproxy(struct nsproxy *ns)
  * On success, returns the new nsproxy.
  */
 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
-		struct nsproxy **new_nsp, struct fs_struct *new_fs)
+	struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
 {
 	struct user_namespace *user_ns;
 	int err = 0;
@@ -195,12 +195,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 			       CLONE_NEWNET | CLONE_NEWPID)))
 		return 0;
 
-	if (!nsown_capable(CAP_SYS_ADMIN))
+	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	user_ns = current_user_ns();
 	*new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
-				new_fs ? new_fs : current->fs);
+					 new_fs ? new_fs : current->fs);
 	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
 		goto out;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a9460774e77d..ce92f7e6290a 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -82,6 +82,21 @@ int create_user_ns(struct cred *new)
 	return 0;
 }
 
+int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
+{
+	struct cred *cred;
+
+	if (!(unshare_flags & CLONE_NEWUSER))
+		return 0;
+
+	cred = prepare_creds();
+	if (!cred)
+		return -ENOMEM;
+
+	*new_cred = cred;
+	return create_user_ns(cred);
+}
+
 void free_user_ns(struct kref *kref)
 {
 	struct user_namespace *parent, *ns =
-- 
cgit v1.2.3-55-g7522


From 33d6dce607573b5fd7a43168e0d91221b3ca532b Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Fri, 17 Jun 2011 13:33:20 -0700
Subject: proc: Generalize proc inode allocation

Generalize the proc inode allocation so that it can be
used without having to having to create a proc_dir_entry.

This will allow namespace file descriptors to remain light
weight entitities but still have the same inode number
when the backing namespace is the same.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/generic.c       | 26 +++++++++++++-------------
 include/linux/proc_fs.h | 10 ++++++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb9..7b3ae3cc0ef9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
  * Return an inode number between PROC_DYNAMIC_FIRST and
  * 0xffffffff, or zero on failure.
  */
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
 {
 	unsigned int i;
 	int error;
 
 retry:
-	if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
-		return 0;
+	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+		return -ENOMEM;
 
 	spin_lock(&proc_inum_lock);
 	error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
 	if (error == -EAGAIN)
 		goto retry;
 	else if (error)
-		return 0;
+		return error;
 
 	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
 		spin_lock(&proc_inum_lock);
 		ida_remove(&proc_inum_ida, i);
 		spin_unlock(&proc_inum_lock);
-		return 0;
+		return -ENOSPC;
 	}
-	return PROC_DYNAMIC_FIRST + i;
+	*inum = PROC_DYNAMIC_FIRST + i;
+	return 0;
 }
 
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
 {
 	spin_lock(&proc_inum_lock);
 	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	unsigned int i;
 	struct proc_dir_entry *tmp;
+	int ret;
 	
-	i = get_inode_number();
-	if (i == 0)
-		return -EAGAIN;
-	dp->low_ino = i;
+	ret = proc_alloc_inum(&dp->low_ino);
+	if (ret)
+		return ret;
 
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
 
 static void free_proc_entry(struct proc_dir_entry *de)
 {
-	release_inode_number(de->low_ino);
+	proc_free_inum(de->low_ino);
 
 	if (S_ISLNK(de->mode))
 		kfree(de->data);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 31447819bc55..bf1d000fbba6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -176,6 +176,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 extern struct file *proc_ns_fget(int fd);
 extern bool proc_ns_inode(struct inode *inode);
 
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
 #else
 
 #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -235,6 +237,14 @@ static inline bool proc_ns_inode(struct inode *inode)
 	return false;
 }
 
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+	*inum = 1;
+	return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
-- 
cgit v1.2.3-55-g7522


From 98f842e675f96ffac96e6c50315790912b2812be Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Wed, 15 Jun 2011 10:21:48 -0700
Subject: proc: Usable inode numbers for the namespace file descriptors.

Assign a unique proc inode to each namespace, and use that
inode number to ensure we only allocate at most one proc
inode for every namespace in proc.

A single proc inode per namespace allows userspace to test
to see if two processes are in the same namespace.

This has been a long requested feature and only blocked because
a naive implementation would put the id in a global space and
would ultimately require having a namespace for the names of
namespaces, making migration and certain virtualization tricks
impossible.

We still don't have per superblock inode numbers for proc, which
appears necessary for application unaware checkpoint/restart and
migrations (if the application is using namespace file descriptors)
but that is now allowd by the design if it becomes important.

I have preallocated the ipc and uts initial proc inode numbers so
their structures can be statically initialized.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/mount.h                     |  1 +
 fs/namespace.c                 | 14 ++++++++++++++
 fs/proc/namespaces.c           | 24 ++++++++++++++----------
 include/linux/ipc_namespace.h  |  2 ++
 include/linux/pid_namespace.h  |  1 +
 include/linux/proc_fs.h        |  7 ++++++-
 include/linux/user_namespace.h |  1 +
 include/linux/utsname.h        |  1 +
 include/net/net_namespace.h    |  2 ++
 init/version.c                 |  2 ++
 ipc/msgutil.c                  |  2 ++
 ipc/namespace.c                | 16 ++++++++++++++++
 kernel/pid.c                   |  1 +
 kernel/pid_namespace.c         | 12 ++++++++++++
 kernel/user.c                  |  2 ++
 kernel/user_namespace.c        | 15 +++++++++++++++
 kernel/utsname.c               | 17 ++++++++++++++++-
 net/core/net_namespace.c       | 24 ++++++++++++++++++++++++
 18 files changed, 132 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 630fafc616bb..cd5007980400 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,6 +4,7 @@
 
 struct mnt_namespace {
 	atomic_t		count;
+	unsigned int		proc_inum;
 	struct mount *	root;
 	struct list_head	list;
 	struct user_namespace	*user_ns;
diff --git a/fs/namespace.c b/fs/namespace.c
index cab78a74aca3..c1bbe86f4920 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2301,6 +2301,7 @@ dput_out:
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
+	proc_free_inum(ns->proc_inum);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
@@ -2317,10 +2318,16 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
+	int ret;
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
+	ret = proc_alloc_inum(&new_ns->proc_inum);
+	if (ret) {
+		kfree(new_ns);
+		return ERR_PTR(ret);
+	}
 	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
 	new_ns->root = NULL;
@@ -2799,10 +2806,17 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int mntns_inum(void *ns)
+{
+	struct mnt_namespace *mnt_ns = ns;
+	return mnt_ns->proc_inum;
+}
+
 const struct proc_ns_operations mntns_operations = {
 	.name		= "mnt",
 	.type		= CLONE_NEWNS,
 	.get		= mntns_get,
 	.put		= mntns_put,
 	.install	= mntns_install,
+	.inum		= mntns_inum,
 };
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 7a6d8d69cdb8..b7a47196c8c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -82,7 +82,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	inode = new_inode(sb);
+	inode = iget_locked(sb, ns_ops->inum(ns));
 	if (!inode) {
 		dput(dentry);
 		ns_ops->put(ns);
@@ -90,13 +90,17 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 	}
 
 	ei = PROC_I(inode);
-	inode->i_ino = get_next_ino();
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_op = &ns_inode_operations;
-	inode->i_mode = S_IFREG | S_IRUGO;
-	inode->i_fop = &ns_file_operations;
-	ei->ns_ops = ns_ops;
-	ei->ns = ns;
+	if (inode->i_state & I_NEW) {
+		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+		inode->i_op = &ns_inode_operations;
+		inode->i_mode = S_IFREG | S_IRUGO;
+		inode->i_fop = &ns_file_operations;
+		ei->ns_ops = ns_ops;
+		ei->ns = ns;
+		unlock_new_inode(inode);
+	} else {
+		ns_ops->put(ns);
+	}
 
 	d_set_d_op(dentry, &ns_dentry_operations);
 	result = d_instantiate_unique(dentry, inode);
@@ -162,12 +166,12 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
 	if (!ns)
 		goto out_put_task;
 
-	snprintf(name, sizeof(name), "%s", ns_ops->name);
+	snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
 	len = strlen(name);
 
 	if (len > buflen)
 		len = buflen;
-	if (copy_to_user(buffer, ns_ops->name, len))
+	if (copy_to_user(buffer, name, len))
 		len = -EFAULT;
 
 	ns_ops->put(ns);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index f03af702a39d..fe771978e877 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@ struct ipc_namespace {
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
+
+	unsigned int	proc_inum;
 };
 
 extern struct ipc_namespace init_ipc_ns;
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 4c96acdb2489..bf285999273a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -37,6 +37,7 @@ struct pid_namespace {
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
+	unsigned int proc_inum;
 };
 
 extern struct pid_namespace init_pid_ns;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index bf1d000fbba6..2e24018b7cec 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@ struct mm_struct;
  */
 
 enum {
-	PROC_ROOT_INO = 1,
+	PROC_ROOT_INO		= 1,
+	PROC_IPC_INIT_INO	= 0xEFFFFFFFU,
+	PROC_UTS_INIT_INO	= 0xEFFFFFFEU,
+	PROC_USER_INIT_INO	= 0xEFFFFFFDU,
+	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
 };
 
 /*
@@ -263,6 +267,7 @@ struct proc_ns_operations {
 	void *(*get)(struct task_struct *task);
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
+	unsigned int (*inum)(void *ns);
 };
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 17651f08d67f..b9bd2e6c73cc 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct user_namespace {
 	struct user_namespace	*parent;
 	kuid_t			owner;
 	kgid_t			group;
+	unsigned int		proc_inum;
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 221f4a0a7502..239e27733d6c 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@ struct uts_namespace {
 	struct kref kref;
 	struct new_utsname name;
 	struct user_namespace *user_ns;
+	unsigned int proc_inum;
 };
 extern struct uts_namespace init_uts_ns;
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c5a43f56b796..de644bcd8613 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
 
 	struct user_namespace   *user_ns;	/* Owning user namespace */
 
+	unsigned int		proc_inum;
+
 	struct proc_dir_entry 	*proc_net;
 	struct proc_dir_entry 	*proc_net_stat;
 
diff --git a/init/version.c b/init/version.c
index 86fe0ccb997a..58170f18912d 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
 #include <linux/utsname.h>
 #include <generated/utsrelease.h>
 #include <linux/version.h>
+#include <linux/proc_fs.h>
 
 #ifndef CONFIG_KALLSYMS
 #define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
 		.domainname	= UTS_DOMAINNAME,
 	},
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_UTS_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_uts_ns);
 
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 26143d377c95..6471f1bdae96 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,6 +16,7 @@
 #include <linux/msg.h>
 #include <linux/ipc_namespace.h>
 #include <linux/utsname.h>
+#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 #include "util.h"
@@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock);
 struct ipc_namespace init_ipc_ns = {
 	.count		= ATOMIC_INIT(1),
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_IPC_INIT_INO,
 };
 
 atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 72c868277793..cf3386a51de2 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (ns == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	atomic_set(&ns->count, 1);
 	err = mq_init_ns(ns);
 	if (err) {
+		proc_free_inum(ns->proc_inum);
 		kfree(ns);
 		return ERR_PTR(err);
 	}
@@ -111,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	 */
 	ipcns_notify(IPCNS_REMOVED);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -172,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
 	return 0;
 }
 
+static unsigned int ipcns_inum(void *vp)
+{
+	struct ipc_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations ipcns_operations = {
 	.name		= "ipc",
 	.type		= CLONE_NEWIPC,
 	.get		= ipcns_get,
 	.put		= ipcns_put,
 	.install	= ipcns_install,
+	.inum		= ipcns_inum,
 };
diff --git a/kernel/pid.c b/kernel/pid.c
index 6e8da291de49..3026ddae0a34 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -80,6 +80,7 @@ struct pid_namespace init_pid_ns = {
 	.level = 0,
 	.child_reaper = &init_task,
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_PID_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 68508d330634..560da0dab230 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -107,6 +107,10 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	if (ns->pid_cachep == NULL)
 		goto out_free_map;
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err)
+		goto out_free_map;
+
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
@@ -133,6 +137,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 {
 	int i;
 
+	proc_free_inum(ns->proc_inum);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
 	put_user_ns(ns->user_ns);
@@ -345,12 +350,19 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int pidns_inum(void *ns)
+{
+	struct pid_namespace *pid_ns = ns;
+	return pid_ns->proc_inum;
+}
+
 const struct proc_ns_operations pidns_operations = {
 	.name		= "pid",
 	.type		= CLONE_NEWPID,
 	.get		= pidns_get,
 	.put		= pidns_put,
 	.install	= pidns_install,
+	.inum		= pidns_inum,
 };
 
 static __init int pid_namespaces_init(void)
diff --git a/kernel/user.c b/kernel/user.c
index 750acffbe9ec..33acb5e53a5f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 
 /*
  * userns count is 1 for root user, 1 for init_uts_ns,
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = {
 	},
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
+	.proc_inum = PROC_USER_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 89f6eaed067a..f5975ccf9348 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -58,6 +58,7 @@ int create_user_ns(struct cred *new)
 	struct user_namespace *ns, *parent_ns = new->user_ns;
 	kuid_t owner = new->euid;
 	kgid_t group = new->egid;
+	int ret;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
@@ -71,6 +72,12 @@ int create_user_ns(struct cred *new)
 	if (!ns)
 		return -ENOMEM;
 
+	ret = proc_alloc_inum(&ns->proc_inum);
+	if (ret) {
+		kmem_cache_free(user_ns_cachep, ns);
+		return ret;
+	}
+
 	kref_init(&ns->kref);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
@@ -103,6 +110,7 @@ void free_user_ns(struct kref *kref)
 		container_of(kref, struct user_namespace, kref);
 
 	parent = ns->parent;
+	proc_free_inum(ns->proc_inum);
 	kmem_cache_free(user_ns_cachep, ns);
 	put_user_ns(parent);
 }
@@ -808,12 +816,19 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
 	return commit_creds(cred);
 }
 
+static unsigned int userns_inum(void *ns)
+{
+	struct user_namespace *user_ns = ns;
+	return user_ns->proc_inum;
+}
+
 const struct proc_ns_operations userns_operations = {
 	.name		= "user",
 	.type		= CLONE_NEWUSER,
 	.get		= userns_get,
 	.put		= userns_put,
 	.install	= userns_install,
+	.inum		= userns_inum,
 };
 
 static __init int user_namespaces_init(void)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index fdc619eb61ef..f6336d51d64c 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
 {
 	struct uts_namespace *ns;
+	int err;
 
 	ns = create_uts_ns();
 	if (!ns)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
 	ns->user_ns = get_user_ns(user_ns);
@@ -77,6 +84,7 @@ void free_uts_ns(struct kref *kref)
 
 	ns = container_of(kref, struct uts_namespace, kref);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -114,11 +122,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
 	return 0;
 }
 
+static unsigned int utsns_inum(void *vp)
+{
+	struct uts_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name		= "uts",
 	.type		= CLONE_NEWUTS,
 	.get		= utsns_get,
 	.put		= utsns_put,
 	.install	= utsns_install,
+	.inum		= utsns_inum,
 };
-
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ec2870b44c1f..2e9a3132b8dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+static __net_init int net_ns_net_init(struct net *net)
+{
+	return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+	proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+	.init = net_ns_net_init,
+	.exit = net_ns_net_exit,
+};
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -412,6 +427,8 @@ static int __init net_ns_init(void)
 
 	mutex_unlock(&net_mutex);
 
+	register_pernet_subsys(&net_ns_ops);
+
 	return 0;
 }
 
@@ -640,11 +657,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int netns_inum(void *ns)
+{
+	struct net *net = ns;
+	return net->proc_inum;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.inum		= netns_inum,
 };
 #endif
-- 
cgit v1.2.3-55-g7522


From 97fa4cf442ff2872000d9110686371775795a32b Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth
Date: Sat, 17 Nov 2012 15:22:22 +0100
Subject: clk: mvebu: add mvebu core clocks.

This driver allows to provide DT clocks for core clocks found on
Marvell Kirkwood, Dove & 370/XP SoCs. The core clock frequencies and
ratios are determined by decoding the Sample-At-Reset registers.

Although technically correct, using a divider of 0 will lead to
div_by_zero panic. Let's use a ratio of 0/1 instead to fail later
with a zero clock.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 .../devicetree/bindings/clock/mvebu-core-clock.txt |  47 ++
 drivers/clk/Kconfig                                |   2 +
 drivers/clk/Makefile                               |   1 +
 drivers/clk/mvebu/Kconfig                          |   3 +
 drivers/clk/mvebu/Makefile                         |   1 +
 drivers/clk/mvebu/clk-core.c                       | 675 +++++++++++++++++++++
 drivers/clk/mvebu/clk-core.h                       |  18 +
 drivers/clk/mvebu/clk.c                            |  23 +
 include/linux/clk/mvebu.h                          |  22 +
 9 files changed, 792 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
 create mode 100644 drivers/clk/mvebu/Kconfig
 create mode 100644 drivers/clk/mvebu/Makefile
 create mode 100644 drivers/clk/mvebu/clk-core.c
 create mode 100644 drivers/clk/mvebu/clk-core.h
 create mode 100644 drivers/clk/mvebu/clk.c
 create mode 100644 include/linux/clk/mvebu.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
new file mode 100644
index 000000000000..1e662948661e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
@@ -0,0 +1,47 @@
+* Core Clock bindings for Marvell MVEBU SoCs
+
+Marvell MVEBU SoCs usually allow to determine core clock frequencies by
+reading the Sample-At-Reset (SAR) register. The core clock consumer should
+specify the desired clock by having the clock ID in its "clocks" phandle cell.
+
+The following is a list of provided IDs and clock names on Armada 370/XP:
+ 0 = tclk    (Internal Bus clock)
+ 1 = cpuclk  (CPU clock)
+ 2 = nbclk   (L2 Cache clock)
+ 3 = hclk    (DRAM control clock)
+ 4 = dramclk (DDR clock)
+
+The following is a list of provided IDs and clock names on Kirkwood and Dove:
+ 0 = tclk   (Internal Bus clock)
+ 1 = cpuclk (CPU0 clock)
+ 2 = l2clk  (L2 Cache clock derived from CPU0 clock)
+ 3 = ddrclk (DDR controller clock derived from CPU0 clock)
+
+Required properties:
+- compatible : shall be one of the following:
+	"marvell,armada-370-core-clock" - For Armada 370 SoC core clocks
+	"marvell,armada-xp-core-clock" - For Armada XP SoC core clocks
+	"marvell,dove-core-clock" - for Dove SoC core clocks
+	"marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180)
+	"marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC
+- reg : shall be the register address of the Sample-At-Reset (SAR) register
+- #clock-cells : from common clock binding; shall be set to 1
+
+Optional properties:
+- clock-output-names : from common clock binding; allows overwrite default clock
+	output names ("tclk", "cpuclk", "l2clk", "ddrclk")
+
+Example:
+
+core_clk: core-clocks@d0214 {
+	compatible = "marvell,dove-core-clock";
+	reg = <0xd0214 0x4>;
+	#clock-cells = <1>;
+};
+
+spi0: spi@10600 {
+	compatible = "marvell,orion-spi";
+	/* ... */
+	/* get tclk from core clock provider */
+	clocks = <&core_clk 0>;
+};
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index bace9e98f75d..60427c0d23e6 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -54,3 +54,5 @@ config COMMON_CLK_MAX77686
 	  This driver supports Maxim 77686 crystal oscillator clock. 
 
 endmenu
+
+source "drivers/clk/mvebu/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 71a25b91de00..d0a14ae8d49c 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PLAT_SPEAR)	+= spear/
 obj-$(CONFIG_ARCH_U300)		+= clk-u300.o
 obj-$(CONFIG_COMMON_CLK_VERSATILE) += versatile/
 obj-$(CONFIG_ARCH_PRIMA2)	+= clk-prima2.o
+obj-$(CONFIG_PLAT_ORION)	+= mvebu/
 ifeq ($(CONFIG_COMMON_CLK), y)
 obj-$(CONFIG_ARCH_MMP)		+= mmp/
 endif
diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig
new file mode 100644
index 000000000000..fd7bf97ff74d
--- /dev/null
+++ b/drivers/clk/mvebu/Kconfig
@@ -0,0 +1,3 @@
+config MVEBU_CLK_CORE
+       bool
+
diff --git a/drivers/clk/mvebu/Makefile b/drivers/clk/mvebu/Makefile
new file mode 100644
index 000000000000..de1d9617f75a
--- /dev/null
+++ b/drivers/clk/mvebu/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MVEBU_CLK_CORE) 	+= clk.o clk-core.o
diff --git a/drivers/clk/mvebu/clk-core.c b/drivers/clk/mvebu/clk-core.c
new file mode 100644
index 000000000000..69056a7479e8
--- /dev/null
+++ b/drivers/clk/mvebu/clk-core.c
@@ -0,0 +1,675 @@
+/*
+ * Marvell EBU clock core handling defined at reset
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include "clk-core.h"
+
+struct core_ratio {
+	int id;
+	const char *name;
+};
+
+struct core_clocks {
+	u32 (*get_tclk_freq)(void __iomem *sar);
+	u32 (*get_cpu_freq)(void __iomem *sar);
+	void (*get_clk_ratio)(void __iomem *sar, int id, int *mult, int *div);
+	const struct core_ratio *ratios;
+	int num_ratios;
+};
+
+static struct clk_onecell_data clk_data;
+
+static void __init mvebu_clk_core_setup(struct device_node *np,
+				 struct core_clocks *coreclk)
+{
+	const char *tclk_name = "tclk";
+	const char *cpuclk_name = "cpuclk";
+	void __iomem *base;
+	unsigned long rate;
+	int n;
+
+	base = of_iomap(np, 0);
+	if (WARN_ON(!base))
+		return;
+
+	/*
+	 * Allocate struct for TCLK, cpu clk, and core ratio clocks
+	 */
+	clk_data.clk_num = 2 + coreclk->num_ratios;
+	clk_data.clks = kzalloc(clk_data.clk_num * sizeof(struct clk *),
+				GFP_KERNEL);
+	if (WARN_ON(!clk_data.clks))
+		return;
+
+	/*
+	 * Register TCLK
+	 */
+	of_property_read_string_index(np, "clock-output-names", 0,
+				      &tclk_name);
+	rate = coreclk->get_tclk_freq(base);
+	clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL,
+						   CLK_IS_ROOT, rate);
+	WARN_ON(IS_ERR(clk_data.clks[0]));
+
+	/*
+	 * Register CPU clock
+	 */
+	of_property_read_string_index(np, "clock-output-names", 1,
+				      &cpuclk_name);
+	rate = coreclk->get_cpu_freq(base);
+	clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL,
+						   CLK_IS_ROOT, rate);
+	WARN_ON(IS_ERR(clk_data.clks[1]));
+
+	/*
+	 * Register fixed-factor clocks derived from CPU clock
+	 */
+	for (n = 0; n < coreclk->num_ratios; n++) {
+		const char *rclk_name = coreclk->ratios[n].name;
+		int mult, div;
+
+		of_property_read_string_index(np, "clock-output-names",
+					      2+n, &rclk_name);
+		coreclk->get_clk_ratio(base, coreclk->ratios[n].id,
+				       &mult, &div);
+		clk_data.clks[2+n] = clk_register_fixed_factor(NULL, rclk_name,
+				       cpuclk_name, 0, mult, div);
+		WARN_ON(IS_ERR(clk_data.clks[2+n]));
+	};
+
+	/*
+	 * SAR register isn't needed anymore
+	 */
+	iounmap(base);
+
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+}
+
+#ifdef CONFIG_MACH_ARMADA_370_XP
+/*
+ * Armada 370/XP Sample At Reset is a 64 bit bitfiled split in two
+ * register of 32 bits
+ */
+
+#define SARL				    0	/* Low part [0:31] */
+#define	    SARL_AXP_PCLK_FREQ_OPT	    21
+#define	    SARL_AXP_PCLK_FREQ_OPT_MASK	    0x7
+#define	    SARL_A370_PCLK_FREQ_OPT	    11
+#define	    SARL_A370_PCLK_FREQ_OPT_MASK    0xF
+#define	    SARL_AXP_FAB_FREQ_OPT	    24
+#define	    SARL_AXP_FAB_FREQ_OPT_MASK	    0xF
+#define	    SARL_A370_FAB_FREQ_OPT	    15
+#define	    SARL_A370_FAB_FREQ_OPT_MASK	    0x1F
+#define	    SARL_A370_TCLK_FREQ_OPT	    20
+#define	    SARL_A370_TCLK_FREQ_OPT_MASK    0x1
+#define SARH				    4	/* High part [32:63] */
+#define	    SARH_AXP_PCLK_FREQ_OPT	    (52-32)
+#define	    SARH_AXP_PCLK_FREQ_OPT_MASK	    0x1
+#define	    SARH_AXP_PCLK_FREQ_OPT_SHIFT    3
+#define	    SARH_AXP_FAB_FREQ_OPT	    (51-32)
+#define	    SARH_AXP_FAB_FREQ_OPT_MASK	    0x1
+#define	    SARH_AXP_FAB_FREQ_OPT_SHIFT	    4
+
+static const u32 __initconst armada_370_tclk_frequencies[] = {
+	16600000,
+	20000000,
+};
+
+static u32 __init armada_370_get_tclk_freq(void __iomem *sar)
+{
+	u8 tclk_freq_select = 0;
+
+	tclk_freq_select = ((readl(sar) >> SARL_A370_TCLK_FREQ_OPT) &
+			    SARL_A370_TCLK_FREQ_OPT_MASK);
+	return armada_370_tclk_frequencies[tclk_freq_select];
+}
+
+static const u32 __initconst armada_370_cpu_frequencies[] = {
+	400000000,
+	533000000,
+	667000000,
+	800000000,
+	1000000000,
+	1067000000,
+	1200000000,
+};
+
+static u32 __init armada_370_get_cpu_freq(void __iomem *sar)
+{
+	u32 cpu_freq;
+	u8 cpu_freq_select = 0;
+
+	cpu_freq_select = ((readl(sar) >> SARL_A370_PCLK_FREQ_OPT) &
+			   SARL_A370_PCLK_FREQ_OPT_MASK);
+	if (cpu_freq_select > ARRAY_SIZE(armada_370_cpu_frequencies)) {
+		pr_err("CPU freq select unsuported %d\n", cpu_freq_select);
+		cpu_freq = 0;
+	} else
+		cpu_freq = armada_370_cpu_frequencies[cpu_freq_select];
+
+	return cpu_freq;
+}
+
+enum { A370_XP_NBCLK, A370_XP_HCLK, A370_XP_DRAMCLK };
+
+static const struct core_ratio __initconst armada_370_xp_core_ratios[] = {
+	{ .id = A370_XP_NBCLK,	 .name = "nbclk" },
+	{ .id = A370_XP_HCLK,	 .name = "hclk" },
+	{ .id = A370_XP_DRAMCLK, .name = "dramclk" },
+};
+
+static const int __initconst armada_370_xp_nbclk_ratios[32][2] = {
+	{0, 1}, {1, 2}, {2, 2}, {2, 2},
+	{1, 2}, {1, 2}, {1, 1}, {2, 3},
+	{0, 1}, {1, 2}, {2, 4}, {0, 1},
+	{1, 2}, {0, 1}, {0, 1}, {2, 2},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{2, 3}, {0, 1}, {0, 1}, {0, 1},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static const int __initconst armada_370_xp_hclk_ratios[32][2] = {
+	{0, 1}, {1, 2}, {2, 6}, {2, 3},
+	{1, 3}, {1, 4}, {1, 2}, {2, 6},
+	{0, 1}, {1, 6}, {2, 10}, {0, 1},
+	{1, 4}, {0, 1}, {0, 1}, {2, 5},
+	{0, 1}, {0, 1}, {0, 1}, {1, 2},
+	{2, 6}, {0, 1}, {0, 1}, {0, 1},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static const int __initconst armada_370_xp_dramclk_ratios[32][2] = {
+	{0, 1}, {1, 2}, {2, 3}, {2, 3},
+	{1, 3}, {1, 2}, {1, 2}, {2, 6},
+	{0, 1}, {1, 3}, {2, 5}, {0, 1},
+	{1, 4}, {0, 1}, {0, 1}, {2, 5},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{2, 3}, {0, 1}, {0, 1}, {0, 1},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static void __init armada_370_xp_get_clk_ratio(u32 opt,
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case A370_XP_NBCLK:
+		*mult = armada_370_xp_nbclk_ratios[opt][0];
+		*div = armada_370_xp_nbclk_ratios[opt][1];
+		break;
+	case A370_XP_HCLK:
+		*mult = armada_370_xp_hclk_ratios[opt][0];
+		*div = armada_370_xp_hclk_ratios[opt][1];
+		break;
+	case A370_XP_DRAMCLK:
+		*mult = armada_370_xp_dramclk_ratios[opt][0];
+		*div = armada_370_xp_dramclk_ratios[opt][1];
+		break;
+	}
+}
+
+static void __init armada_370_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	u32 opt = ((readl(sar) >> SARL_A370_FAB_FREQ_OPT) &
+		SARL_A370_FAB_FREQ_OPT_MASK);
+
+	armada_370_xp_get_clk_ratio(opt, sar, id, mult, div);
+}
+
+
+static const struct core_clocks armada_370_core_clocks = {
+	.get_tclk_freq = armada_370_get_tclk_freq,
+	.get_cpu_freq = armada_370_get_cpu_freq,
+	.get_clk_ratio = armada_370_get_clk_ratio,
+	.ratios = armada_370_xp_core_ratios,
+	.num_ratios = ARRAY_SIZE(armada_370_xp_core_ratios),
+};
+
+static const u32 __initconst armada_xp_cpu_frequencies[] = {
+	1000000000,
+	1066000000,
+	1200000000,
+	1333000000,
+	1500000000,
+	1666000000,
+	1800000000,
+	2000000000,
+	667000000,
+	0,
+	800000000,
+	1600000000,
+};
+
+/* For Armada XP TCLK frequency is fix: 250MHz */
+static u32 __init armada_xp_get_tclk_freq(void __iomem *sar)
+{
+	return 250 * 1000 * 1000;
+}
+
+static u32 __init armada_xp_get_cpu_freq(void __iomem *sar)
+{
+	u32 cpu_freq;
+	u8 cpu_freq_select = 0;
+
+	cpu_freq_select = ((readl(sar) >> SARL_AXP_PCLK_FREQ_OPT) &
+			   SARL_AXP_PCLK_FREQ_OPT_MASK);
+	/*
+	 * The upper bit is not contiguous to the other ones and
+	 * located in the high part of the SAR registers
+	 */
+	cpu_freq_select |= (((readl(sar+4) >> SARH_AXP_PCLK_FREQ_OPT) &
+			     SARH_AXP_PCLK_FREQ_OPT_MASK)
+			    << SARH_AXP_PCLK_FREQ_OPT_SHIFT);
+	if (cpu_freq_select > ARRAY_SIZE(armada_xp_cpu_frequencies)) {
+		pr_err("CPU freq select unsuported: %d\n", cpu_freq_select);
+		cpu_freq = 0;
+	} else
+		cpu_freq = armada_xp_cpu_frequencies[cpu_freq_select];
+
+	return cpu_freq;
+}
+
+static void __init armada_xp_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+
+	u32 opt = ((readl(sar) >> SARL_AXP_FAB_FREQ_OPT) &
+	      SARL_AXP_FAB_FREQ_OPT_MASK);
+	/*
+	 * The upper bit is not contiguous to the other ones and
+	 * located in the high part of the SAR registers
+	 */
+	opt |= (((readl(sar+4) >> SARH_AXP_FAB_FREQ_OPT) &
+		SARH_AXP_FAB_FREQ_OPT_MASK)
+	       << SARH_AXP_FAB_FREQ_OPT_SHIFT);
+
+	armada_370_xp_get_clk_ratio(opt, sar, id, mult, div);
+}
+
+static const struct core_clocks armada_xp_core_clocks = {
+	.get_tclk_freq = armada_xp_get_tclk_freq,
+	.get_cpu_freq = armada_xp_get_cpu_freq,
+	.get_clk_ratio = armada_xp_get_clk_ratio,
+	.ratios = armada_370_xp_core_ratios,
+	.num_ratios = ARRAY_SIZE(armada_370_xp_core_ratios),
+};
+
+#endif /* CONFIG_MACH_ARMADA_370_XP */
+
+/*
+ * Dove PLL sample-at-reset configuration
+ *
+ * SAR0[8:5]   : CPU frequency
+ *		 5  = 1000 MHz
+ *		 6  =  933 MHz
+ *		 7  =  933 MHz
+ *		 8  =  800 MHz
+ *		 9  =  800 MHz
+ *		 10 =  800 MHz
+ *		 11 = 1067 MHz
+ *		 12 =  667 MHz
+ *		 13 =  533 MHz
+ *		 14 =  400 MHz
+ *		 15 =  333 MHz
+ *		 others reserved.
+ *
+ * SAR0[11:9]  : CPU to L2 Clock divider ratio
+ *		 0 = (1/1) * CPU
+ *		 2 = (1/2) * CPU
+ *		 4 = (1/3) * CPU
+ *		 6 = (1/4) * CPU
+ *		 others reserved.
+ *
+ * SAR0[15:12] : CPU to DDR DRAM Clock divider ratio
+ *		 0  = (1/1) * CPU
+ *		 2  = (1/2) * CPU
+ *		 3  = (2/5) * CPU
+ *		 4  = (1/3) * CPU
+ *		 6  = (1/4) * CPU
+ *		 8  = (1/5) * CPU
+ *		 10 = (1/6) * CPU
+ *		 12 = (1/7) * CPU
+ *		 14 = (1/8) * CPU
+ *		 15 = (1/10) * CPU
+ *		 others reserved.
+ *
+ * SAR0[24:23] : TCLK frequency
+ *		 0 = 166 MHz
+ *		 1 = 125 MHz
+ *		 others reserved.
+ */
+#ifdef CONFIG_ARCH_DOVE
+#define SAR_DOVE_CPU_FREQ		5
+#define SAR_DOVE_CPU_FREQ_MASK		0xf
+#define SAR_DOVE_L2_RATIO		9
+#define SAR_DOVE_L2_RATIO_MASK		0x7
+#define SAR_DOVE_DDR_RATIO		12
+#define SAR_DOVE_DDR_RATIO_MASK		0xf
+#define SAR_DOVE_TCLK_FREQ		23
+#define SAR_DOVE_TCLK_FREQ_MASK		0x3
+
+static const u32 __initconst dove_tclk_frequencies[] = {
+	166666667,
+	125000000,
+	0, 0
+};
+
+static u32 __init dove_get_tclk_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_DOVE_TCLK_FREQ) &
+		SAR_DOVE_TCLK_FREQ_MASK;
+	return dove_tclk_frequencies[opt];
+}
+
+static const u32 __initconst dove_cpu_frequencies[] = {
+	0, 0, 0, 0, 0,
+	1000000000,
+	933333333, 933333333,
+	800000000, 800000000, 800000000,
+	1066666667,
+	666666667,
+	533333333,
+	400000000,
+	333333333
+};
+
+static u32 __init dove_get_cpu_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_DOVE_CPU_FREQ) &
+		SAR_DOVE_CPU_FREQ_MASK;
+	return dove_cpu_frequencies[opt];
+}
+
+enum { DOVE_CPU_TO_L2, DOVE_CPU_TO_DDR };
+
+static const struct core_ratio __initconst dove_core_ratios[] = {
+	{ .id = DOVE_CPU_TO_L2, .name = "l2clk", },
+	{ .id = DOVE_CPU_TO_DDR, .name = "ddrclk", }
+};
+
+static const int __initconst dove_cpu_l2_ratios[8][2] = {
+	{ 1, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 },
+	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 }
+};
+
+static const int __initconst dove_cpu_ddr_ratios[16][2] = {
+	{ 1, 1 }, { 0, 1 }, { 1, 2 }, { 2, 5 },
+	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 },
+	{ 1, 5 }, { 0, 1 }, { 1, 6 }, { 0, 1 },
+	{ 1, 7 }, { 0, 1 }, { 1, 8 }, { 1, 10 }
+};
+
+static void __init dove_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case DOVE_CPU_TO_L2:
+	{
+		u32 opt = (readl(sar) >> SAR_DOVE_L2_RATIO) &
+			SAR_DOVE_L2_RATIO_MASK;
+		*mult = dove_cpu_l2_ratios[opt][0];
+		*div = dove_cpu_l2_ratios[opt][1];
+		break;
+	}
+	case DOVE_CPU_TO_DDR:
+	{
+		u32 opt = (readl(sar) >> SAR_DOVE_DDR_RATIO) &
+			SAR_DOVE_DDR_RATIO_MASK;
+		*mult = dove_cpu_ddr_ratios[opt][0];
+		*div = dove_cpu_ddr_ratios[opt][1];
+		break;
+	}
+	}
+}
+
+static const struct core_clocks dove_core_clocks = {
+	.get_tclk_freq = dove_get_tclk_freq,
+	.get_cpu_freq = dove_get_cpu_freq,
+	.get_clk_ratio = dove_get_clk_ratio,
+	.ratios = dove_core_ratios,
+	.num_ratios = ARRAY_SIZE(dove_core_ratios),
+};
+#endif /* CONFIG_ARCH_DOVE */
+
+/*
+ * Kirkwood PLL sample-at-reset configuration
+ * (6180 has different SAR layout than other Kirkwood SoCs)
+ *
+ * SAR0[4:3,22,1] : CPU frequency (6281,6292,6282)
+ *	4  =  600 MHz
+ *	6  =  800 MHz
+ *	7  = 1000 MHz
+ *	9  = 1200 MHz
+ *	12 = 1500 MHz
+ *	13 = 1600 MHz
+ *	14 = 1800 MHz
+ *	15 = 2000 MHz
+ *	others reserved.
+ *
+ * SAR0[19,10:9] : CPU to L2 Clock divider ratio (6281,6292,6282)
+ *	1 = (1/2) * CPU
+ *	3 = (1/3) * CPU
+ *	5 = (1/4) * CPU
+ *	others reserved.
+ *
+ * SAR0[8:5] : CPU to DDR DRAM Clock divider ratio (6281,6292,6282)
+ *	2 = (1/2) * CPU
+ *	4 = (1/3) * CPU
+ *	6 = (1/4) * CPU
+ *	7 = (2/9) * CPU
+ *	8 = (1/5) * CPU
+ *	9 = (1/6) * CPU
+ *	others reserved.
+ *
+ * SAR0[4:2] : Kirkwood 6180 cpu/l2/ddr clock configuration (6180 only)
+ *	5 = [CPU =  600 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/3) * CPU]
+ *	6 = [CPU =  800 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/4) * CPU]
+ *	7 = [CPU = 1000 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/5) * CPU]
+ *	others reserved.
+ *
+ * SAR0[21] : TCLK frequency
+ *	0 = 200 MHz
+ *	1 = 166 MHz
+ *	others reserved.
+ */
+#ifdef CONFIG_ARCH_KIRKWOOD
+#define SAR_KIRKWOOD_CPU_FREQ(x)	\
+	(((x & (1 <<  1)) >>  1) |	\
+	 ((x & (1 << 22)) >> 21) |	\
+	 ((x & (3 <<  3)) >>  1))
+#define SAR_KIRKWOOD_L2_RATIO(x)	\
+	(((x & (3 <<  9)) >> 9) |	\
+	 (((x & (1 << 19)) >> 17)))
+#define SAR_KIRKWOOD_DDR_RATIO		5
+#define SAR_KIRKWOOD_DDR_RATIO_MASK	0xf
+#define SAR_MV88F6180_CLK		2
+#define SAR_MV88F6180_CLK_MASK		0x7
+#define SAR_KIRKWOOD_TCLK_FREQ		21
+#define SAR_KIRKWOOD_TCLK_FREQ_MASK	0x1
+
+enum { KIRKWOOD_CPU_TO_L2, KIRKWOOD_CPU_TO_DDR };
+
+static const struct core_ratio __initconst kirkwood_core_ratios[] = {
+	{ .id = KIRKWOOD_CPU_TO_L2, .name = "l2clk", },
+	{ .id = KIRKWOOD_CPU_TO_DDR, .name = "ddrclk", }
+};
+
+static u32 __init kirkwood_get_tclk_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_KIRKWOOD_TCLK_FREQ) &
+		SAR_KIRKWOOD_TCLK_FREQ_MASK;
+	return (opt) ? 166666667 : 200000000;
+}
+
+static const u32 __initconst kirkwood_cpu_frequencies[] = {
+	0, 0, 0, 0,
+	600000000,
+	0,
+	800000000,
+	1000000000,
+	0,
+	1200000000,
+	0, 0,
+	1500000000,
+	1600000000,
+	1800000000,
+	2000000000
+};
+
+static u32 __init kirkwood_get_cpu_freq(void __iomem *sar)
+{
+	u32 opt = SAR_KIRKWOOD_CPU_FREQ(readl(sar));
+	return kirkwood_cpu_frequencies[opt];
+}
+
+static const int __initconst kirkwood_cpu_l2_ratios[8][2] = {
+	{ 0, 1 }, { 1, 2 }, { 0, 1 }, { 1, 3 },
+	{ 0, 1 }, { 1, 4 }, { 0, 1 }, { 0, 1 }
+};
+
+static const int __initconst kirkwood_cpu_ddr_ratios[16][2] = {
+	{ 0, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 },
+	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 2, 9 },
+	{ 1, 5 }, { 1, 6 }, { 0, 1 }, { 0, 1 },
+	{ 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }
+};
+
+static void __init kirkwood_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case KIRKWOOD_CPU_TO_L2:
+	{
+		u32 opt = SAR_KIRKWOOD_L2_RATIO(readl(sar));
+		*mult = kirkwood_cpu_l2_ratios[opt][0];
+		*div = kirkwood_cpu_l2_ratios[opt][1];
+		break;
+	}
+	case KIRKWOOD_CPU_TO_DDR:
+	{
+		u32 opt = (readl(sar) >> SAR_KIRKWOOD_DDR_RATIO) &
+			SAR_KIRKWOOD_DDR_RATIO_MASK;
+		*mult = kirkwood_cpu_ddr_ratios[opt][0];
+		*div = kirkwood_cpu_ddr_ratios[opt][1];
+		break;
+	}
+	}
+}
+
+static const struct core_clocks kirkwood_core_clocks = {
+	.get_tclk_freq = kirkwood_get_tclk_freq,
+	.get_cpu_freq = kirkwood_get_cpu_freq,
+	.get_clk_ratio = kirkwood_get_clk_ratio,
+	.ratios = kirkwood_core_ratios,
+	.num_ratios = ARRAY_SIZE(kirkwood_core_ratios),
+};
+
+static const u32 __initconst mv88f6180_cpu_frequencies[] = {
+	0, 0, 0, 0, 0,
+	600000000,
+	800000000,
+	1000000000
+};
+
+static u32 __init mv88f6180_get_cpu_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_MV88F6180_CLK) & SAR_MV88F6180_CLK_MASK;
+	return mv88f6180_cpu_frequencies[opt];
+}
+
+static const int __initconst mv88f6180_cpu_ddr_ratios[8][2] = {
+	{ 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 },
+	{ 0, 1 }, { 1, 3 }, { 1, 4 }, { 1, 5 }
+};
+
+static void __init mv88f6180_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case KIRKWOOD_CPU_TO_L2:
+	{
+		/* mv88f6180 has a fixed 1:2 CPU-to-L2 ratio */
+		*mult = 1;
+		*div = 2;
+		break;
+	}
+	case KIRKWOOD_CPU_TO_DDR:
+	{
+		u32 opt = (readl(sar) >> SAR_MV88F6180_CLK) &
+			SAR_MV88F6180_CLK_MASK;
+		*mult = mv88f6180_cpu_ddr_ratios[opt][0];
+		*div = mv88f6180_cpu_ddr_ratios[opt][1];
+		break;
+	}
+	}
+}
+
+static const struct core_clocks mv88f6180_core_clocks = {
+	.get_tclk_freq = kirkwood_get_tclk_freq,
+	.get_cpu_freq = mv88f6180_get_cpu_freq,
+	.get_clk_ratio = mv88f6180_get_clk_ratio,
+	.ratios = kirkwood_core_ratios,
+	.num_ratios = ARRAY_SIZE(kirkwood_core_ratios),
+};
+#endif /* CONFIG_ARCH_KIRKWOOD */
+
+static const __initdata struct of_device_id clk_core_match[] = {
+#ifdef CONFIG_MACH_ARMADA_370_XP
+	{
+		.compatible = "marvell,armada-370-core-clock",
+		.data = &armada_370_core_clocks,
+	},
+	{
+		.compatible = "marvell,armada-xp-core-clock",
+		.data = &armada_xp_core_clocks,
+	},
+#endif
+#ifdef CONFIG_ARCH_DOVE
+	{
+		.compatible = "marvell,dove-core-clock",
+		.data = &dove_core_clocks,
+	},
+#endif
+
+#ifdef CONFIG_ARCH_KIRKWOOD
+	{
+		.compatible = "marvell,kirkwood-core-clock",
+		.data = &kirkwood_core_clocks,
+	},
+	{
+		.compatible = "marvell,mv88f6180-core-clock",
+		.data = &mv88f6180_core_clocks,
+	},
+#endif
+
+	{ }
+};
+
+void __init mvebu_core_clk_init(void)
+{
+	struct device_node *np;
+
+	for_each_matching_node(np, clk_core_match) {
+		const struct of_device_id *match =
+			of_match_node(clk_core_match, np);
+		mvebu_clk_core_setup(np, (struct core_clocks *)match->data);
+	}
+}
diff --git a/drivers/clk/mvebu/clk-core.h b/drivers/clk/mvebu/clk-core.h
new file mode 100644
index 000000000000..28b5e02e9885
--- /dev/null
+++ b/drivers/clk/mvebu/clk-core.h
@@ -0,0 +1,18 @@
+/*
+ *  * Marvell EBU clock core handling defined at reset
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __MVEBU_CLK_CORE_H
+#define __MVEBU_CLK_CORE_H
+
+void __init mvebu_core_clk_init(void);
+
+#endif
diff --git a/drivers/clk/mvebu/clk.c b/drivers/clk/mvebu/clk.c
new file mode 100644
index 000000000000..e6742acf9880
--- /dev/null
+++ b/drivers/clk/mvebu/clk.c
@@ -0,0 +1,23 @@
+/*
+ * Marvell EBU SoC clock handling.
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/clk/mvebu.h>
+#include <linux/of.h>
+#include "clk-core.h"
+
+void __init mvebu_clocks_init(void)
+{
+	mvebu_core_clk_init();
+}
diff --git a/include/linux/clk/mvebu.h b/include/linux/clk/mvebu.h
new file mode 100644
index 000000000000..8c4ae713b063
--- /dev/null
+++ b/include/linux/clk/mvebu.h
@@ -0,0 +1,22 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __CLK_MVEBU_H_
+#define __CLK_MVEBU_H_
+
+void __init mvebu_clocks_init(void);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From d0b2fdd2a51203f04ea0a5d716e033c15e0231af Mon Sep 17 00:00:00 2001
From: Tao Ma
Date: Tue, 20 Nov 2012 22:06:18 +0800
Subject: cgroup: remove obsolete guarantee from cgroup_task_migrate.

'guarantee' is already removed from cgroup_task_migrate, so remove
the corresponding comments. Some other typos in cgroup are also
changed.

Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 4 ++--
 kernel/cgroup.c        | 8 +++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 82cf9e6fc77b..7d73905dcba2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -66,7 +66,7 @@ struct cgroup_subsys_state {
 	/*
 	 * State maintained by the cgroup system to allow subsystems
 	 * to be "busy". Should be accessed via css_get(),
-	 * css_tryget() and and css_put().
+	 * css_tryget() and css_put().
 	 */
 
 	atomic_t refcnt;
@@ -276,7 +276,7 @@ struct cgroup_map_cb {
 
 /* cftype->flags */
 #define CFTYPE_ONLY_ON_ROOT	(1U << 0)	/* only create on root cg */
-#define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create onp root cg */
+#define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create on root cg */
 
 #define MAX_CFTYPE_NAME		64
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6db01417d39a..55a0a770a5a2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -782,12 +782,12 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  *	The task_lock() exception
  *
  * The need for this exception arises from the action of
- * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
+ * cgroup_attach_task(), which overwrites one task's cgroup pointer with
  * another.  It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
  * task->cgroup without the expense of grabbing a system global
  * mutex.  Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroup pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
@@ -1882,9 +1882,7 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
 /*
  * cgroup_task_migrate - move a task from one cgroup to another.
  *
- * 'guarantee' is set if the caller promises that a new css_set for the task
- * will already exist. If not set, this function might sleep, and can fail with
- * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
+ * Must be called with cgroup_mutex and threadgroup locked.
  */
 static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 				struct task_struct *tsk, struct css_set *newcg)
-- 
cgit v1.2.3-55-g7522


From 60d151f38799d5e15845ee04b73cbf3839f1b06c Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Mon, 29 Oct 2012 16:54:49 +0100
Subject: dma: mv_xor: allow channels to be registered directly from the main
 device

Extend the XOR engine driver (currently called "mv_xor_shared") so
that XOR channels can be passed in the platform_data structure, and be
registered from there.

This will allow the users of the driver to be converted to the single
platform_driver variant of the mv_xor driver.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/dma/mv_xor.c                     | 45 ++++++++++++++++++++++++++++++++
 drivers/dma/mv_xor.h                     |  8 +++---
 include/linux/platform_data/dma-mv_xor.h |  3 +++
 3 files changed, 53 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index f7b99193e884..6ed3162cccc9 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1292,7 +1292,9 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
 	struct mv_xor_shared_private *msp;
+	struct mv_xor_shared_platform_data *pdata = pdev->dev.platform_data;
 	struct resource *res;
+	int i, ret;
 
 	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
 
@@ -1334,12 +1336,55 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 	if (!IS_ERR(msp->clk))
 		clk_prepare_enable(msp->clk);
 
+	if (pdata && pdata->channels) {
+		for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+			struct mv_xor_platform_data *cd;
+			int irq;
+
+			cd = &pdata->channels[i];
+			if (!cd) {
+				ret = -ENODEV;
+				goto err_channel_add;
+			}
+
+			irq = platform_get_irq(pdev, i);
+			if (irq < 0) {
+				ret = irq;
+				goto err_channel_add;
+			}
+
+			msp->channels[i] =
+				mv_xor_channel_add(msp, pdev, cd->hw_id,
+						   cd->cap_mask,
+						   cd->pool_size, irq);
+			if (IS_ERR(msp->channels[i])) {
+				ret = PTR_ERR(msp->channels[i]);
+				goto err_channel_add;
+			}
+		}
+	}
+
 	return 0;
+
+err_channel_add:
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++)
+		if (msp->channels[i])
+			mv_xor_channel_remove(msp->channels[i]);
+
+	clk_disable_unprepare(msp->clk);
+	clk_put(msp->clk);
+	return ret;
 }
 
 static int mv_xor_shared_remove(struct platform_device *pdev)
 {
 	struct mv_xor_shared_private *msp = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+		if (msp->channels[i])
+			mv_xor_channel_remove(msp->channels[i]);
+	}
 
 	if (!IS_ERR(msp->clk)) {
 		clk_disable_unprepare(msp->clk);
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index a0641aebbdef..686575f6b9f5 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -26,6 +26,7 @@
 #define USE_TIMER
 #define MV_XOR_SLOT_SIZE		64
 #define MV_XOR_THRESHOLD		1
+#define MV_XOR_MAX_CHANNELS             2
 
 #define XOR_OPERATION_MODE_XOR		0
 #define XOR_OPERATION_MODE_MEMCPY	2
@@ -53,9 +54,10 @@
 #define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
 
 struct mv_xor_shared_private {
-	void __iomem	*xor_base;
-	void __iomem	*xor_high_base;
-	struct clk	*clk;
+	void __iomem	     *xor_base;
+	void __iomem	     *xor_high_base;
+	struct clk	     *clk;
+	struct mv_xor_device *channels[MV_XOR_MAX_CHANNELS];
 };
 
 
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 2ba1f7d76eef..f2aed978ca25 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -20,5 +20,8 @@ struct mv_xor_platform_data {
 	size_t				pool_size;
 };
 
+struct mv_xor_shared_platform_data {
+	struct mv_xor_platform_data    *channels;
+};
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 18b2a02c7c8db8bb87a165d26c269968d3dd47bd Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Tue, 30 Oct 2012 11:54:34 +0100
Subject: dma: mv_xor: remove sub-driver 'mv_xor'

Now that XOR channels are directly registered by the main
'mv_xor_shared' device ->probe() function and all users of the
'mv_xor' device have been removed, we can get rid of the latter.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/dma/mv_xor.c                     | 49 +-------------------------------
 include/linux/platform_data/dma-mv_xor.h |  1 -
 2 files changed, 1 insertion(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 6ed3162cccc9..be3907bdef14 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1219,35 +1219,6 @@ mv_xor_channel_add(struct mv_xor_shared_private *msp,
 	return ERR_PTR(ret);
 }
 
-static int __devexit mv_xor_remove(struct platform_device *pdev)
-{
-	struct mv_xor_device *device = platform_get_drvdata(pdev);
-	return mv_xor_channel_remove(device);
-}
-
-static int __devinit mv_xor_probe(struct platform_device *pdev)
-{
-	struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
-	struct mv_xor_shared_private *msp =
-		platform_get_drvdata(plat_data->shared);
-	struct mv_xor_device *mv_xor_device;
-	int irq;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
-
-	mv_xor_device = mv_xor_channel_add(msp, pdev, plat_data->hw_id,
-					   plat_data->cap_mask,
-					   plat_data->pool_size, irq);
-	if (IS_ERR(mv_xor_device))
-		return PTR_ERR(mv_xor_device);
-
-	platform_set_drvdata(pdev, mv_xor_device);
-
-	return 0;
-}
-
 static void
 mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
 			 const struct mbus_dram_target_info *dram)
@@ -1279,15 +1250,6 @@ mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
 	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
 }
 
-static struct platform_driver mv_xor_driver = {
-	.probe		= mv_xor_probe,
-	.remove		= __devexit_p(mv_xor_remove),
-	.driver		= {
-		.owner	= THIS_MODULE,
-		.name	= MV_XOR_NAME,
-	},
-};
-
 static int mv_xor_shared_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
@@ -1406,15 +1368,7 @@ static struct platform_driver mv_xor_shared_driver = {
 
 static int __init mv_xor_init(void)
 {
-	int rc;
-
-	rc = platform_driver_register(&mv_xor_shared_driver);
-	if (!rc) {
-		rc = platform_driver_register(&mv_xor_driver);
-		if (rc)
-			platform_driver_unregister(&mv_xor_shared_driver);
-	}
-	return rc;
+	return platform_driver_register(&mv_xor_shared_driver);
 }
 module_init(mv_xor_init);
 
@@ -1422,7 +1376,6 @@ module_init(mv_xor_init);
 #if 0
 static void __exit mv_xor_exit(void)
 {
-	platform_driver_unregister(&mv_xor_driver);
 	platform_driver_unregister(&mv_xor_shared_driver);
 	return;
 }
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index f2aed978ca25..6abe5f9326b6 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -11,7 +11,6 @@
 #include <linux/mbus.h>
 
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
-#define MV_XOR_NAME		"mv_xor"
 
 struct mv_xor_platform_data {
 	struct platform_device		*shared;
-- 
cgit v1.2.3-55-g7522


From 2ccc469cfecee291707dd50e5842cbf206bc17d7 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Wed, 31 Oct 2012 13:24:41 +0100
Subject: dma: mv_xor: remove 'shared' from mv_xor_platform_data

This member of the platform_data structure is no longer used, so get
rid of it.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 8 --------
 include/linux/platform_data/dma-mv_xor.h | 1 -
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index c6e6666986ff..5a66211d523c 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -625,16 +625,12 @@ static struct resource orion_xor0_shared_resources[] = {
 	},
 };
 
-static struct platform_device orion_xor0_shared;
-
 static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = {
 	{
-		.shared		= &orion_xor0_shared,
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.shared		= &orion_xor0_shared,
 		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
@@ -704,16 +700,12 @@ static struct resource orion_xor1_shared_resources[] = {
 	},
 };
 
-static struct platform_device orion_xor1_shared;
-
 static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = {
 	{
-		.shared		= &orion_xor1_shared,
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.shared		= &orion_xor1_shared,
 		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 6abe5f9326b6..4a0980b14c9b 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -13,7 +13,6 @@
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
 
 struct mv_xor_platform_data {
-	struct platform_device		*shared;
 	int				hw_id;
 	dma_cap_mask_t			cap_mask;
 	size_t				pool_size;
-- 
cgit v1.2.3-55-g7522


From e39f6ec1f9c1d6a7011adf6d95d8d80bad0586b1 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Tue, 30 Oct 2012 11:56:26 +0100
Subject: dma: mv_xor: rename mv_xor_platform_data to mv_xor_channel_data

mv_xor_platform_data used to be the platform_data structure associated
to the 'mv_xor' driver. This driver no longer exists, and this data
structure really contains the properties of each XOR channel part of a
given XOR engine. Therefore 'struct mv_xor_channel_data' is a more
appropriate name.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 28 ++++++++++++++--------------
 drivers/dma/mv_xor.c                     |  2 +-
 include/linux/platform_data/dma-mv_xor.h |  4 ++--
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 5a66211d523c..7ffbe77c52cb 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -625,7 +625,7 @@ static struct resource orion_xor0_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = {
+static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
 	{
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
@@ -637,7 +637,7 @@ static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = {
 };
 
 static struct mv_xor_shared_platform_data orion_xor0_pdata = {
-	.channels = orion_xor0_channels_pdata,
+	.channels = orion_xor0_channels_data,
 };
 
 static struct platform_device orion_xor0_shared = {
@@ -671,12 +671,12 @@ void __init orion_xor0_init(unsigned long mapbase_low,
 	 * two engines can't do memset simultaneously, this limitation
 	 * satisfied by removing memset support from one of the engines.
 	 */
-	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_pdata[0].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor0_channels_pdata[0].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[0].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor0_channels_data[0].cap_mask);
 
-	dma_cap_set(DMA_MEMSET, orion_xor0_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor0_channels_pdata[1].cap_mask);
+	dma_cap_set(DMA_MEMSET, orion_xor0_channels_data[1].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[1].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor0_channels_data[1].cap_mask);
 
 	platform_device_register(&orion_xor0_shared);
 }
@@ -700,7 +700,7 @@ static struct resource orion_xor1_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = {
+static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
 	{
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
@@ -712,7 +712,7 @@ static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = {
 };
 
 static struct mv_xor_shared_platform_data orion_xor1_pdata = {
-	.channels = orion_xor1_channels_pdata,
+	.channels = orion_xor1_channels_data,
 };
 
 static struct platform_device orion_xor1_shared = {
@@ -746,12 +746,12 @@ void __init orion_xor1_init(unsigned long mapbase_low,
 	 * two engines can't do memset simultaneously, this limitation
 	 * satisfied by removing memset support from one of the engines.
 	 */
-	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_pdata[0].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor1_channels_pdata[0].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[0].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor1_channels_data[0].cap_mask);
 
-	dma_cap_set(DMA_MEMSET, orion_xor1_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor1_channels_pdata[1].cap_mask);
+	dma_cap_set(DMA_MEMSET, orion_xor1_channels_data[1].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[1].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor1_channels_data[1].cap_mask);
 
 	platform_device_register(&orion_xor1_shared);
 }
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index be3907bdef14..c7926e417281 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1300,7 +1300,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 
 	if (pdata && pdata->channels) {
 		for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
-			struct mv_xor_platform_data *cd;
+			struct mv_xor_channel_data *cd;
 			int irq;
 
 			cd = &pdata->channels[i];
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 4a0980b14c9b..40ea3d5f5b9f 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -12,14 +12,14 @@
 
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
 
-struct mv_xor_platform_data {
+struct mv_xor_channel_data {
 	int				hw_id;
 	dma_cap_mask_t			cap_mask;
 	size_t				pool_size;
 };
 
 struct mv_xor_shared_platform_data {
-	struct mv_xor_platform_data    *channels;
+	struct mv_xor_channel_data    *channels;
 };
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 7dde453d628687c0e991cfc55c9fd299a804aee6 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Tue, 30 Oct 2012 11:58:14 +0100
Subject: dma: mv_xor: rename mv_xor_shared_platform_data to
 mv_xor_platform_data

'struct mv_xor_shared_platform_data' used to be the platform_data
structure for the 'mv_xor_shared', but this driver is going to be
renamed simply 'mv_xor', so also rename its platform_data structure
accordingly.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 4 ++--
 drivers/dma/mv_xor.c                     | 2 +-
 include/linux/platform_data/dma-mv_xor.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 7ffbe77c52cb..edd57a68fa8c 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -636,7 +636,7 @@ static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
 	},
 };
 
-static struct mv_xor_shared_platform_data orion_xor0_pdata = {
+static struct mv_xor_platform_data orion_xor0_pdata = {
 	.channels = orion_xor0_channels_data,
 };
 
@@ -711,7 +711,7 @@ static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
 	},
 };
 
-static struct mv_xor_shared_platform_data orion_xor1_pdata = {
+static struct mv_xor_platform_data orion_xor1_pdata = {
 	.channels = orion_xor1_channels_data,
 };
 
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index c7926e417281..ac598168b21f 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1254,7 +1254,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
 	struct mv_xor_shared_private *msp;
-	struct mv_xor_shared_platform_data *pdata = pdev->dev.platform_data;
+	struct mv_xor_platform_data *pdata = pdev->dev.platform_data;
 	struct resource *res;
 	int i, ret;
 
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 40ea3d5f5b9f..82a5f4b84afe 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -18,7 +18,7 @@ struct mv_xor_channel_data {
 	size_t				pool_size;
 };
 
-struct mv_xor_shared_platform_data {
+struct mv_xor_platform_data {
 	struct mv_xor_channel_data    *channels;
 };
 
-- 
cgit v1.2.3-55-g7522


From 0dddee7a7d42192267ebef0fe15be8b296b665c8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Tue, 30 Oct 2012 11:59:42 +0100
Subject: dma: mv_xor: change the driver name to 'mv_xor'

Since we got rid of the per-XOR channel 'mv_xor' driver, now the
per-XOR engine driver that used to be called 'mv_xor_shared' can
simply be named 'mv_xor'.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/mach-dove/common.c              | 9 +++++----
 arch/arm/mach-kirkwood/board-dt.c        | 5 +++--
 arch/arm/mach-kirkwood/common.c          | 4 ++--
 arch/arm/plat-orion/common.c             | 4 ++--
 drivers/dma/mv_xor.c                     | 2 +-
 include/linux/platform_data/dma-mv_xor.h | 2 +-
 6 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 6a2c4dc413a8..f4ac5b06014b 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <plat/time.h>
 #include <linux/platform_data/usb-ehci-orion.h>
+#include <linux/platform_data/dma-mv_xor.h>
 #include <plat/irq.h>
 #include <plat/common.h>
 #include <plat/addr-map.h>
@@ -124,8 +125,8 @@ static void __init dove_clk_init(void)
 	orion_clkdev_add(NULL, "mv_crypto", crypto);
 	orion_clkdev_add(NULL, "dove-ac97", ac97);
 	orion_clkdev_add(NULL, "dove-pdma", pdma);
-	orion_clkdev_add(NULL, "mv_xor_shared.0", xor0);
-	orion_clkdev_add(NULL, "mv_xor_shared.1", xor1);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0", xor0);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1);
 }
 
 /*****************************************************************************
@@ -410,11 +411,11 @@ static void __init dove_legacy_clk_init(void)
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CLOCK_GATING_BIT_XOR0;
-	orion_clkdev_add(NULL, "mv_xor_shared.0",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0",
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CLOCK_GATING_BIT_XOR1;
-	orion_clkdev_add(NULL, "mv_xor_shared.1",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1",
 			 of_clk_get_from_provider(&clkspec));
 }
 
diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c
index 8bdfaa4db091..294ad5a4fd98 100644
--- a/arch/arm/mach-kirkwood/board-dt.c
+++ b/arch/arm/mach-kirkwood/board-dt.c
@@ -21,6 +21,7 @@
 #include <asm/mach/map.h>
 #include <mach/bridge-regs.h>
 #include <linux/platform_data/usb-ehci-orion.h>
+#include <linux/platform_data/dma-mv_xor.h>
 #include <plat/irq.h>
 #include <plat/common.h>
 #include "common.h"
@@ -60,11 +61,11 @@ static void __init kirkwood_legacy_clk_init(void)
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CGC_BIT_XOR0;
-	orion_clkdev_add(NULL, "mv_xor_shared.0",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0",
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CGC_BIT_XOR1;
-	orion_clkdev_add(NULL, "mv_xor_shared.1",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1",
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CGC_BIT_PEX1;
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 2c6c218fb79e..401dac1a8d80 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -260,8 +260,8 @@ void __init kirkwood_clk_init(void)
 	orion_clkdev_add(NULL, "orion_nand", runit);
 	orion_clkdev_add(NULL, "mvsdio", sdio);
 	orion_clkdev_add(NULL, "mv_crypto", crypto);
-	orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".0", xor0);
-	orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".1", xor1);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0", xor0);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1);
 	orion_clkdev_add("0", "pcie", pex0);
 	orion_clkdev_add("1", "pcie", pex1);
 	orion_clkdev_add(NULL, "kirkwood-i2s", audio);
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index edd57a68fa8c..31517cef8c4d 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -641,7 +641,7 @@ static struct mv_xor_platform_data orion_xor0_pdata = {
 };
 
 static struct platform_device orion_xor0_shared = {
-	.name		= MV_XOR_SHARED_NAME,
+	.name		= MV_XOR_NAME,
 	.id		= 0,
 	.num_resources	= ARRAY_SIZE(orion_xor0_shared_resources),
 	.resource	= orion_xor0_shared_resources,
@@ -716,7 +716,7 @@ static struct mv_xor_platform_data orion_xor1_pdata = {
 };
 
 static struct platform_device orion_xor1_shared = {
-	.name		= MV_XOR_SHARED_NAME,
+	.name		= MV_XOR_NAME,
 	.id		= 1,
 	.num_resources	= ARRAY_SIZE(orion_xor1_shared_resources),
 	.resource	= orion_xor1_shared_resources,
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index ac598168b21f..0ed5183eb5a3 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1361,7 +1361,7 @@ static struct platform_driver mv_xor_shared_driver = {
 	.remove		= mv_xor_shared_remove,
 	.driver		= {
 		.owner	= THIS_MODULE,
-		.name	= MV_XOR_SHARED_NAME,
+		.name	= MV_XOR_NAME,
 	},
 };
 
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 82a5f4b84afe..367bb216c4a7 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -10,7 +10,7 @@
 #include <linux/dmaengine.h>
 #include <linux/mbus.h>
 
-#define MV_XOR_SHARED_NAME	"mv_xor_shared"
+#define MV_XOR_NAME	"mv_xor"
 
 struct mv_xor_channel_data {
 	int				hw_id;
-- 
cgit v1.2.3-55-g7522


From 9aedbdbab39c8aa58c0b2a0791fb10df6eebc123 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Thu, 15 Nov 2012 15:36:37 +0100
Subject: dma: mv_xor: remove hw_id field from platform_data

There is no need for the platform_data to give this ID, it is simply
the channel number, so we can compute it inside the driver when
registering the channels.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 4 ----
 drivers/dma/mv_xor.c                     | 6 +++---
 include/linux/platform_data/dma-mv_xor.h | 1 -
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 31517cef8c4d..09d836060bf4 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -627,11 +627,9 @@ static struct resource orion_xor0_shared_resources[] = {
 
 static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
 	{
-		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
 };
@@ -702,11 +700,9 @@ static struct resource orion_xor1_shared_resources[] = {
 
 static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
 	{
-		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
 };
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index a6a5a28574c4..fc983bf38438 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1088,7 +1088,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
 static struct mv_xor_chan *
 mv_xor_channel_add(struct mv_xor_device *xordev,
 		   struct platform_device *pdev,
-		   int hw_id, dma_cap_mask_t cap_mask,
+		   int idx, dma_cap_mask_t cap_mask,
 		   size_t pool_size, int irq)
 {
 	int ret = 0;
@@ -1101,7 +1101,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 		goto err_free_dma;
 	}
 
-	mv_chan->idx = hw_id;
+	mv_chan->idx = idx;
 
 	dma_dev = &mv_chan->dmadev;
 
@@ -1295,7 +1295,7 @@ static int mv_xor_probe(struct platform_device *pdev)
 			}
 
 			xordev->channels[i] =
-				mv_xor_channel_add(xordev, pdev, cd->hw_id,
+				mv_xor_channel_add(xordev, pdev, i,
 						   cd->cap_mask,
 						   cd->pool_size, irq);
 			if (IS_ERR(xordev->channels[i])) {
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 367bb216c4a7..b18dc2496186 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -13,7 +13,6 @@
 #define MV_XOR_NAME	"mv_xor"
 
 struct mv_xor_channel_data {
-	int				hw_id;
 	dma_cap_mask_t			cap_mask;
 	size_t				pool_size;
 };
-- 
cgit v1.2.3-55-g7522


From b503fa01990f6875640339d8f4ba98dbc068f821 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Thu, 15 Nov 2012 15:55:30 +0100
Subject: dma: mv_xor: remove the pool_size from platform_data

The pool_size is always PAGE_SIZE, and since it is a software
configuration paramter (and not a hardware description parameter), we
cannot make it part of the Device Tree binding, so we'd better remove
it from the platform_data as well.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 18 ++----------------
 drivers/dma/mv_xor.c                     | 15 ++++++---------
 drivers/dma/mv_xor.h                     |  1 +
 include/linux/platform_data/dma-mv_xor.h |  1 -
 4 files changed, 9 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 09d836060bf4..2d4b6414609f 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -625,14 +625,7 @@ static struct resource orion_xor0_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-};
+static struct mv_xor_channel_data orion_xor0_channels_data[2];
 
 static struct mv_xor_platform_data orion_xor0_pdata = {
 	.channels = orion_xor0_channels_data,
@@ -698,14 +691,7 @@ static struct resource orion_xor1_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-};
+static struct mv_xor_channel_data orion_xor1_channels_data[2];
 
 static struct mv_xor_platform_data orion_xor1_pdata = {
 	.channels = orion_xor1_channels_data,
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index fc983bf38438..ec741b4607e2 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -603,7 +603,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
 	int idx;
 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
 	struct mv_xor_desc_slot *slot = NULL;
-	int num_descs_in_pool = mv_chan->pool_size/MV_XOR_SLOT_SIZE;
+	int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE;
 
 	/* Allocate descriptor slots */
 	idx = mv_chan->slots_allocated;
@@ -1074,7 +1074,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
 
 	dma_async_device_unregister(&mv_chan->dmadev);
 
-	dma_free_coherent(dev, mv_chan->pool_size,
+	dma_free_coherent(dev, MV_XOR_POOL_SIZE,
 			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
 
 	list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
@@ -1088,8 +1088,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
 static struct mv_xor_chan *
 mv_xor_channel_add(struct mv_xor_device *xordev,
 		   struct platform_device *pdev,
-		   int idx, dma_cap_mask_t cap_mask,
-		   size_t pool_size, int irq)
+		   int idx, dma_cap_mask_t cap_mask, int irq)
 {
 	int ret = 0;
 	struct mv_xor_chan *mv_chan;
@@ -1109,9 +1108,8 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 	 * note: writecombine gives slightly better performance, but
 	 * requires that we explicitly flush the writes
 	 */
-	mv_chan->pool_size = pool_size;
 	mv_chan->dma_desc_pool_virt =
-	  dma_alloc_writecombine(&pdev->dev, mv_chan->pool_size,
+	  dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE,
 				 &mv_chan->dma_desc_pool, GFP_KERNEL);
 	if (!mv_chan->dma_desc_pool_virt)
 		return ERR_PTR(-ENOMEM);
@@ -1193,7 +1191,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 	return mv_chan;
 
  err_free_dma:
-	dma_free_coherent(&pdev->dev, pool_size,
+	dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE,
 			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
 	return ERR_PTR(ret);
 }
@@ -1296,8 +1294,7 @@ static int mv_xor_probe(struct platform_device *pdev)
 
 			xordev->channels[i] =
 				mv_xor_channel_add(xordev, pdev, i,
-						   cd->cap_mask,
-						   cd->pool_size, irq);
+						   cd->cap_mask, irq);
 			if (IS_ERR(xordev->channels[i])) {
 				ret = PTR_ERR(xordev->channels[i]);
 				goto err_channel_add;
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index dab9f30e564a..698b4487b348 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 
 #define USE_TIMER
+#define MV_XOR_POOL_SIZE		PAGE_SIZE
 #define MV_XOR_SLOT_SIZE		64
 #define MV_XOR_THRESHOLD		1
 #define MV_XOR_MAX_CHANNELS             2
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index b18dc2496186..8ec18f64e396 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -14,7 +14,6 @@
 
 struct mv_xor_channel_data {
 	dma_cap_mask_t			cap_mask;
-	size_t				pool_size;
 };
 
 struct mv_xor_platform_data {
-- 
cgit v1.2.3-55-g7522


From c8d35c84f5494d8d294205b598f927a11fd41f34 Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Fri, 2 Nov 2012 12:24:03 -0700
Subject: ARM: OMAP2+: Move plat/iovmm.h to include/linux/omap-iommu.h

Looks like the iommu framework does not have generic functions
exported for all the needs yet. The hardware specific functions
are defined in files like intel-iommu.h and amd-iommu.h. Follow
the same standard for omap-iommu.h.

This is needed because we are removing plat and mach includes
for ARM common zImage support. Further work should continue
in the iommu framework context as only pure platform data will
be communicated from arch/arm/*omap*/* code to the iommu
framework.

Cc: Ido Yariv <ido@wizery.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Omar Ramirez Luna <omar.luna@linaro.org>
Cc: linux-media@vger.kernel.org
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Ohad Ben-Cohen <ohad@wizery.com>
Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/iommu2.c               |  1 +
 arch/arm/plat-omap/include/plat/iommu.h    | 10 +---
 arch/arm/plat-omap/include/plat/iovmm.h    | 89 ------------------------------
 drivers/iommu/omap-iommu-debug.c           |  2 +-
 drivers/iommu/omap-iommu.c                 |  1 +
 drivers/iommu/omap-iovmm.c                 | 46 ++++++++++++++-
 drivers/media/platform/omap3isp/isp.c      |  1 +
 drivers/media/platform/omap3isp/isp.h      |  4 +-
 drivers/media/platform/omap3isp/ispccdc.c  |  1 +
 drivers/media/platform/omap3isp/ispstat.c  |  1 +
 drivers/media/platform/omap3isp/ispvideo.c |  2 +-
 include/linux/omap-iommu.h                 | 52 +++++++++++++++++
 12 files changed, 107 insertions(+), 103 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/iovmm.h
 create mode 100644 include/linux/omap-iommu.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/iommu2.c b/arch/arm/mach-omap2/iommu2.c
index eefc37912ef3..e8116cf58e15 100644
--- a/arch/arm/mach-omap2/iommu2.c
+++ b/arch/arm/mach-omap2/iommu2.c
@@ -15,6 +15,7 @@
 #include <linux/device.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/omap-iommu.h>
 #include <linux/slab.h>
 #include <linux/stringify.h>
 
diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h
index 7e8c7b66ee35..a4b71b14975a 100644
--- a/arch/arm/plat-omap/include/plat/iommu.h
+++ b/arch/arm/plat-omap/include/plat/iommu.h
@@ -216,13 +216,10 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
 #define MMU_RAM_PADDR_SHIFT	12
 #define MMU_RAM_PADDR_MASK \
 	((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT)
-#define MMU_RAM_ENDIAN_SHIFT	9
+
 #define MMU_RAM_ENDIAN_MASK	(1 << MMU_RAM_ENDIAN_SHIFT)
-#define MMU_RAM_ENDIAN_BIG	(1 << MMU_RAM_ENDIAN_SHIFT)
-#define MMU_RAM_ENDIAN_LITTLE	(0 << MMU_RAM_ENDIAN_SHIFT)
-#define MMU_RAM_ELSZ_SHIFT	7
 #define MMU_RAM_ELSZ_MASK	(3 << MMU_RAM_ELSZ_SHIFT)
-#define MMU_RAM_ELSZ_8		(0 << MMU_RAM_ELSZ_SHIFT)
+
 #define MMU_RAM_ELSZ_16		(1 << MMU_RAM_ELSZ_SHIFT)
 #define MMU_RAM_ELSZ_32		(2 << MMU_RAM_ELSZ_SHIFT)
 #define MMU_RAM_ELSZ_NONE	(3 << MMU_RAM_ELSZ_SHIFT)
@@ -269,9 +266,6 @@ extern int omap_iommu_set_isr(const char *name,
 				    void *priv),
 			 void *isr_priv);
 
-extern void omap_iommu_save_ctx(struct device *dev);
-extern void omap_iommu_restore_ctx(struct device *dev);
-
 extern int omap_install_iommu_arch(const struct iommu_functions *ops);
 extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
 
diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h
deleted file mode 100644
index 498e57cda6cd..000000000000
--- a/arch/arm/plat-omap/include/plat/iovmm.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * omap iommu: simple virtual address space management
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __IOMMU_MMAP_H
-#define __IOMMU_MMAP_H
-
-#include <linux/iommu.h>
-
-struct iovm_struct {
-	struct omap_iommu	*iommu;	/* iommu object which this belongs to */
-	u32			da_start; /* area definition */
-	u32			da_end;
-	u32			flags; /* IOVMF_: see below */
-	struct list_head	list; /* linked in ascending order */
-	const struct sg_table	*sgt; /* keep 'page' <-> 'da' mapping */
-	void			*va; /* mpu side mapped address */
-};
-
-/*
- * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma)
- *
- * lower 16 bit is used for h/w and upper 16 bit is for s/w.
- */
-#define IOVMF_SW_SHIFT		16
-
-/*
- * iovma: h/w flags derived from cam and ram attribute
- */
-#define IOVMF_CAM_MASK		(~((1 << 10) - 1))
-#define IOVMF_RAM_MASK		(~IOVMF_CAM_MASK)
-
-#define IOVMF_PGSZ_MASK		(3 << 0)
-#define IOVMF_PGSZ_1M		MMU_CAM_PGSZ_1M
-#define IOVMF_PGSZ_64K		MMU_CAM_PGSZ_64K
-#define IOVMF_PGSZ_4K		MMU_CAM_PGSZ_4K
-#define IOVMF_PGSZ_16M		MMU_CAM_PGSZ_16M
-
-#define IOVMF_ENDIAN_MASK	(1 << 9)
-#define IOVMF_ENDIAN_BIG	MMU_RAM_ENDIAN_BIG
-#define IOVMF_ENDIAN_LITTLE	MMU_RAM_ENDIAN_LITTLE
-
-#define IOVMF_ELSZ_MASK		(3 << 7)
-#define IOVMF_ELSZ_8		MMU_RAM_ELSZ_8
-#define IOVMF_ELSZ_16		MMU_RAM_ELSZ_16
-#define IOVMF_ELSZ_32		MMU_RAM_ELSZ_32
-#define IOVMF_ELSZ_NONE		MMU_RAM_ELSZ_NONE
-
-#define IOVMF_MIXED_MASK	(1 << 6)
-#define IOVMF_MIXED		MMU_RAM_MIXED
-
-/*
- * iovma: s/w flags, used for mapping and umapping internally.
- */
-#define IOVMF_MMIO		(1 << IOVMF_SW_SHIFT)
-#define IOVMF_ALLOC		(2 << IOVMF_SW_SHIFT)
-#define IOVMF_ALLOC_MASK	(3 << IOVMF_SW_SHIFT)
-
-/* "superpages" is supported just with physically linear pages */
-#define IOVMF_DISCONT		(1 << (2 + IOVMF_SW_SHIFT))
-#define IOVMF_LINEAR		(2 << (2 + IOVMF_SW_SHIFT))
-#define IOVMF_LINEAR_MASK	(3 << (2 + IOVMF_SW_SHIFT))
-
-#define IOVMF_DA_FIXED		(1 << (4 + IOVMF_SW_SHIFT))
-
-
-extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da);
-extern u32
-omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
-			const struct sg_table *sgt, u32 flags);
-extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
-				struct device *dev, u32 da);
-extern u32
-omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev,
-				u32 da, size_t bytes, u32 flags);
-extern void
-omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
-				const u32 da);
-extern void *omap_da_to_va(struct device *dev, u32 da);
-
-#endif /* __IOMMU_MMAP_H */
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 0cac372b0d43..cf4a0b5c1d82 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -18,9 +18,9 @@
 #include <linux/uaccess.h>
 #include <linux/platform_device.h>
 #include <linux/debugfs.h>
+#include <linux/omap-iommu.h>
 
 #include <plat/iommu.h>
-#include <plat/iovmm.h>
 
 #include "omap-iopgtable.h"
 
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index f2bbfb0fd0e0..eadcfde757ee 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/iommu.h>
+#include <linux/omap-iommu.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
index b332392397ce..9852101cf042 100644
--- a/drivers/iommu/omap-iovmm.c
+++ b/drivers/iommu/omap-iovmm.c
@@ -17,15 +17,59 @@
 #include <linux/device.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu.h>
+#include <linux/omap-iommu.h>
 
 #include <asm/cacheflush.h>
 #include <asm/mach/map.h>
 
 #include <plat/iommu.h>
-#include <plat/iovmm.h>
 
 #include "omap-iopgtable.h"
 
+/*
+ * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma)
+ *
+ * lower 16 bit is used for h/w and upper 16 bit is for s/w.
+ */
+#define IOVMF_SW_SHIFT		16
+
+/*
+ * iovma: h/w flags derived from cam and ram attribute
+ */
+#define IOVMF_CAM_MASK		(~((1 << 10) - 1))
+#define IOVMF_RAM_MASK		(~IOVMF_CAM_MASK)
+
+#define IOVMF_PGSZ_MASK		(3 << 0)
+#define IOVMF_PGSZ_1M		MMU_CAM_PGSZ_1M
+#define IOVMF_PGSZ_64K		MMU_CAM_PGSZ_64K
+#define IOVMF_PGSZ_4K		MMU_CAM_PGSZ_4K
+#define IOVMF_PGSZ_16M		MMU_CAM_PGSZ_16M
+
+#define IOVMF_ENDIAN_MASK	(1 << 9)
+#define IOVMF_ENDIAN_BIG	MMU_RAM_ENDIAN_BIG
+
+#define IOVMF_ELSZ_MASK		(3 << 7)
+#define IOVMF_ELSZ_16		MMU_RAM_ELSZ_16
+#define IOVMF_ELSZ_32		MMU_RAM_ELSZ_32
+#define IOVMF_ELSZ_NONE		MMU_RAM_ELSZ_NONE
+
+#define IOVMF_MIXED_MASK	(1 << 6)
+#define IOVMF_MIXED		MMU_RAM_MIXED
+
+/*
+ * iovma: s/w flags, used for mapping and umapping internally.
+ */
+#define IOVMF_MMIO		(1 << IOVMF_SW_SHIFT)
+#define IOVMF_ALLOC		(2 << IOVMF_SW_SHIFT)
+#define IOVMF_ALLOC_MASK	(3 << IOVMF_SW_SHIFT)
+
+/* "superpages" is supported just with physically linear pages */
+#define IOVMF_DISCONT		(1 << (2 + IOVMF_SW_SHIFT))
+#define IOVMF_LINEAR		(2 << (2 + IOVMF_SW_SHIFT))
+#define IOVMF_LINEAR_MASK	(3 << (2 + IOVMF_SW_SHIFT))
+
+#define IOVMF_DA_FIXED		(1 << (4 + IOVMF_SW_SHIFT))
+
 static struct kmem_cache *iovm_area_cachep;
 
 /* return the offset of the first scatterlist entry in a sg table */
diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 99640d8c1db0..7f182f0ff3da 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -61,6 +61,7 @@
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/omap-iommu.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h
index 8be7487c326f..8d6866942b85 100644
--- a/drivers/media/platform/omap3isp/isp.h
+++ b/drivers/media/platform/omap3isp/isp.h
@@ -31,11 +31,9 @@
 #include <media/v4l2-device.h>
 #include <linux/device.h>
 #include <linux/io.h>
+#include <linux/iommu.h>
 #include <linux/platform_device.h>
 #include <linux/wait.h>
-#include <linux/iommu.h>
-#include <plat/iommu.h>
-#include <plat/iovmm.h>
 
 #include "ispstat.h"
 #include "ispccdc.h"
diff --git a/drivers/media/platform/omap3isp/ispccdc.c b/drivers/media/platform/omap3isp/ispccdc.c
index 60181ab96063..6ae1ffb277e7 100644
--- a/drivers/media/platform/omap3isp/ispccdc.c
+++ b/drivers/media/platform/omap3isp/ispccdc.c
@@ -30,6 +30,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
+#include <linux/omap-iommu.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <media/v4l2-event.h>
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index d7ac76b5c2ae..35c38237449d 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/omap-iommu.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index a0b737fecf13..a4b829027d71 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c
@@ -27,6 +27,7 @@
 #include <linux/clk.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/omap-iommu.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
@@ -35,7 +36,6 @@
 #include <media/v4l2-dev.h>
 #include <media/v4l2-ioctl.h>
 #include <plat/iommu.h>
-#include <plat/iovmm.h>
 #include <plat/omap-pm.h>
 
 #include "ispvideo.h"
diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h
new file mode 100644
index 000000000000..cac78de09c07
--- /dev/null
+++ b/include/linux/omap-iommu.h
@@ -0,0 +1,52 @@
+/*
+ * omap iommu: simple virtual address space management
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+struct iovm_struct {
+	struct omap_iommu	*iommu;	/* iommu object which this belongs to */
+	u32			da_start; /* area definition */
+	u32			da_end;
+	u32			flags; /* IOVMF_: see below */
+	struct list_head	list; /* linked in ascending order */
+	const struct sg_table	*sgt; /* keep 'page' <-> 'da' mapping */
+	void			*va; /* mpu side mapped address */
+};
+
+#define MMU_RAM_ENDIAN_SHIFT	9
+#define MMU_RAM_ENDIAN_LITTLE	(0 << MMU_RAM_ENDIAN_SHIFT)
+#define MMU_RAM_ELSZ_8		(0 << MMU_RAM_ELSZ_SHIFT)
+#define IOVMF_ENDIAN_LITTLE	MMU_RAM_ENDIAN_LITTLE
+#define MMU_RAM_ELSZ_SHIFT	7
+#define IOVMF_ELSZ_8		MMU_RAM_ELSZ_8
+
+struct iommu_domain;
+
+extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da);
+extern u32
+omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
+			const struct sg_table *sgt, u32 flags);
+extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
+				struct device *dev, u32 da);
+extern u32
+omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev,
+				u32 da, size_t bytes, u32 flags);
+extern void
+omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
+				const u32 da);
+extern void *omap_da_to_va(struct device *dev, u32 da);
+
+extern void omap_iommu_save_ctx(struct device *dev);
+extern void omap_iommu_restore_ctx(struct device *dev);
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 2ab7c84815cffd5fe4946a472fc67fefa8ac3c29 Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Fri, 2 Nov 2012 12:24:14 -0700
Subject: ARM: OMAP2+: Move iommu/iovmm headers to platform_data

Move iommu/iovmm headers from plat/ to platform_data/ as part of the
single zImage work.

Partially based on an earlier version by Ido Yariv <ido@wizery.com>.

Cc: Ido Yariv <ido@wizery.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Omar Ramirez Luna <omar.luna@linaro.org>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Ohad Ben-Cohen <ohad@wizery.com>
Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/devices.c              |  2 +-
 arch/arm/mach-omap2/omap-iommu.c           |  2 +-
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c |  2 +-
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c |  2 +-
 arch/arm/plat-omap/include/plat/iommu.h    | 49 ------------------------------
 drivers/iommu/omap-iommu-debug.c           |  3 +-
 drivers/iommu/omap-iommu.c                 |  2 +-
 drivers/iommu/omap-iommu2.c                |  2 +-
 drivers/iommu/omap-iovmm.c                 |  3 +-
 drivers/media/platform/omap3isp/ispvideo.c |  1 -
 include/linux/platform_data/iommu-omap.h   | 49 ++++++++++++++++++++++++++++++
 11 files changed, 57 insertions(+), 60 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/iommu.h
 create mode 100644 include/linux/platform_data/iommu-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index c8c211731d26..6cd0c2a36a25 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -126,7 +126,7 @@ static struct platform_device omap2cam_device = {
 
 #if defined(CONFIG_IOMMU_API)
 
-#include <plat/iommu.h>
+#include <linux/platform_data/iommu-omap.h>
 
 static struct resource omap3isp_resources[] = {
 	{
diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
index df298d46707c..a6a4ff8744b7 100644
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
-#include <plat/iommu.h>
+#include <linux/platform_data/iommu-omap.h>
 
 #include "soc.h"
 #include "common.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index f67b7ee07dd4..621bc7137233 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -26,8 +26,8 @@
 #include <plat/mmc.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
+#include <linux/platform_data/iommu-omap.h>
 #include <plat/dmtimer.h>
-#include <plat/iommu.h>
 
 #include "am35xx.h"
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 652d0285bd6d..5850b3e81512 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -27,10 +27,10 @@
 #include <plat/dma.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
+#include <linux/platform_data/iommu-omap.h>
 #include <plat/mmc.h>
 #include <plat/dmtimer.h>
 #include <plat/common.h>
-#include <plat/iommu.h>
 
 #include "omap_hwmod_common_data.h"
 #include "cm1_44xx.h"
diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h
deleted file mode 100644
index c677b9f2fefa..000000000000
--- a/arch/arm/plat-omap/include/plat/iommu.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * omap iommu: main structures
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define MMU_REG_SIZE		256
-
-/**
- * struct iommu_arch_data - omap iommu private data
- * @name: name of the iommu device
- * @iommu_dev: handle of the iommu device
- *
- * This is an omap iommu private data object, which binds an iommu user
- * to its iommu device. This object should be placed at the iommu user's
- * dev_archdata so generic IOMMU API can be used without having to
- * utilize omap-specific plumbing anymore.
- */
-struct omap_iommu_arch_data {
-	const char *name;
-	struct omap_iommu *iommu_dev;
-};
-
-/**
- * struct omap_mmu_dev_attr - OMAP mmu device attributes for omap_hwmod
- * @da_start:		device address where the va space starts.
- * @da_end:		device address where the va space ends.
- * @nr_tlb_entries:	number of entries supported by the translation
- *			look-aside buffer (TLB).
- */
-struct omap_mmu_dev_attr {
-	u32 da_start;
-	u32 da_end;
-	int nr_tlb_entries;
-};
-
-struct iommu_platform_data {
-	const char *name;
-	const char *clk_name;
-	const int nr_tlb_entries;
-	u32 da_start;
-	u32 da_end;
-};
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index d0427bd21361..d97fbe4fb9b1 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -19,8 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/debugfs.h>
 #include <linux/omap-iommu.h>
-
-#include <plat/iommu.h>
+#include <linux/platform_data/iommu-omap.h>
 
 #include "omap-iopgtable.h"
 #include "omap-iommu.h"
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index df840870e2a1..badc17c2bcb4 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -26,7 +26,7 @@
 
 #include <asm/cacheflush.h>
 
-#include <plat/iommu.h>
+#include <linux/platform_data/iommu-omap.h>
 
 #include "omap-iopgtable.h"
 #include "omap-iommu.h"
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
index 29e98a2af906..c02020292377 100644
--- a/drivers/iommu/omap-iommu2.c
+++ b/drivers/iommu/omap-iommu2.c
@@ -19,8 +19,8 @@
 #include <linux/omap-iommu.h>
 #include <linux/slab.h>
 #include <linux/stringify.h>
+#include <linux/platform_data/iommu-omap.h>
 
-#include <plat/iommu.h>
 #include "omap-iommu.h"
 
 /*
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
index 3e3b2421b924..46d875690739 100644
--- a/drivers/iommu/omap-iovmm.c
+++ b/drivers/iommu/omap-iovmm.c
@@ -18,12 +18,11 @@
 #include <linux/scatterlist.h>
 #include <linux/iommu.h>
 #include <linux/omap-iommu.h>
+#include <linux/platform_data/iommu-omap.h>
 
 #include <asm/cacheflush.h>
 #include <asm/mach/map.h>
 
-#include <plat/iommu.h>
-
 #include "omap-iopgtable.h"
 #include "omap-iommu.h"
 
diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index a4b829027d71..21f7313e6999 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c
@@ -35,7 +35,6 @@
 #include <linux/vmalloc.h>
 #include <media/v4l2-dev.h>
 #include <media/v4l2-ioctl.h>
-#include <plat/iommu.h>
 #include <plat/omap-pm.h>
 
 #include "ispvideo.h"
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
new file mode 100644
index 000000000000..c677b9f2fefa
--- /dev/null
+++ b/include/linux/platform_data/iommu-omap.h
@@ -0,0 +1,49 @@
+/*
+ * omap iommu: main structures
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define MMU_REG_SIZE		256
+
+/**
+ * struct iommu_arch_data - omap iommu private data
+ * @name: name of the iommu device
+ * @iommu_dev: handle of the iommu device
+ *
+ * This is an omap iommu private data object, which binds an iommu user
+ * to its iommu device. This object should be placed at the iommu user's
+ * dev_archdata so generic IOMMU API can be used without having to
+ * utilize omap-specific plumbing anymore.
+ */
+struct omap_iommu_arch_data {
+	const char *name;
+	struct omap_iommu *iommu_dev;
+};
+
+/**
+ * struct omap_mmu_dev_attr - OMAP mmu device attributes for omap_hwmod
+ * @da_start:		device address where the va space starts.
+ * @da_end:		device address where the va space ends.
+ * @nr_tlb_entries:	number of entries supported by the translation
+ *			look-aside buffer (TLB).
+ */
+struct omap_mmu_dev_attr {
+	u32 da_start;
+	u32 da_end;
+	int nr_tlb_entries;
+};
+
+struct iommu_platform_data {
+	const char *name;
+	const char *clk_name;
+	const int nr_tlb_entries;
+	u32 da_start;
+	u32 da_end;
+};
-- 
cgit v1.2.3-55-g7522


From 78026a6fde8f7b0ca77c059da11f476d69dfde3b Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 20 Nov 2012 13:36:00 +0000
Subject: iio:imu:adis: Add debugfs register access support

Provide a IIO debugfs register access function for the ADIS library. This
function can be used by individual drivers to allow raw register access via
debugfs.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/imu/adis.c       | 23 +++++++++++++++++++++++
 include/linux/iio/imu/adis.h | 11 +++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index 8259b774078f..28d4df2fe76a 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -135,6 +135,29 @@ error_ret:
 }
 EXPORT_SYMBOL_GPL(adis_read_reg_16);
 
+#ifdef CONFIG_DEBUG_FS
+
+int adis_debugfs_reg_access(struct iio_dev *indio_dev,
+	unsigned int reg, unsigned int writeval, unsigned int *readval)
+{
+	struct adis *adis = iio_device_get_drvdata(indio_dev);
+
+	if (readval) {
+		uint16_t val16;
+		int ret;
+
+		ret = adis_read_reg_16(adis, reg, &val16);
+		*readval = val16;
+
+		return ret;
+	} else {
+		return adis_write_reg_16(adis, reg, writeval);
+	}
+}
+EXPORT_SYMBOL(adis_debugfs_reg_access);
+
+#endif
+
 /**
  * adis_enable_irq() - Enable or disable data ready IRQ
  * @adis: The adis device
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 8c3304d44b97..fce7bc3ba0b5 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -183,4 +183,15 @@ static inline void adis_remove_trigger(struct adis *adis)
 
 #endif /* CONFIG_IIO_BUFFER */
 
+#ifdef CONFIG_DEBUG_FS
+
+int adis_debugfs_reg_access(struct iio_dev *indio_dev,
+	unsigned int reg, unsigned int writeval, unsigned int *readval);
+
+#else
+
+#define adis_debugfs_reg_access NULL
+
+#endif
+
 #endif
-- 
cgit v1.2.3-55-g7522


From 57a1228a06b7a5939a8b0078a92b44fa30855bcb Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 20 Nov 2012 13:36:00 +0000
Subject: iio:imu:adis: Add support for 32bit registers

Some of the newer generation devices from the ADIS16XXX family have 32bit wide
register which spans two 16bit wide registers. This patch adds support for
reading and writing a 32bit wide register.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/imu/adis.c        | 145 +++++++++++++++++++++++++++---------------
 drivers/iio/imu/adis_buffer.c |   2 +
 include/linux/iio/imu/adis.h  |  81 +++++++++++++++++++++--
 3 files changed, 171 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index 28d4df2fe76a..280a49584adf 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -27,36 +27,10 @@
 #define ADIS_MSC_CTRL_DATA_RDY_DIO2	BIT(0)
 #define ADIS_GLOB_CMD_SW_RESET		BIT(7)
 
-/**
- * adis_write_reg_8() - Write single byte to a register
- * @adis: The adis device
- * @reg: The address of the register to be written
- * @val: The value to write
- */
-int adis_write_reg_8(struct adis *adis, unsigned int reg, uint8_t val)
-{
-	int ret;
-
-	mutex_lock(&adis->txrx_lock);
-	adis->tx[0] = ADIS_WRITE_REG(reg);
-	adis->tx[1] = val;
-
-	ret = spi_write(adis->spi, adis->tx, 2);
-	mutex_unlock(&adis->txrx_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(adis_write_reg_8);
-
-/**
- * adis_write_reg_16() - Write 2 bytes to a pair of registers
- * @adis: The adis device
- * @reg: The address of the lower of the two registers
- * @val: Value to be written
- */
-int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t value)
+int adis_write_reg(struct adis *adis, unsigned int reg,
+	unsigned int value, unsigned int size)
 {
-	int ret;
+	int ret, i;
 	struct spi_message msg;
 	struct spi_transfer xfers[] = {
 		{
@@ -69,33 +43,69 @@ int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t value)
 			.tx_buf = adis->tx + 2,
 			.bits_per_word = 8,
 			.len = 2,
+			.cs_change = 1,
+			.delay_usecs = adis->data->write_delay,
+		}, {
+			.tx_buf = adis->tx + 4,
+			.bits_per_word = 8,
+			.len = 2,
+			.cs_change = 1,
+			.delay_usecs = adis->data->write_delay,
+		}, {
+			.tx_buf = adis->tx + 6,
+			.bits_per_word = 8,
+			.len = 2,
 			.delay_usecs = adis->data->write_delay,
 		},
 	};
 
 	mutex_lock(&adis->txrx_lock);
-	adis->tx[0] = ADIS_WRITE_REG(reg);
-	adis->tx[1] = value & 0xff;
-	adis->tx[2] = ADIS_WRITE_REG(reg + 1);
-	adis->tx[3] = (value >> 8) & 0xff;
 
 	spi_message_init(&msg);
-	spi_message_add_tail(&xfers[0], &msg);
-	spi_message_add_tail(&xfers[1], &msg);
+	switch (size) {
+	case 4:
+		adis->tx[6] = ADIS_WRITE_REG(reg + 3);
+		adis->tx[7] = (value >> 24) & 0xff;
+		adis->tx[4] = ADIS_WRITE_REG(reg + 2);
+		adis->tx[5] = (value >> 16) & 0xff;
+	case 2:
+		adis->tx[2] = ADIS_WRITE_REG(reg + 1);
+		adis->tx[3] = (value >> 8) & 0xff;
+	case 1:
+		adis->tx[0] = ADIS_WRITE_REG(reg);
+		adis->tx[1] = value & 0xff;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	xfers[size - 1].cs_change = 0;
+
+	for (i = 0; i < size; i++)
+		spi_message_add_tail(&xfers[i], &msg);
+
 	ret = spi_sync(adis->spi, &msg);
+	if (ret) {
+		dev_err(&adis->spi->dev, "Failed to write register 0x%02X: %d\n",
+				reg, ret);
+	}
+
+out_unlock:
 	mutex_unlock(&adis->txrx_lock);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(adis_write_reg_16);
+EXPORT_SYMBOL_GPL(adis_write_reg);
 
 /**
- * adis_read_reg_16() - read 2 bytes from a 16-bit register
+ * adis_read_reg() - read 2 bytes from a 16-bit register
  * @adis: The adis device
  * @reg: The address of the lower of the two registers
  * @val: The value read back from the device
  */
-int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val)
+int adis_read_reg(struct adis *adis, unsigned int reg,
+	unsigned int *val, unsigned int size)
 {
 	struct spi_message msg;
 	int ret;
@@ -107,33 +117,61 @@ int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val)
 			.cs_change = 1,
 			.delay_usecs = adis->data->read_delay,
 		}, {
+			.tx_buf = adis->tx + 2,
 			.rx_buf = adis->rx,
 			.bits_per_word = 8,
 			.len = 2,
+			.cs_change = 1,
+			.delay_usecs = adis->data->read_delay,
+		}, {
+			.rx_buf = adis->rx + 2,
+			.bits_per_word = 8,
+			.len = 2,
 			.delay_usecs = adis->data->read_delay,
 		},
 	};
 
 	mutex_lock(&adis->txrx_lock);
-	adis->tx[0] = ADIS_READ_REG(reg);
-	adis->tx[1] = 0;
-
 	spi_message_init(&msg);
-	spi_message_add_tail(&xfers[0], &msg);
-	spi_message_add_tail(&xfers[1], &msg);
+
+	switch (size) {
+	case 4:
+		adis->tx[0] = ADIS_READ_REG(reg + 2);
+		adis->tx[1] = 0;
+		spi_message_add_tail(&xfers[0], &msg);
+	case 2:
+		adis->tx[2] = ADIS_READ_REG(reg);
+		adis->tx[3] = 0;
+		spi_message_add_tail(&xfers[1], &msg);
+		spi_message_add_tail(&xfers[2], &msg);
+		break;
+	default:
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	ret = spi_sync(adis->spi, &msg);
 	if (ret) {
-		dev_err(&adis->spi->dev, "Failed to read 16 bit register 0x%02X: %d\n",
+		dev_err(&adis->spi->dev, "Failed to read register 0x%02X: %d\n",
 				reg, ret);
-		goto error_ret;
+		goto out_unlock;
 	}
-	*val = get_unaligned_be16(adis->rx);
 
-error_ret:
+	switch (size) {
+	case 4:
+		*val = get_unaligned_be32(adis->rx);
+		break;
+	case 2:
+		*val = get_unaligned_be16(adis->rx + 2);
+		break;
+	}
+
+out_unlock:
 	mutex_unlock(&adis->txrx_lock);
+
 	return ret;
 }
-EXPORT_SYMBOL_GPL(adis_read_reg_16);
+EXPORT_SYMBOL_GPL(adis_read_reg);
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -304,25 +342,26 @@ int adis_single_conversion(struct iio_dev *indio_dev,
 	const struct iio_chan_spec *chan, unsigned int error_mask, int *val)
 {
 	struct adis *adis = iio_device_get_drvdata(indio_dev);
-	uint16_t val16;
+	unsigned int uval;
 	int ret;
 
 	mutex_lock(&indio_dev->mlock);
 
-	ret = adis_read_reg_16(adis, chan->address, &val16);
+	ret = adis_read_reg(adis, chan->address, &uval,
+			chan->scan_type.storagebits / 8);
 	if (ret)
 		goto err_unlock;
 
-	if (val16 & error_mask) {
+	if (uval & error_mask) {
 		ret = adis_check_status(adis);
 		if (ret)
 			goto err_unlock;
 	}
 
 	if (chan->scan_type.sign == 's')
-		*val = sign_extend32(val16, chan->scan_type.realbits - 1);
+		*val = sign_extend32(uval, chan->scan_type.realbits - 1);
 	else
-		*val = val16 & ((1 << chan->scan_type.realbits) - 1);
+		*val = uval & ((1 << chan->scan_type.realbits) - 1);
 
 	ret = IIO_VAL_INT;
 err_unlock:
diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
index a91b4cbdc73a..785713349e9f 100644
--- a/drivers/iio/imu/adis_buffer.c
+++ b/drivers/iio/imu/adis_buffer.c
@@ -64,6 +64,8 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
 	for (i = 0; i < indio_dev->num_channels; i++, chan++) {
 		if (!test_bit(chan->scan_index, scan_mask))
 			continue;
+		if (chan->scan_type.storagebits == 32)
+			*tx++ = cpu_to_be16((chan->address + 2) << 8);
 		*tx++ = cpu_to_be16(chan->address << 8);
 	}
 
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index fce7bc3ba0b5..6402a08c0e65 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -53,7 +53,7 @@ struct adis {
 	struct spi_transfer	*xfer;
 	void			*buffer;
 
-	uint8_t			tx[8] ____cacheline_aligned;
+	uint8_t			tx[10] ____cacheline_aligned;
 	uint8_t			rx[4];
 };
 
@@ -61,9 +61,82 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev,
 	struct spi_device *spi, const struct adis_data *data);
 int adis_reset(struct adis *adis);
 
-int adis_write_reg_8(struct adis *adis, unsigned int reg, uint8_t val);
-int adis_write_reg_16(struct adis *adis, unsigned int reg, uint16_t val);
-int adis_read_reg_16(struct adis *adis, unsigned int reg, uint16_t *val);
+int adis_write_reg(struct adis *adis, unsigned int reg,
+	unsigned int val, unsigned int size);
+int adis_read_reg(struct adis *adis, unsigned int reg,
+	unsigned int *val, unsigned int size);
+
+/**
+ * adis_write_reg_8() - Write single byte to a register
+ * @adis: The adis device
+ * @reg: The address of the register to be written
+ * @value: The value to write
+ */
+static inline int adis_write_reg_8(struct adis *adis, unsigned int reg,
+	uint8_t val)
+{
+	return adis_write_reg(adis, reg, val, 1);
+}
+
+/**
+ * adis_write_reg_16() - Write 2 bytes to a pair of registers
+ * @adis: The adis device
+ * @reg: The address of the lower of the two registers
+ * @value: Value to be written
+ */
+static inline int adis_write_reg_16(struct adis *adis, unsigned int reg,
+	uint16_t val)
+{
+	return adis_write_reg(adis, reg, val, 2);
+}
+
+/**
+ * adis_write_reg_32() - write 4 bytes to four registers
+ * @adis: The adis device
+ * @reg: The address of the lower of the four register
+ * @value: Value to be written
+ */
+static inline int adis_write_reg_32(struct adis *adis, unsigned int reg,
+	uint32_t val)
+{
+	return adis_write_reg(adis, reg, val, 4);
+}
+
+/**
+ * adis_read_reg_16() - read 2 bytes from a 16-bit register
+ * @adis: The adis device
+ * @reg: The address of the lower of the two registers
+ * @val: The value read back from the device
+ */
+static inline int adis_read_reg_16(struct adis *adis, unsigned int reg,
+	uint16_t *val)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = adis_read_reg(adis, reg, &tmp, 2);
+	*val = tmp;
+
+	return ret;
+}
+
+/**
+ * adis_read_reg_32() - read 4 bytes from a 32-bit register
+ * @adis: The adis device
+ * @reg: The address of the lower of the two registers
+ * @val: The value read back from the device
+ */
+static inline int adis_read_reg_32(struct adis *adis, unsigned int reg,
+	uint32_t *val)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = adis_read_reg(adis, reg, &tmp, 4);
+	*val = tmp;
+
+	return ret;
+}
 
 int adis_enable_irq(struct adis *adis, bool enable);
 int adis_check_status(struct adis *adis);
-- 
cgit v1.2.3-55-g7522


From 484a0bf091c93c379e6524a17bb037c33c898e01 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 20 Nov 2012 13:36:00 +0000
Subject: iio:imu:adis: Add paging support

Some of the newer generation devices from the ADIS16XXX series have more
registers than what can be supported with the current register addressing
scheme. These devices implement register paging to support a larger register
range. Each page is 128 registers large and the currently active page can be
selected via register 0x00 in each page. This patch implements transparent
paging inside the common adis library. The register read/write interface stays
the same and when a register is accessed the library automatically switches to
the correct page if it is not already selected. The page number is encoded in
the upper bits of the register number, e.g. register 0x5 of page 1 is 0x85.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/imu/adis.c        | 70 +++++++++++++++++++++++++++++++++----------
 drivers/iio/imu/adis_buffer.c | 15 ++++++++++
 include/linux/iio/imu/adis.h  | 10 +++++--
 3 files changed, 77 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index 280a49584adf..c4ea04ffa60e 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -30,6 +30,7 @@
 int adis_write_reg(struct adis *adis, unsigned int reg,
 	unsigned int value, unsigned int size)
 {
+	unsigned int page = reg / ADIS_PAGE_SIZE;
 	int ret, i;
 	struct spi_message msg;
 	struct spi_transfer xfers[] = {
@@ -56,39 +57,53 @@ int adis_write_reg(struct adis *adis, unsigned int reg,
 			.bits_per_word = 8,
 			.len = 2,
 			.delay_usecs = adis->data->write_delay,
+		}, {
+			.tx_buf = adis->tx + 8,
+			.bits_per_word = 8,
+			.len = 2,
+			.delay_usecs = adis->data->write_delay,
 		},
 	};
 
 	mutex_lock(&adis->txrx_lock);
 
 	spi_message_init(&msg);
+
+	if (adis->current_page != page) {
+		adis->tx[0] = ADIS_WRITE_REG(ADIS_REG_PAGE_ID);
+		adis->tx[1] = page;
+		spi_message_add_tail(&xfers[0], &msg);
+	}
+
 	switch (size) {
 	case 4:
-		adis->tx[6] = ADIS_WRITE_REG(reg + 3);
-		adis->tx[7] = (value >> 24) & 0xff;
-		adis->tx[4] = ADIS_WRITE_REG(reg + 2);
-		adis->tx[5] = (value >> 16) & 0xff;
+		adis->tx[8] = ADIS_WRITE_REG(reg + 3);
+		adis->tx[9] = (value >> 24) & 0xff;
+		adis->tx[6] = ADIS_WRITE_REG(reg + 2);
+		adis->tx[7] = (value >> 16) & 0xff;
 	case 2:
-		adis->tx[2] = ADIS_WRITE_REG(reg + 1);
-		adis->tx[3] = (value >> 8) & 0xff;
+		adis->tx[4] = ADIS_WRITE_REG(reg + 1);
+		adis->tx[5] = (value >> 8) & 0xff;
 	case 1:
-		adis->tx[0] = ADIS_WRITE_REG(reg);
-		adis->tx[1] = value & 0xff;
+		adis->tx[2] = ADIS_WRITE_REG(reg);
+		adis->tx[3] = value & 0xff;
 		break;
 	default:
 		ret = -EINVAL;
 		goto out_unlock;
 	}
 
-	xfers[size - 1].cs_change = 0;
+	xfers[size].cs_change = 0;
 
-	for (i = 0; i < size; i++)
+	for (i = 1; i <= size; i++)
 		spi_message_add_tail(&xfers[i], &msg);
 
 	ret = spi_sync(adis->spi, &msg);
 	if (ret) {
 		dev_err(&adis->spi->dev, "Failed to write register 0x%02X: %d\n",
 				reg, ret);
+	} else {
+		adis->current_page = page;
 	}
 
 out_unlock:
@@ -107,6 +122,7 @@ EXPORT_SYMBOL_GPL(adis_write_reg);
 int adis_read_reg(struct adis *adis, unsigned int reg,
 	unsigned int *val, unsigned int size)
 {
+	unsigned int page = reg / ADIS_PAGE_SIZE;
 	struct spi_message msg;
 	int ret;
 	struct spi_transfer xfers[] = {
@@ -115,9 +131,15 @@ int adis_read_reg(struct adis *adis, unsigned int reg,
 			.bits_per_word = 8,
 			.len = 2,
 			.cs_change = 1,
-			.delay_usecs = adis->data->read_delay,
+			.delay_usecs = adis->data->write_delay,
 		}, {
 			.tx_buf = adis->tx + 2,
+			.bits_per_word = 8,
+			.len = 2,
+			.cs_change = 1,
+			.delay_usecs = adis->data->read_delay,
+		}, {
+			.tx_buf = adis->tx + 4,
 			.rx_buf = adis->rx,
 			.bits_per_word = 8,
 			.len = 2,
@@ -134,16 +156,22 @@ int adis_read_reg(struct adis *adis, unsigned int reg,
 	mutex_lock(&adis->txrx_lock);
 	spi_message_init(&msg);
 
+	if (adis->current_page != page) {
+		adis->tx[0] = ADIS_WRITE_REG(ADIS_REG_PAGE_ID);
+		adis->tx[1] = page;
+		spi_message_add_tail(&xfers[0], &msg);
+	}
+
 	switch (size) {
 	case 4:
-		adis->tx[0] = ADIS_READ_REG(reg + 2);
-		adis->tx[1] = 0;
-		spi_message_add_tail(&xfers[0], &msg);
-	case 2:
-		adis->tx[2] = ADIS_READ_REG(reg);
+		adis->tx[2] = ADIS_READ_REG(reg + 2);
 		adis->tx[3] = 0;
 		spi_message_add_tail(&xfers[1], &msg);
+	case 2:
+		adis->tx[4] = ADIS_READ_REG(reg);
+		adis->tx[5] = 0;
 		spi_message_add_tail(&xfers[2], &msg);
+		spi_message_add_tail(&xfers[3], &msg);
 		break;
 	default:
 		ret = -EINVAL;
@@ -155,6 +183,8 @@ int adis_read_reg(struct adis *adis, unsigned int reg,
 		dev_err(&adis->spi->dev, "Failed to read register 0x%02X: %d\n",
 				reg, ret);
 		goto out_unlock;
+	} else {
+		adis->current_page = page;
 	}
 
 	switch (size) {
@@ -390,6 +420,14 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev,
 	adis->data = data;
 	iio_device_set_drvdata(indio_dev, adis);
 
+	if (data->has_paging) {
+		/* Need to set the page before first read/write */
+		adis->current_page = -1;
+	} else {
+		/* Page will always be 0 */
+		adis->current_page = 0;
+	}
+
 	return adis_enable_irq(adis, false);
 }
 EXPORT_SYMBOL_GPL(adis_init);
diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
index 785713349e9f..99d8e0b0dd34 100644
--- a/drivers/iio/imu/adis_buffer.c
+++ b/drivers/iio/imu/adis_buffer.c
@@ -83,10 +83,25 @@ static irqreturn_t adis_trigger_handler(int irq, void *p)
 	if (!adis->buffer)
 		return -ENOMEM;
 
+	if (adis->data->has_paging) {
+		mutex_lock(&adis->txrx_lock);
+		if (adis->current_page != 0) {
+			adis->tx[0] = ADIS_WRITE_REG(ADIS_REG_PAGE_ID);
+			adis->tx[1] = 0;
+			spi_write(adis->spi, adis->tx, 2);
+		}
+	}
+
 	ret = spi_sync(adis->spi, &adis->msg);
 	if (ret)
 		dev_err(&adis->spi->dev, "Failed to read data: %d", ret);
 
+
+	if (adis->data->has_paging) {
+		adis->current_page = 0;
+		mutex_unlock(&adis->txrx_lock);
+	}
+
 	/* Guaranteed to be aligned with 8 byte boundary */
 	if (indio_dev->scan_timestamp) {
 		void *b = adis->buffer + indio_dev->scan_bytes - sizeof(s64);
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 6402a08c0e65..e82cd0827e91 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -14,8 +14,11 @@
 #include <linux/interrupt.h>
 #include <linux/iio/types.h>
 
-#define ADIS_WRITE_REG(reg) (0x80 | (reg))
-#define ADIS_READ_REG(reg) (reg)
+#define ADIS_WRITE_REG(reg) ((0x80 | (reg)))
+#define ADIS_READ_REG(reg) ((reg) & 0x7f)
+
+#define ADIS_PAGE_SIZE 0x80
+#define ADIS_REG_PAGE_ID 0x00
 
 /**
  * struct adis_data - ADIS chip variant specific data
@@ -40,6 +43,8 @@ struct adis_data {
 
 	const char * const *status_error_msgs;
 	unsigned int status_error_mask;
+
+	bool has_paging;
 };
 
 struct adis {
@@ -51,6 +56,7 @@ struct adis {
 	struct mutex		txrx_lock;
 	struct spi_message	msg;
 	struct spi_transfer	*xfer;
+	unsigned int		current_page;
 	void			*buffer;
 
 	uint8_t			tx[10] ____cacheline_aligned;
-- 
cgit v1.2.3-55-g7522


From c4f0c6936762ecd6b453275611a785dfdee0d417 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 20 Nov 2012 13:36:00 +0000
Subject: iio: Add pressure channel type

This patch adds support for a new IIO channel type for pressure measurements.
This can for example be used for barometric pressure sensors.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio | 24 ++++++++++++++++++++++++
 drivers/iio/industrialio-core.c         |  1 +
 include/linux/iio/types.h               |  1 +
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 2f06d40fe07d..2e33dc6b2346 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -189,6 +189,14 @@ Description:
 		A computed peak value based on the sum squared magnitude of
 		the underlying value in the specified directions.
 
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressureY_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressure_raw
+KernelVersion:	3.8
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Raw pressure measurement from channel Y. Units after
+		application of scale and offset are kilopascal.
+
 What:		/sys/bus/iio/devices/iio:deviceX/in_accel_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_accel_x_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_accel_y_offset
@@ -197,6 +205,8 @@ What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_tempY_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_temp_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressureY_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressure_offset
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -226,6 +236,8 @@ What:		/sys/bus/iio/devices/iio:deviceX/in_magn_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_magn_x_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_magn_y_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_magn_z_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressureY_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressure_scale
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -245,6 +257,8 @@ What:		/sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
 What:		/sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
 What:		/sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
 What:		/sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -262,6 +276,8 @@ What		/sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
 What		/sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
 what		/sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
 what		/sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
+What:		/sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -275,6 +291,8 @@ What:		/sys/.../iio:deviceX/in_voltage-voltage_scale_available
 What:		/sys/.../iio:deviceX/out_voltageX_scale_available
 What:		/sys/.../iio:deviceX/out_altvoltageX_scale_available
 What:		/sys/.../iio:deviceX/in_capacitance_scale_available
+What:		/sys/.../iio:deviceX/in_pressure_scale_available
+What:		/sys/.../iio:deviceX/in_pressureY_scale_available
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -694,6 +712,8 @@ What:		/sys/.../buffer/scan_elements/in_voltageY_en
 What:		/sys/.../buffer/scan_elements/in_voltageY-voltageZ_en
 What:		/sys/.../buffer/scan_elements/in_incli_x_en
 What:		/sys/.../buffer/scan_elements/in_incli_y_en
+What:		/sys/.../buffer/scan_elements/in_pressureY_en
+What:		/sys/.../buffer/scan_elements/in_pressure_en
 KernelVersion:	2.6.37
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -707,6 +727,8 @@ What:		/sys/.../buffer/scan_elements/in_voltageY_type
 What:		/sys/.../buffer/scan_elements/in_voltage_type
 What:		/sys/.../buffer/scan_elements/in_voltageY_supply_type
 What:		/sys/.../buffer/scan_elements/in_timestamp_type
+What:		/sys/.../buffer/scan_elements/in_pressureY_type
+What:		/sys/.../buffer/scan_elements/in_pressure_type
 KernelVersion:	2.6.37
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -751,6 +773,8 @@ What:		/sys/.../buffer/scan_elements/in_magn_z_index
 What:		/sys/.../buffer/scan_elements/in_incli_x_index
 What:		/sys/.../buffer/scan_elements/in_incli_y_index
 What:		/sys/.../buffer/scan_elements/in_timestamp_index
+What:		/sys/.../buffer/scan_elements/in_pressureY_index
+What:		/sys/.../buffer/scan_elements/in_pressure_index
 KernelVersion:	2.6.37
 Contact:	linux-iio@vger.kernel.org
 Description:
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 060a4045be85..3dccd6c3a889 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -65,6 +65,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_CAPACITANCE] = "capacitance",
 	[IIO_ALTVOLTAGE] = "altvoltage",
 	[IIO_CCT] = "cct",
+	[IIO_PRESSURE] = "pressure",
 };
 
 static const char * const iio_modifier_names[] = {
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 87b196a2d698..88bf0f0d27b4 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -28,6 +28,7 @@ enum iio_chan_type {
 	IIO_CAPACITANCE,
 	IIO_ALTVOLTAGE,
 	IIO_CCT,
+	IIO_PRESSURE,
 };
 
 enum iio_modifier {
-- 
cgit v1.2.3-55-g7522


From 6807d7211327dbdd8df3692f3d26ca711514ba71 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 20 Nov 2012 13:36:00 +0000
Subject: iio: Factor out fixed point number parsing into its own function

Factor out the code for parsing fixed point numbers into its own function and
make this function globally available. This allows us to reuse the code to parse
fixed point numbers in individual IIO drivers.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 98 ++++++++++++++++++++++++++---------------
 include/linux/iio/iio.h         |  3 ++
 2 files changed, 66 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 3dccd6c3a889..8848f16c547b 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -408,6 +408,64 @@ static ssize_t iio_read_channel_info(struct device *dev,
 	}
 }
 
+/**
+ * iio_str_to_fixpoint() - Parse a fixed-point number from a string
+ * @str: The string to parse
+ * @fract_mult: Multiplier for the first decimal place, should be a power of 10
+ * @integer: The integer part of the number
+ * @fract: The fractional part of the number
+ *
+ * Returns 0 on success, or a negative error code if the string could not be
+ * parsed.
+ */
+int iio_str_to_fixpoint(const char *str, int fract_mult,
+	int *integer, int *fract)
+{
+	int i = 0, f = 0;
+	bool integer_part = true, negative = false;
+
+	if (str[0] == '-') {
+		negative = true;
+		str++;
+	} else if (str[0] == '+') {
+		str++;
+	}
+
+	while (*str) {
+		if ('0' <= *str && *str <= '9') {
+			if (integer_part) {
+				i = i * 10 + *str - '0';
+			} else {
+				f += fract_mult * (*str - '0');
+				fract_mult /= 10;
+			}
+		} else if (*str == '\n') {
+			if (*(str + 1) == '\0')
+				break;
+			else
+				return -EINVAL;
+		} else if (*str == '.' && integer_part) {
+			integer_part = false;
+		} else {
+			return -EINVAL;
+		}
+		str++;
+	}
+
+	if (negative) {
+		if (i)
+			i = -i;
+		else
+			f = -f;
+	}
+
+	*integer = i;
+	*fract = f;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iio_str_to_fixpoint);
+
 static ssize_t iio_write_channel_info(struct device *dev,
 				      struct device_attribute *attr,
 				      const char *buf,
@@ -415,8 +473,8 @@ static ssize_t iio_write_channel_info(struct device *dev,
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
-	int ret, integer = 0, fract = 0, fract_mult = 100000;
-	bool integer_part = true, negative = false;
+	int ret, fract_mult = 100000;
+	int integer, fract;
 
 	/* Assumes decimal - precision based on number of digits */
 	if (!indio_dev->info->write_raw)
@@ -435,39 +493,9 @@ static ssize_t iio_write_channel_info(struct device *dev,
 			return -EINVAL;
 		}
 
-	if (buf[0] == '-') {
-		negative = true;
-		buf++;
-	} else if (buf[0] == '+') {
-		buf++;
-	}
-
-	while (*buf) {
-		if ('0' <= *buf && *buf <= '9') {
-			if (integer_part)
-				integer = integer*10 + *buf - '0';
-			else {
-				fract += fract_mult*(*buf - '0');
-				fract_mult /= 10;
-			}
-		} else if (*buf == '\n') {
-			if (*(buf + 1) == '\0')
-				break;
-			else
-				return -EINVAL;
-		} else if (*buf == '.' && integer_part) {
-			integer_part = false;
-		} else {
-			return -EINVAL;
-		}
-		buf++;
-	}
-	if (negative) {
-		if (integer)
-			integer = -integer;
-		else
-			fract = -fract;
-	}
+	ret = iio_str_to_fixpoint(buf, fract_mult, &integer, &fract);
+	if (ret)
+		return ret;
 
 	ret = indio_dev->info->write_raw(indio_dev, this_attr->c,
 					 integer, fract, this_attr->address);
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index adca93a999a7..da8c776ba0bd 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -620,6 +620,9 @@ static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev)
 };
 #endif
 
+int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer,
+	int *fract);
+
 /**
  * IIO_DEGREE_TO_RAD() - Convert degree to rad
  * @deg: A value in degree
-- 
cgit v1.2.3-55-g7522


From 2f3abe6cbb6c963ac790b40936b6761c9f0497b4 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Tue, 20 Nov 2012 13:36:00 +0000
Subject: iio:imu: Add support for the ADIS16480 and similar IMUs

This patch adds support for the ADIS16375, ADIS16480, ADIS16485, ADIS16488 6
degree to 10 degree of freedom IMUs.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/imu/Kconfig      |  16 +
 drivers/iio/imu/Makefile     |   2 +
 drivers/iio/imu/adis.c       |   3 +
 drivers/iio/imu/adis16480.c  | 925 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iio/imu/adis.h |   4 +
 5 files changed, 950 insertions(+)
 create mode 100644 drivers/iio/imu/adis16480.c

(limited to 'include/linux')

diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
index c24410c873c7..3d79a40e916b 100644
--- a/drivers/iio/imu/Kconfig
+++ b/drivers/iio/imu/Kconfig
@@ -1,3 +1,19 @@
+#
+# IIO imu drivers configuration
+#
+menu "Inertial measurement units"
+
+config ADIS16480
+	tristate "Analog Devices ADIS16480 and similar IMU driver"
+	depends on SPI
+	select IIO_ADIS_LIB
+	select IIO_ADIS_LIB_BUFFER if IIO_BUFFER
+	help
+	  Say yes here to build support for Analog Devices ADIS16375, ADIS16480,
+	  ADIS16485, ADIS16488 inertial sensors.
+
+endmenu
+
 config IIO_ADIS_LIB
 	tristate
 	help
diff --git a/drivers/iio/imu/Makefile b/drivers/iio/imu/Makefile
index 97676ab5723d..cfe57638f6f9 100644
--- a/drivers/iio/imu/Makefile
+++ b/drivers/iio/imu/Makefile
@@ -2,6 +2,8 @@
 # Makefile for Inertial Measurement Units
 #
 
+obj-$(CONFIG_ADIS16480) += adis16480.o
+
 adis_lib-y += adis.o
 adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_trigger.o
 adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_buffer.o
diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index c4ea04ffa60e..911255d41c1a 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -238,6 +238,9 @@ int adis_enable_irq(struct adis *adis, bool enable)
 	int ret = 0;
 	uint16_t msc;
 
+	if (adis->data->enable_irq)
+		return adis->data->enable_irq(adis, enable);
+
 	ret = adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc);
 	if (ret)
 		goto error_ret;
diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c
new file mode 100644
index 000000000000..a080b3515015
--- /dev/null
+++ b/drivers/iio/imu/adis16480.c
@@ -0,0 +1,925 @@
+/*
+ * ADIS16480 and similar IMUs driver
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/imu/adis.h>
+
+#include <linux/iio/iio.h>
+#include <linux/debugfs.h>
+
+#define ADIS16480_PAGE_SIZE 0x80
+
+#define ADIS16480_REG(page, reg) ((page) * ADIS16480_PAGE_SIZE + (reg))
+
+#define ADIS16480_REG_PAGE_ID 0x00 /* Same address on each page */
+#define ADIS16480_REG_SEQ_CNT			ADIS16480_REG(0x00, 0x06)
+#define ADIS16480_REG_SYS_E_FLA			ADIS16480_REG(0x00, 0x08)
+#define ADIS16480_REG_DIAG_STS			ADIS16480_REG(0x00, 0x0A)
+#define ADIS16480_REG_ALM_STS			ADIS16480_REG(0x00, 0x0C)
+#define ADIS16480_REG_TEMP_OUT			ADIS16480_REG(0x00, 0x0E)
+#define ADIS16480_REG_X_GYRO_OUT		ADIS16480_REG(0x00, 0x10)
+#define ADIS16480_REG_Y_GYRO_OUT		ADIS16480_REG(0x00, 0x14)
+#define ADIS16480_REG_Z_GYRO_OUT		ADIS16480_REG(0x00, 0x18)
+#define ADIS16480_REG_X_ACCEL_OUT		ADIS16480_REG(0x00, 0x1C)
+#define ADIS16480_REG_Y_ACCEL_OUT		ADIS16480_REG(0x00, 0x20)
+#define ADIS16480_REG_Z_ACCEL_OUT		ADIS16480_REG(0x00, 0x24)
+#define ADIS16480_REG_X_MAGN_OUT		ADIS16480_REG(0x00, 0x28)
+#define ADIS16480_REG_Y_MAGN_OUT		ADIS16480_REG(0x00, 0x2A)
+#define ADIS16480_REG_Z_MAGN_OUT		ADIS16480_REG(0x00, 0x2C)
+#define ADIS16480_REG_BAROM_OUT			ADIS16480_REG(0x00, 0x2E)
+#define ADIS16480_REG_X_DELTAANG_OUT		ADIS16480_REG(0x00, 0x40)
+#define ADIS16480_REG_Y_DELTAANG_OUT		ADIS16480_REG(0x00, 0x44)
+#define ADIS16480_REG_Z_DELTAANG_OUT		ADIS16480_REG(0x00, 0x48)
+#define ADIS16480_REG_X_DELTAVEL_OUT		ADIS16480_REG(0x00, 0x4C)
+#define ADIS16480_REG_Y_DELTAVEL_OUT		ADIS16480_REG(0x00, 0x50)
+#define ADIS16480_REG_Z_DELTAVEL_OUT		ADIS16480_REG(0x00, 0x54)
+#define ADIS16480_REG_PROD_ID			ADIS16480_REG(0x00, 0x7E)
+
+#define ADIS16480_REG_X_GYRO_SCALE		ADIS16480_REG(0x02, 0x04)
+#define ADIS16480_REG_Y_GYRO_SCALE		ADIS16480_REG(0x02, 0x06)
+#define ADIS16480_REG_Z_GYRO_SCALE		ADIS16480_REG(0x02, 0x08)
+#define ADIS16480_REG_X_ACCEL_SCALE		ADIS16480_REG(0x02, 0x0A)
+#define ADIS16480_REG_Y_ACCEL_SCALE		ADIS16480_REG(0x02, 0x0C)
+#define ADIS16480_REG_Z_ACCEL_SCALE		ADIS16480_REG(0x02, 0x0E)
+#define ADIS16480_REG_X_GYRO_BIAS		ADIS16480_REG(0x02, 0x10)
+#define ADIS16480_REG_Y_GYRO_BIAS		ADIS16480_REG(0x02, 0x14)
+#define ADIS16480_REG_Z_GYRO_BIAS		ADIS16480_REG(0x02, 0x18)
+#define ADIS16480_REG_X_ACCEL_BIAS		ADIS16480_REG(0x02, 0x1C)
+#define ADIS16480_REG_Y_ACCEL_BIAS		ADIS16480_REG(0x02, 0x20)
+#define ADIS16480_REG_Z_ACCEL_BIAS		ADIS16480_REG(0x02, 0x24)
+#define ADIS16480_REG_X_HARD_IRON		ADIS16480_REG(0x02, 0x28)
+#define ADIS16480_REG_Y_HARD_IRON		ADIS16480_REG(0x02, 0x2A)
+#define ADIS16480_REG_Z_HARD_IRON		ADIS16480_REG(0x02, 0x2C)
+#define ADIS16480_REG_BAROM_BIAS		ADIS16480_REG(0x02, 0x40)
+#define ADIS16480_REG_FLASH_CNT			ADIS16480_REG(0x02, 0x7C)
+
+#define ADIS16480_REG_GLOB_CMD			ADIS16480_REG(0x03, 0x02)
+#define ADIS16480_REG_FNCTIO_CTRL		ADIS16480_REG(0x03, 0x06)
+#define ADIS16480_REG_GPIO_CTRL			ADIS16480_REG(0x03, 0x08)
+#define ADIS16480_REG_CONFIG			ADIS16480_REG(0x03, 0x0A)
+#define ADIS16480_REG_DEC_RATE			ADIS16480_REG(0x03, 0x0C)
+#define ADIS16480_REG_SLP_CNT			ADIS16480_REG(0x03, 0x10)
+#define ADIS16480_REG_FILTER_BNK0		ADIS16480_REG(0x03, 0x16)
+#define ADIS16480_REG_FILTER_BNK1		ADIS16480_REG(0x03, 0x18)
+#define ADIS16480_REG_ALM_CNFG0			ADIS16480_REG(0x03, 0x20)
+#define ADIS16480_REG_ALM_CNFG1			ADIS16480_REG(0x03, 0x22)
+#define ADIS16480_REG_ALM_CNFG2			ADIS16480_REG(0x03, 0x24)
+#define ADIS16480_REG_XG_ALM_MAGN		ADIS16480_REG(0x03, 0x28)
+#define ADIS16480_REG_YG_ALM_MAGN		ADIS16480_REG(0x03, 0x2A)
+#define ADIS16480_REG_ZG_ALM_MAGN		ADIS16480_REG(0x03, 0x2C)
+#define ADIS16480_REG_XA_ALM_MAGN		ADIS16480_REG(0x03, 0x2E)
+#define ADIS16480_REG_YA_ALM_MAGN		ADIS16480_REG(0x03, 0x30)
+#define ADIS16480_REG_ZA_ALM_MAGN		ADIS16480_REG(0x03, 0x32)
+#define ADIS16480_REG_XM_ALM_MAGN		ADIS16480_REG(0x03, 0x34)
+#define ADIS16480_REG_YM_ALM_MAGN		ADIS16480_REG(0x03, 0x36)
+#define ADIS16480_REG_ZM_ALM_MAGN		ADIS16480_REG(0x03, 0x38)
+#define ADIS16480_REG_BR_ALM_MAGN		ADIS16480_REG(0x03, 0x3A)
+#define ADIS16480_REG_FIRM_REV			ADIS16480_REG(0x03, 0x78)
+#define ADIS16480_REG_FIRM_DM			ADIS16480_REG(0x03, 0x7A)
+#define ADIS16480_REG_FIRM_Y			ADIS16480_REG(0x03, 0x7C)
+
+#define ADIS16480_REG_SERIAL_NUM		ADIS16480_REG(0x04, 0x20)
+
+/* Each filter coefficent bank spans two pages */
+#define ADIS16480_FIR_COEF(page) (x < 60 ? ADIS16480_REG(page, (x) + 8) : \
+		ADIS16480_REG((page) + 1, (x) - 60 + 8))
+#define ADIS16480_FIR_COEF_A(x)			ADIS16480_FIR_COEF(0x05, (x))
+#define ADIS16480_FIR_COEF_B(x)			ADIS16480_FIR_COEF(0x07, (x))
+#define ADIS16480_FIR_COEF_C(x)			ADIS16480_FIR_COEF(0x09, (x))
+#define ADIS16480_FIR_COEF_D(x)			ADIS16480_FIR_COEF(0x0B, (x))
+
+struct adis16480_chip_info {
+	unsigned int num_channels;
+	const struct iio_chan_spec *channels;
+};
+
+struct adis16480 {
+	const struct adis16480_chip_info *chip_info;
+
+	struct adis adis;
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+static ssize_t adis16480_show_firmware_revision(struct file *file,
+		char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct adis16480 *adis16480 = file->private_data;
+	char buf[6];
+	size_t len;
+	u16 rev;
+	int ret;
+
+	ret = adis_read_reg_16(&adis16480->adis, ADIS16480_REG_FIRM_REV, &rev);
+	if (ret < 0)
+		return ret;
+
+	len = snprintf(buf, sizeof(buf), "%x.%x\n", rev >> 8, rev & 0xff);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+
+static const struct file_operations adis16480_firmware_revision_fops = {
+	.open = simple_open,
+	.read = adis16480_show_firmware_revision,
+	.llseek = default_llseek,
+	.owner = THIS_MODULE,
+};
+
+static ssize_t adis16480_show_firmware_date(struct file *file,
+		char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct adis16480 *adis16480 = file->private_data;
+	u16 md, year;
+	char buf[12];
+	size_t len;
+	int ret;
+
+	ret = adis_read_reg_16(&adis16480->adis, ADIS16480_REG_FIRM_Y, &year);
+	if (ret < 0)
+		return ret;
+
+	ret = adis_read_reg_16(&adis16480->adis, ADIS16480_REG_FIRM_DM, &md);
+	if (ret < 0)
+		return ret;
+
+	len = snprintf(buf, sizeof(buf), "%.2x-%.2x-%.4x\n",
+			md >> 8, md & 0xff, year);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+
+static const struct file_operations adis16480_firmware_date_fops = {
+	.open = simple_open,
+	.read = adis16480_show_firmware_date,
+	.llseek = default_llseek,
+	.owner = THIS_MODULE,
+};
+
+static int adis16480_show_serial_number(void *arg, u64 *val)
+{
+	struct adis16480 *adis16480 = arg;
+	u16 serial;
+	int ret;
+
+	ret = adis_read_reg_16(&adis16480->adis, ADIS16480_REG_SERIAL_NUM,
+		&serial);
+	if (ret < 0)
+		return ret;
+
+	*val = serial;
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(adis16480_serial_number_fops,
+	adis16480_show_serial_number, NULL, "0x%.4llx\n");
+
+static int adis16480_show_product_id(void *arg, u64 *val)
+{
+	struct adis16480 *adis16480 = arg;
+	u16 prod_id;
+	int ret;
+
+	ret = adis_read_reg_16(&adis16480->adis, ADIS16480_REG_PROD_ID,
+		&prod_id);
+	if (ret < 0)
+		return ret;
+
+	*val = prod_id;
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(adis16480_product_id_fops,
+	adis16480_show_product_id, NULL, "%llu\n");
+
+static int adis16480_show_flash_count(void *arg, u64 *val)
+{
+	struct adis16480 *adis16480 = arg;
+	u32 flash_count;
+	int ret;
+
+	ret = adis_read_reg_32(&adis16480->adis, ADIS16480_REG_FLASH_CNT,
+		&flash_count);
+	if (ret < 0)
+		return ret;
+
+	*val = flash_count;
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(adis16480_flash_count_fops,
+	adis16480_show_flash_count, NULL, "%lld\n");
+
+static int adis16480_debugfs_init(struct iio_dev *indio_dev)
+{
+	struct adis16480 *adis16480 = iio_priv(indio_dev);
+
+	debugfs_create_file("firmware_revision", 0400,
+		indio_dev->debugfs_dentry, adis16480,
+		&adis16480_firmware_revision_fops);
+	debugfs_create_file("firmware_date", 0400, indio_dev->debugfs_dentry,
+		adis16480, &adis16480_firmware_date_fops);
+	debugfs_create_file("serial_number", 0400, indio_dev->debugfs_dentry,
+		adis16480, &adis16480_serial_number_fops);
+	debugfs_create_file("product_id", 0400, indio_dev->debugfs_dentry,
+		adis16480, &adis16480_product_id_fops);
+	debugfs_create_file("flash_count", 0400, indio_dev->debugfs_dentry,
+		adis16480, &adis16480_flash_count_fops);
+
+	return 0;
+}
+
+#else
+
+static int adis16480_debugfs_init(struct iio_dev *indio_dev)
+{
+	return 0;
+}
+
+#endif
+
+static int adis16480_set_freq(struct adis16480 *st, unsigned int freq)
+{
+	unsigned int t;
+
+	t = 2460000 / freq;
+	if (t > 2048)
+		t = 2048;
+
+	if (t != 0)
+		t--;
+
+	return adis_write_reg_16(&st->adis, ADIS16480_REG_DEC_RATE, t);
+}
+
+static int adis16480_get_freq(struct adis16480 *st, unsigned int *freq)
+{
+	uint16_t t;
+	int ret;
+
+	ret = adis_read_reg_16(&st->adis, ADIS16480_REG_DEC_RATE, &t);
+	if (ret < 0)
+		return ret;
+
+	*freq = 2460000 / (t + 1);
+
+	return 0;
+}
+
+static ssize_t adis16480_read_frequency(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct adis16480 *st = iio_priv(indio_dev);
+	unsigned int freq;
+	int ret;
+
+	ret = adis16480_get_freq(st, &freq);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d.%.3d\n", freq / 1000, freq % 1000);
+}
+
+static ssize_t adis16480_write_frequency(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t len)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct adis16480 *st = iio_priv(indio_dev);
+	int freq_int, freq_fract;
+	long val;
+	int ret;
+
+	ret = iio_str_to_fixpoint(buf, 100, &freq_int, &freq_fract);
+	if (ret)
+		return ret;
+
+	val = freq_int * 1000 + freq_fract;
+
+	if (val <= 0)
+		return -EINVAL;
+
+	ret = adis16480_set_freq(st, val);
+
+	return ret ? ret : len;
+}
+
+static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO,
+			      adis16480_read_frequency,
+			      adis16480_write_frequency);
+
+enum {
+	ADIS16480_SCAN_GYRO_X,
+	ADIS16480_SCAN_GYRO_Y,
+	ADIS16480_SCAN_GYRO_Z,
+	ADIS16480_SCAN_ACCEL_X,
+	ADIS16480_SCAN_ACCEL_Y,
+	ADIS16480_SCAN_ACCEL_Z,
+	ADIS16480_SCAN_MAGN_X,
+	ADIS16480_SCAN_MAGN_Y,
+	ADIS16480_SCAN_MAGN_Z,
+	ADIS16480_SCAN_BARO,
+	ADIS16480_SCAN_TEMP,
+};
+
+static const unsigned int adis16480_calibbias_regs[] = {
+	[ADIS16480_SCAN_GYRO_X] = ADIS16480_REG_X_GYRO_BIAS,
+	[ADIS16480_SCAN_GYRO_Y] = ADIS16480_REG_Y_GYRO_BIAS,
+	[ADIS16480_SCAN_GYRO_Z] = ADIS16480_REG_Z_GYRO_BIAS,
+	[ADIS16480_SCAN_ACCEL_X] = ADIS16480_REG_X_ACCEL_BIAS,
+	[ADIS16480_SCAN_ACCEL_Y] = ADIS16480_REG_Y_ACCEL_BIAS,
+	[ADIS16480_SCAN_ACCEL_Z] = ADIS16480_REG_Z_ACCEL_BIAS,
+	[ADIS16480_SCAN_MAGN_X] = ADIS16480_REG_X_HARD_IRON,
+	[ADIS16480_SCAN_MAGN_Y] = ADIS16480_REG_Y_HARD_IRON,
+	[ADIS16480_SCAN_MAGN_Z] = ADIS16480_REG_Z_HARD_IRON,
+	[ADIS16480_SCAN_BARO] = ADIS16480_REG_BAROM_BIAS,
+};
+
+static const unsigned int adis16480_calibscale_regs[] = {
+	[ADIS16480_SCAN_GYRO_X] = ADIS16480_REG_X_GYRO_SCALE,
+	[ADIS16480_SCAN_GYRO_Y] = ADIS16480_REG_Y_GYRO_SCALE,
+	[ADIS16480_SCAN_GYRO_Z] = ADIS16480_REG_Z_GYRO_SCALE,
+	[ADIS16480_SCAN_ACCEL_X] = ADIS16480_REG_X_ACCEL_SCALE,
+	[ADIS16480_SCAN_ACCEL_Y] = ADIS16480_REG_Y_ACCEL_SCALE,
+	[ADIS16480_SCAN_ACCEL_Z] = ADIS16480_REG_Z_ACCEL_SCALE,
+};
+
+static int adis16480_set_calibbias(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int bias)
+{
+	unsigned int reg = adis16480_calibbias_regs[chan->scan_index];
+	struct adis16480 *st = iio_priv(indio_dev);
+
+	switch (chan->type) {
+	case IIO_MAGN:
+	case IIO_PRESSURE:
+		if (bias < -0x8000 || bias >= 0x8000)
+			return -EINVAL;
+		return adis_write_reg_16(&st->adis, reg, bias);
+	case IIO_ANGL_VEL:
+	case IIO_ACCEL:
+		return adis_write_reg_32(&st->adis, reg, bias);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int adis16480_get_calibbias(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int *bias)
+{
+	unsigned int reg = adis16480_calibbias_regs[chan->scan_index];
+	struct adis16480 *st = iio_priv(indio_dev);
+	uint16_t val16;
+	uint32_t val32;
+	int ret;
+
+	switch (chan->type) {
+	case IIO_MAGN:
+	case IIO_PRESSURE:
+		ret = adis_read_reg_16(&st->adis, reg, &val16);
+		*bias = sign_extend32(val16, 15);
+		break;
+	case IIO_ANGL_VEL:
+	case IIO_ACCEL:
+		ret = adis_read_reg_32(&st->adis, reg, &val32);
+		*bias = sign_extend32(val32, 31);
+		break;
+	default:
+			ret = -EINVAL;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	return IIO_VAL_INT;
+}
+
+static int adis16480_set_calibscale(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int scale)
+{
+	unsigned int reg = adis16480_calibscale_regs[chan->scan_index];
+	struct adis16480 *st = iio_priv(indio_dev);
+
+	if (scale < -0x8000 || scale >= 0x8000)
+		return -EINVAL;
+
+	return adis_write_reg_16(&st->adis, reg, scale);
+}
+
+static int adis16480_get_calibscale(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int *scale)
+{
+	unsigned int reg = adis16480_calibscale_regs[chan->scan_index];
+	struct adis16480 *st = iio_priv(indio_dev);
+	uint16_t val16;
+	int ret;
+
+	ret = adis_read_reg_16(&st->adis, reg, &val16);
+	if (ret < 0)
+		return ret;
+
+	*scale = sign_extend32(val16, 15);
+	return IIO_VAL_INT;
+}
+
+static const unsigned int adis16480_def_filter_freqs[] = {
+	310,
+	55,
+	275,
+	63,
+};
+
+static const unsigned int ad16480_filter_data[][2] = {
+	[ADIS16480_SCAN_GYRO_X]		= { ADIS16480_REG_FILTER_BNK0, 0 },
+	[ADIS16480_SCAN_GYRO_Y]		= { ADIS16480_REG_FILTER_BNK0, 3 },
+	[ADIS16480_SCAN_GYRO_Z]		= { ADIS16480_REG_FILTER_BNK0, 6 },
+	[ADIS16480_SCAN_ACCEL_X]	= { ADIS16480_REG_FILTER_BNK0, 9 },
+	[ADIS16480_SCAN_ACCEL_Y]	= { ADIS16480_REG_FILTER_BNK0, 12 },
+	[ADIS16480_SCAN_ACCEL_Z]	= { ADIS16480_REG_FILTER_BNK1, 0 },
+	[ADIS16480_SCAN_MAGN_X]		= { ADIS16480_REG_FILTER_BNK1, 3 },
+	[ADIS16480_SCAN_MAGN_Y]		= { ADIS16480_REG_FILTER_BNK1, 6 },
+	[ADIS16480_SCAN_MAGN_Z]		= { ADIS16480_REG_FILTER_BNK1, 9 },
+};
+
+static int adis16480_get_filter_freq(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int *freq)
+{
+	struct adis16480 *st = iio_priv(indio_dev);
+	unsigned int enable_mask, offset, reg;
+	uint16_t val;
+	int ret;
+
+	reg = ad16480_filter_data[chan->scan_index][0];
+	offset = ad16480_filter_data[chan->scan_index][1];
+	enable_mask = BIT(offset + 2);
+
+	ret = adis_read_reg_16(&st->adis, reg, &val);
+	if (ret < 0)
+		return ret;
+
+	if (!(val & enable_mask))
+		*freq = 0;
+	else
+		*freq = adis16480_def_filter_freqs[(val >> offset) & 0x3];
+
+	return IIO_VAL_INT;
+}
+
+static int adis16480_set_filter_freq(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, unsigned int freq)
+{
+	struct adis16480 *st = iio_priv(indio_dev);
+	unsigned int enable_mask, offset, reg;
+	unsigned int diff, best_diff;
+	unsigned int i, best_freq;
+	uint16_t val;
+	int ret;
+
+	reg = ad16480_filter_data[chan->scan_index][0];
+	offset = ad16480_filter_data[chan->scan_index][1];
+	enable_mask = BIT(offset + 2);
+
+	ret = adis_read_reg_16(&st->adis, reg, &val);
+	if (ret < 0)
+		return ret;
+
+	if (freq == 0) {
+		val &= ~enable_mask;
+	} else {
+		best_freq = 0;
+		best_diff = 310;
+		for (i = 0; i < ARRAY_SIZE(adis16480_def_filter_freqs); i++) {
+			if (adis16480_def_filter_freqs[i] >= freq) {
+				diff = adis16480_def_filter_freqs[i] - freq;
+				if (diff < best_diff) {
+					best_diff = diff;
+					best_freq = i;
+				}
+			}
+		}
+
+		val &= ~(0x3 << offset);
+		val |= best_freq << offset;
+		val |= enable_mask;
+	}
+
+	return adis_write_reg_16(&st->adis, reg, val);
+}
+
+static int adis16480_read_raw(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int *val, int *val2, long info)
+{
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		return adis_single_conversion(indio_dev, chan, 0, val);
+	case IIO_CHAN_INFO_SCALE:
+		switch (chan->type) {
+		case IIO_ANGL_VEL:
+			*val = 0;
+			*val2 = IIO_DEGREE_TO_RAD(20000); /* 0.02 degree/sec */
+			return IIO_VAL_INT_PLUS_MICRO;
+		case IIO_ACCEL:
+			*val = 0;
+			*val2 = IIO_G_TO_M_S_2(800); /* 0.8 mg */
+			return IIO_VAL_INT_PLUS_MICRO;
+		case IIO_MAGN:
+			*val = 0;
+			*val2 = 100; /* 0.0001 gauss */
+			return IIO_VAL_INT_PLUS_MICRO;
+		case IIO_TEMP:
+			*val = 5;
+			*val2 = 650000; /* 5.65 milli degree Celsius */
+			return IIO_VAL_INT_PLUS_MICRO;
+		case IIO_PRESSURE:
+			*val = 0;
+			*val2 = 4000; /* 40ubar = 0.004 kPa */
+			return IIO_VAL_INT_PLUS_MICRO;
+		default:
+			return -EINVAL;
+		}
+	case IIO_CHAN_INFO_OFFSET:
+		/* Only the temperature channel has a offset */
+		*val = 4425; /* 25 degree Celsius = 0x0000 */
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_CALIBBIAS:
+		return adis16480_get_calibbias(indio_dev, chan, val);
+	case IIO_CHAN_INFO_CALIBSCALE:
+		return adis16480_get_calibscale(indio_dev, chan, val);
+	case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY:
+		return adis16480_get_filter_freq(indio_dev, chan, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int adis16480_write_raw(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, int val, int val2, long info)
+{
+	switch (info) {
+	case IIO_CHAN_INFO_CALIBBIAS:
+		return adis16480_set_calibbias(indio_dev, chan, val);
+	case IIO_CHAN_INFO_CALIBSCALE:
+		return adis16480_set_calibscale(indio_dev, chan, val);
+	case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY:
+		return adis16480_set_filter_freq(indio_dev, chan, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+#define ADIS16480_MOD_CHANNEL(_type, _mod, _address, _si, _info, _bits) \
+	{ \
+		.type = (_type), \
+		.modified = 1, \
+		.channel2 = (_mod), \
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+			IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \
+			IIO_CHAN_INFO_SCALE_SHARED_BIT | \
+			_info, \
+		.address = (_address), \
+		.scan_index = (_si), \
+		.scan_type = { \
+			.sign = 's', \
+			.realbits = (_bits), \
+			.storagebits = (_bits), \
+			.endianness = IIO_BE, \
+		}, \
+	}
+
+#define ADIS16480_GYRO_CHANNEL(_mod) \
+	ADIS16480_MOD_CHANNEL(IIO_ANGL_VEL, IIO_MOD_ ## _mod, \
+	ADIS16480_REG_ ## _mod ## _GYRO_OUT, ADIS16480_SCAN_GYRO_ ## _mod, \
+	IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY_SEPARATE_BIT | \
+	IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT, \
+	32)
+
+#define ADIS16480_ACCEL_CHANNEL(_mod) \
+	ADIS16480_MOD_CHANNEL(IIO_ACCEL, IIO_MOD_ ## _mod, \
+	ADIS16480_REG_ ## _mod ## _ACCEL_OUT, ADIS16480_SCAN_ACCEL_ ## _mod, \
+	IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY_SEPARATE_BIT | \
+	IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT, \
+	32)
+
+#define ADIS16480_MAGN_CHANNEL(_mod) \
+	ADIS16480_MOD_CHANNEL(IIO_MAGN, IIO_MOD_ ## _mod, \
+	ADIS16480_REG_ ## _mod ## _MAGN_OUT, ADIS16480_SCAN_MAGN_ ## _mod, \
+	IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY_SEPARATE_BIT, \
+	16)
+
+#define ADIS16480_PRESSURE_CHANNEL() \
+	{ \
+		.type = IIO_PRESSURE, \
+		.indexed = 1, \
+		.channel = 0, \
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+			IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \
+			IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \
+		.address = ADIS16480_REG_BAROM_OUT, \
+		.scan_index = ADIS16480_SCAN_BARO, \
+		.scan_type = { \
+			.sign = 's', \
+			.realbits = 32, \
+			.storagebits = 32, \
+			.endianness = IIO_BE, \
+		}, \
+	}
+
+#define ADIS16480_TEMP_CHANNEL() { \
+		.type = IIO_TEMP, \
+		.indexed = 1, \
+		.channel = 0, \
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+			IIO_CHAN_INFO_SCALE_SEPARATE_BIT | \
+			IIO_CHAN_INFO_OFFSET_SEPARATE_BIT, \
+		.address = ADIS16480_REG_TEMP_OUT, \
+		.scan_index = ADIS16480_SCAN_TEMP, \
+		.scan_type = { \
+			.sign = 's', \
+			.realbits = 16, \
+			.storagebits = 16, \
+			.endianness = IIO_BE, \
+		}, \
+	}
+
+static const struct iio_chan_spec adis16480_channels[] = {
+	ADIS16480_GYRO_CHANNEL(X),
+	ADIS16480_GYRO_CHANNEL(Y),
+	ADIS16480_GYRO_CHANNEL(Z),
+	ADIS16480_ACCEL_CHANNEL(X),
+	ADIS16480_ACCEL_CHANNEL(Y),
+	ADIS16480_ACCEL_CHANNEL(Z),
+	ADIS16480_MAGN_CHANNEL(X),
+	ADIS16480_MAGN_CHANNEL(Y),
+	ADIS16480_MAGN_CHANNEL(Z),
+	ADIS16480_PRESSURE_CHANNEL(),
+	ADIS16480_TEMP_CHANNEL(),
+	IIO_CHAN_SOFT_TIMESTAMP(11)
+};
+
+static const struct iio_chan_spec adis16485_channels[] = {
+	ADIS16480_GYRO_CHANNEL(X),
+	ADIS16480_GYRO_CHANNEL(Y),
+	ADIS16480_GYRO_CHANNEL(Z),
+	ADIS16480_ACCEL_CHANNEL(X),
+	ADIS16480_ACCEL_CHANNEL(Y),
+	ADIS16480_ACCEL_CHANNEL(Z),
+	ADIS16480_TEMP_CHANNEL(),
+	IIO_CHAN_SOFT_TIMESTAMP(7)
+};
+
+enum adis16480_variant {
+	ADIS16375,
+	ADIS16480,
+	ADIS16485,
+	ADIS16488,
+};
+
+static const struct adis16480_chip_info adis16480_chip_info[] = {
+	[ADIS16375] = {
+		.channels = adis16485_channels,
+		.num_channels = ARRAY_SIZE(adis16485_channels),
+	},
+	[ADIS16480] = {
+		.channels = adis16480_channels,
+		.num_channels = ARRAY_SIZE(adis16480_channels),
+	},
+	[ADIS16485] = {
+		.channels = adis16485_channels,
+		.num_channels = ARRAY_SIZE(adis16485_channels),
+	},
+	[ADIS16488] = {
+		.channels = adis16480_channels,
+		.num_channels = ARRAY_SIZE(adis16480_channels),
+	},
+};
+
+static struct attribute *adis16480_attributes[] = {
+	&iio_dev_attr_sampling_frequency.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group adis16480_attribute_group = {
+	.attrs = adis16480_attributes,
+};
+
+static const struct iio_info adis16480_info = {
+	.attrs = &adis16480_attribute_group,
+	.read_raw = &adis16480_read_raw,
+	.write_raw = &adis16480_write_raw,
+	.update_scan_mode = adis_update_scan_mode,
+	.driver_module = THIS_MODULE,
+};
+
+static int adis16480_stop_device(struct iio_dev *indio_dev)
+{
+	struct adis16480 *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = adis_write_reg_16(&st->adis, ADIS16480_REG_SLP_CNT, BIT(9));
+	if (ret)
+		dev_err(&indio_dev->dev,
+			"Could not power down device: %d\n", ret);
+
+	return ret;
+}
+
+static int adis16480_enable_irq(struct adis *adis, bool enable)
+{
+	return adis_write_reg_16(adis, ADIS16480_REG_FNCTIO_CTRL,
+		enable ? BIT(3) : 0);
+}
+
+static int adis16480_initial_setup(struct iio_dev *indio_dev)
+{
+	struct adis16480 *st = iio_priv(indio_dev);
+	uint16_t prod_id;
+	unsigned int device_id;
+	int ret;
+
+	adis_reset(&st->adis);
+	msleep(70);
+
+	ret = adis_write_reg_16(&st->adis, ADIS16480_REG_GLOB_CMD, BIT(1));
+	if (ret)
+		return ret;
+	msleep(30);
+
+	ret = adis_check_status(&st->adis);
+	if (ret)
+		return ret;
+
+	ret = adis_read_reg_16(&st->adis, ADIS16480_REG_PROD_ID, &prod_id);
+	if (ret)
+		return ret;
+
+	sscanf(indio_dev->name, "adis%u\n", &device_id);
+
+	if (prod_id != device_id)
+		dev_warn(&indio_dev->dev, "Device ID(%u) and product ID(%u) do not match.",
+				device_id, prod_id);
+
+	return 0;
+}
+
+#define ADIS16480_DIAG_STAT_XGYRO_FAIL 0
+#define ADIS16480_DIAG_STAT_YGYRO_FAIL 1
+#define ADIS16480_DIAG_STAT_ZGYRO_FAIL 2
+#define ADIS16480_DIAG_STAT_XACCL_FAIL 3
+#define ADIS16480_DIAG_STAT_YACCL_FAIL 4
+#define ADIS16480_DIAG_STAT_ZACCL_FAIL 5
+#define ADIS16480_DIAG_STAT_XMAGN_FAIL 8
+#define ADIS16480_DIAG_STAT_YMAGN_FAIL 9
+#define ADIS16480_DIAG_STAT_ZMAGN_FAIL 10
+#define ADIS16480_DIAG_STAT_BARO_FAIL 11
+
+static const char * const adis16480_status_error_msgs[] = {
+	[ADIS16480_DIAG_STAT_XGYRO_FAIL] = "X-axis gyroscope self-test failure",
+	[ADIS16480_DIAG_STAT_YGYRO_FAIL] = "Y-axis gyroscope self-test failure",
+	[ADIS16480_DIAG_STAT_ZGYRO_FAIL] = "Z-axis gyroscope self-test failure",
+	[ADIS16480_DIAG_STAT_XACCL_FAIL] = "X-axis accelerometer self-test failure",
+	[ADIS16480_DIAG_STAT_YACCL_FAIL] = "Y-axis accelerometer self-test failure",
+	[ADIS16480_DIAG_STAT_ZACCL_FAIL] = "Z-axis accelerometer self-test failure",
+	[ADIS16480_DIAG_STAT_XMAGN_FAIL] = "X-axis magnetometer self-test failure",
+	[ADIS16480_DIAG_STAT_YMAGN_FAIL] = "Y-axis magnetometer self-test failure",
+	[ADIS16480_DIAG_STAT_ZMAGN_FAIL] = "Z-axis magnetometer self-test failure",
+	[ADIS16480_DIAG_STAT_BARO_FAIL] = "Barometer self-test failure",
+};
+
+static const struct adis_data adis16480_data = {
+	.diag_stat_reg = ADIS16480_REG_DIAG_STS,
+	.glob_cmd_reg = ADIS16480_REG_GLOB_CMD,
+	.has_paging = true,
+
+	.read_delay = 5,
+	.write_delay = 5,
+
+	.status_error_msgs = adis16480_status_error_msgs,
+	.status_error_mask = BIT(ADIS16480_DIAG_STAT_XGYRO_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_YGYRO_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_ZGYRO_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_XACCL_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_YACCL_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_ZACCL_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_XMAGN_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_YMAGN_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_ZMAGN_FAIL) |
+		BIT(ADIS16480_DIAG_STAT_BARO_FAIL),
+
+	.enable_irq = adis16480_enable_irq,
+};
+
+static int adis16480_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	struct iio_dev *indio_dev;
+	struct adis16480 *st;
+	int ret;
+
+	indio_dev = iio_device_alloc(sizeof(*st));
+	if (indio_dev == NULL)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, indio_dev);
+
+	st = iio_priv(indio_dev);
+
+	st->chip_info = &adis16480_chip_info[id->driver_data];
+	indio_dev->dev.parent = &spi->dev;
+	indio_dev->name = spi_get_device_id(spi)->name;
+	indio_dev->channels = st->chip_info->channels;
+	indio_dev->num_channels = st->chip_info->num_channels;
+	indio_dev->info = &adis16480_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+
+	ret = adis_init(&st->adis, indio_dev, spi, &adis16480_data);
+	if (ret)
+		goto error_free_dev;
+
+	ret = adis_setup_buffer_and_trigger(&st->adis, indio_dev, NULL);
+	if (ret)
+		goto error_free_dev;
+
+	ret = adis16480_initial_setup(indio_dev);
+	if (ret)
+		goto error_cleanup_buffer;
+
+	ret = iio_device_register(indio_dev);
+	if (ret)
+		goto error_stop_device;
+
+	adis16480_debugfs_init(indio_dev);
+
+	return 0;
+
+error_stop_device:
+	adis16480_stop_device(indio_dev);
+error_cleanup_buffer:
+	adis_cleanup_buffer_and_trigger(&st->adis, indio_dev);
+error_free_dev:
+	iio_device_free(indio_dev);
+	return ret;
+}
+
+static int adis16480_remove(struct spi_device *spi)
+{
+	struct iio_dev *indio_dev = spi_get_drvdata(spi);
+	struct adis16480 *st = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+	adis16480_stop_device(indio_dev);
+
+	adis_cleanup_buffer_and_trigger(&st->adis, indio_dev);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static const struct spi_device_id adis16480_ids[] = {
+	{ "adis16375", ADIS16375 },
+	{ "adis16480", ADIS16480 },
+	{ "adis16485", ADIS16485 },
+	{ "adis16488", ADIS16488 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, adis16480_ids);
+
+static struct spi_driver adis16480_driver = {
+	.driver = {
+		.name = "adis16480",
+		.owner = THIS_MODULE,
+	},
+	.id_table = adis16480_ids,
+	.probe = adis16480_probe,
+	.remove = adis16480_remove,
+};
+module_spi_driver(adis16480_driver);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("Analog Devices ADIS16480 IMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index e82cd0827e91..ff781dca2e9a 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -20,6 +20,8 @@
 #define ADIS_PAGE_SIZE 0x80
 #define ADIS_REG_PAGE_ID 0x00
 
+struct adis;
+
 /**
  * struct adis_data - ADIS chip variant specific data
  * @read_delay: SPI delay for read operations in us
@@ -44,6 +46,8 @@ struct adis_data {
 	const char * const *status_error_msgs;
 	unsigned int status_error_mask;
 
+	int (*enable_irq)(struct adis *adis, bool enable);
+
 	bool has_paging;
 };
 
-- 
cgit v1.2.3-55-g7522


From 95f8a082b9b1ead0c2859f2a7b1ac91ff63d8765 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Wed, 21 Nov 2012 00:21:50 +0100
Subject: ACPI / driver core: Introduce struct acpi_dev_node and related macros

To avoid adding an ACPI handle pointer to struct device on
architectures that don't use ACPI, or generally when CONFIG_ACPI is
not set, in which cases that pointer is useless, define struct
acpi_dev_node that will contain the handle pointer if CONFIG_ACPI is
set and will be empty otherwise and use it to represent the ACPI
device node field in struct device.

In addition to that define macros for reading and setting the ACPI
handle of a device that don't generate code when CONFIG_ACPI is
unset.  Modify the ACPI subsystem to use those macros instead of
referring to the given device's ACPI handle directly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/glue.c     | 16 ++++++++--------
 drivers/acpi/scan.c     |  4 ++--
 include/acpi/acpi_bus.h |  2 +-
 include/linux/device.h  | 18 ++++++++++++++++--
 4 files changed, 27 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 3e75d6e5a469..01551840d236 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -134,12 +134,12 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle)
 	char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2];
 	int retval = -EINVAL;
 
-	if (dev->acpi_handle) {
+	if (ACPI_HANDLE(dev)) {
 		if (handle) {
 			dev_warn(dev, "ACPI handle is already set\n");
 			return -EINVAL;
 		} else {
-			handle = dev->acpi_handle;
+			handle = ACPI_HANDLE(dev);
 		}
 	}
 	if (!handle)
@@ -181,8 +181,8 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle)
 
 	mutex_unlock(&acpi_dev->physical_node_lock);
 
-	if (!dev->acpi_handle)
-		dev->acpi_handle = handle;
+	if (!ACPI_HANDLE(dev))
+		ACPI_HANDLE_SET(dev, acpi_dev->handle);
 
 	if (!physical_node->node_id)
 		strcpy(physical_node_name, PHYSICAL_NODE_STRING);
@@ -200,7 +200,7 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle)
 	return 0;
 
  err:
-	dev->acpi_handle = NULL;
+	ACPI_HANDLE_SET(dev, NULL);
 	put_device(dev);
 	return retval;
 
@@ -217,10 +217,10 @@ static int acpi_unbind_one(struct device *dev)
 	acpi_status status;
 	struct list_head *node, *next;
 
-	if (!dev->acpi_handle)
+	if (!ACPI_HANDLE(dev))
 		return 0;
 
-	status = acpi_bus_get_device(dev->acpi_handle, &acpi_dev);
+	status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev);
 	if (ACPI_FAILURE(status))
 		goto err;
 
@@ -246,7 +246,7 @@ static int acpi_unbind_one(struct device *dev)
 
 		sysfs_remove_link(&acpi_dev->dev.kobj, physical_node_name);
 		sysfs_remove_link(&dev->kobj, "firmware_node");
-		dev->acpi_handle = NULL;
+		ACPI_HANDLE_SET(dev, NULL);
 		/* acpi_bind_one increase refcnt by one */
 		put_device(dev);
 		kfree(entry);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d842569395a9..e92ca67d0e46 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -386,8 +386,8 @@ const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
 {
 	struct acpi_device *adev;
 
-	if (!ids || !dev->acpi_handle
-	    || ACPI_FAILURE(acpi_bus_get_device(dev->acpi_handle, &adev)))
+	if (!ids || !ACPI_HANDLE(dev)
+	    || ACPI_FAILURE(acpi_bus_get_device(ACPI_HANDLE(dev), &adev)))
 		return NULL;
 
 	return __acpi_match_device(adev, ids);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index bb1537c5e672..d1659904f2a0 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -410,7 +410,7 @@ acpi_handle acpi_get_child(acpi_handle, u64);
 int acpi_is_root_bridge(acpi_handle);
 acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->acpi_handle))
+#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
 
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index cc3aee57a46e..05292e488346 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -578,6 +578,12 @@ struct device_dma_parameters {
 	unsigned long segment_boundary_mask;
 };
 
+struct acpi_dev_node {
+#ifdef CONFIG_ACPI
+	void	*handle;
+#endif
+};
+
 /**
  * struct device - The basic device structure
  * @parent:	The device's "parent" device, the device to which it is attached.
@@ -618,7 +624,7 @@ struct device_dma_parameters {
  * @dma_mem:	Internal for coherent mem override.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
- * @acpi_handle: Associated ACPI device node's namespace handle.
+ * @acpi_node:	Associated ACPI device node.
  * @devt:	For creating the sysfs "dev".
  * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
@@ -683,7 +689,7 @@ struct device {
 	struct dev_archdata	archdata;
 
 	struct device_node	*of_node; /* associated device tree node */
-	void			*acpi_handle; /* associated ACPI device node */
+	struct acpi_dev_node	acpi_node; /* associated ACPI device node */
 
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 	u32			id;	/* device instance */
@@ -704,6 +710,14 @@ static inline struct device *kobj_to_dev(struct kobject *kobj)
 	return container_of(kobj, struct device, kobj);
 }
 
+#ifdef CONFIG_ACPI
+#define ACPI_HANDLE(dev)	((dev)->acpi_node.handle)
+#define ACPI_HANDLE_SET(dev, _handle_)	(dev)->acpi_node.handle = (_handle_)
+#else
+#define ACPI_HANDLE(dev)	(NULL)
+#define ACPI_HANDLE_SET(dev, _handle_)	do { } while (0)
+#endif
+
 /* Get the wakeup routines, which depend on struct device */
 #include <linux/pm_wakeup.h>
 
-- 
cgit v1.2.3-55-g7522


From 863f9f30e6c1e30cb19a0cd17c5cf8879257dfd7 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Wed, 21 Nov 2012 00:21:59 +0100
Subject: ACPI / platform: Initialize ACPI handles of platform devices in
 advance

The current platform device creation and registration code in
acpi_create_platform_device() is quite convoluted.  This function
takes an ACPI device node as an argument and eventually calls
platform_device_register_resndata() to create and register a
platform device object on the basis of the information contained
in that code.  However, it doesn't associate the new platform
device with the ACPI node directly, but instead it relies on
acpi_platform_notify(), called from within device_add(), to find
that ACPI node again with the help of acpi_platform_find_device()
and acpi_platform_match() and then attach the new platform device
to it.  This causes an additional ACPI namespace walk to happen and
is clearly suboptimal.

Use the observation that it is now possible to initialize the ACPI
handle of a device before calling device_add() for it to make this
code more straightforward.  Namely, add a new field to struct
platform_device_info allowing us to pass the ACPI handle of interest
to platform_device_register_full(), which will then use it to
initialize the new device's ACPI handle before registering it.
This will cause acpi_platform_notify() to use the ACPI handle from
the device structure directly instead of using the .find_device()
routine provided by the device's bus type.  In consequence,
acpi_platform_bus, acpi_platform_find_device(), and
acpi_platform_match() are not necessary any more, so remove them.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpi_platform.c    | 76 ++++++-----------------------------------
 drivers/base/platform.c         |  2 ++
 include/linux/platform_device.h |  1 +
 3 files changed, 13 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 7ac20d8b8f07..b7df9b197bcf 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -33,7 +33,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 {
 	struct platform_device *pdev = NULL;
 	struct acpi_device *acpi_parent;
-	struct device *parent = NULL;
+	struct platform_device_info pdevinfo;
 	struct resource_list_entry *rentry;
 	struct list_head resource_list;
 	struct resource *resources;
@@ -60,11 +60,13 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 
 	acpi_dev_free_resource_list(&resource_list);
 
+	memset(&pdevinfo, 0, sizeof(pdevinfo));
 	/*
 	 * If the ACPI node has a parent and that parent has a physical device
 	 * attached to it, that physical device should be the parent of the
 	 * platform device we are about to create.
 	 */
+	pdevinfo.parent = NULL;
 	acpi_parent = adev->parent;
 	if (acpi_parent) {
 		struct acpi_device_physical_node *entry;
@@ -76,12 +78,16 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 			entry = list_first_entry(list,
 					struct acpi_device_physical_node,
 					node);
-			parent = entry->dev;
+			pdevinfo.parent = entry->dev;
 		}
 		mutex_unlock(&acpi_parent->physical_node_lock);
 	}
-	pdev = platform_device_register_resndata(parent, dev_name(&adev->dev),
-						 -1, resources, count, NULL, 0);
+	pdevinfo.name = dev_name(&adev->dev);
+	pdevinfo.id = -1;
+	pdevinfo.res = resources;
+	pdevinfo.num_res = count;
+	pdevinfo.acpi_node.handle = adev->handle;
+	pdev = platform_device_register_full(&pdevinfo);
 	if (IS_ERR(pdev)) {
 		dev_err(&adev->dev, "platform device creation failed: %ld\n",
 			PTR_ERR(pdev));
@@ -94,65 +100,3 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 	kfree(resources);
 	return pdev;
 }
-
-static acpi_status acpi_platform_match(acpi_handle handle, u32 depth,
-				       void *data, void **return_value)
-{
-	struct platform_device *pdev = data;
-	struct acpi_device *adev;
-	acpi_status status;
-
-	status = acpi_bus_get_device(handle, &adev);
-	if (ACPI_FAILURE(status))
-		return status;
-
-	/* Skip ACPI devices that have physical device attached */
-	if (adev->physical_node_count)
-		return AE_OK;
-
-	if (!strcmp(dev_name(&pdev->dev), dev_name(&adev->dev))) {
-		*(acpi_handle *)return_value = handle;
-		return AE_CTRL_TERMINATE;
-	}
-
-	return AE_OK;
-}
-
-static int acpi_platform_find_device(struct device *dev, acpi_handle *handle)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	char *name, *tmp, *hid;
-
-	/*
-	 * The platform device is named using the ACPI device name
-	 * _HID:INSTANCE so we strip the INSTANCE out in order to find the
-	 * correct device using its _HID.
-	 */
-	name = kstrdup(dev_name(dev), GFP_KERNEL);
-	if (!name)
-		return -ENOMEM;
-
-	tmp = name;
-	hid = strsep(&tmp, ":");
-	if (!hid) {
-		kfree(name);
-		return -ENODEV;
-	}
-
-	*handle = NULL;
-	acpi_get_devices(hid, acpi_platform_match, pdev, handle);
-
-	kfree(name);
-	return *handle ? 0 : -ENODEV;
-}
-
-static struct acpi_bus_type acpi_platform_bus = {
-	.bus = &platform_bus_type,
-	.find_device = acpi_platform_find_device,
-};
-
-static int __init acpi_platform_init(void)
-{
-	return register_acpi_bus_type(&acpi_platform_bus);
-}
-arch_initcall(acpi_platform_init);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7de29ebfce7f..49fd96e23460 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -437,6 +437,7 @@ struct platform_device *platform_device_register_full(
 		goto err_alloc;
 
 	pdev->dev.parent = pdevinfo->parent;
+	ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle);
 
 	if (pdevinfo->dma_mask) {
 		/*
@@ -467,6 +468,7 @@ struct platform_device *platform_device_register_full(
 	ret = platform_device_add(pdev);
 	if (ret) {
 err:
+		ACPI_HANDLE_SET(&pdev->dev, NULL);
 		kfree(pdev->dev.dma_mask);
 
 err_alloc:
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 5711e9525a2a..a9ded9a3c175 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -55,6 +55,7 @@ extern int platform_add_devices(struct platform_device **, int);
 
 struct platform_device_info {
 		struct device *parent;
+		struct acpi_dev_node acpi_node;
 
 		const char *name;
 		int id;
-- 
cgit v1.2.3-55-g7522


From 76aad392f75e6ce5be3f106554e16f7ff96543e5 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi
Date: Tue, 20 Nov 2012 15:20:30 +0100
Subject: regmap: introduce tables for readable/writeable/volatile/precious
 checks

Many of the regmap enabled drivers implementing one or more of the
readable, writeable, volatile and precious methods use the same code
pattern:

	return ((reg >= X && reg <= Y) || (reg >= W && reg <= Z) || ...)

Switch to a data driven approach, using tables to describe
readable/writeable/volatile and precious registers ranges instead.
The table based check can still be overridden by passing the usual function
pointers via struct regmap_config.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  4 ++
 drivers/base/regmap/regmap.c   | 46 +++++++++++++++++++++++
 include/linux/regmap.h         | 83 ++++++++++++++++++++++++++++++++++++------
 3 files changed, 122 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 2cd01b57b1c4..288e135fd5f8 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -55,6 +55,10 @@ struct regmap {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	const struct regmap_access_table *wr_table;
+	const struct regmap_access_table *rd_table;
+	const struct regmap_access_table *volatile_table;
+	const struct regmap_access_table *precious_table;
 
 	u8 read_flag_mask;
 	u8 write_flag_mask;
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 64eb8350a074..96f7e8523ee8 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -34,6 +34,36 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg,
 			       unsigned int mask, unsigned int val,
 			       bool *change);
 
+bool regmap_reg_in_ranges(unsigned int reg,
+			  const struct regmap_range *ranges,
+			  unsigned int nranges)
+{
+	const struct regmap_range *r;
+	int i;
+
+	for (i = 0, r = ranges; i < nranges; i++, r++)
+		if (regmap_reg_in_range(reg, r))
+			return true;
+	return false;
+}
+EXPORT_SYMBOL_GPL(regmap_reg_in_ranges);
+
+static bool _regmap_check_range_table(struct regmap *map,
+				      unsigned int reg,
+				      const struct regmap_access_table *table)
+{
+	/* Check "no ranges" first */
+	if (regmap_reg_in_ranges(reg, table->no_ranges, table->n_no_ranges))
+		return false;
+
+	/* In case zero "yes ranges" are supplied, any reg is OK */
+	if (!table->n_yes_ranges)
+		return true;
+
+	return regmap_reg_in_ranges(reg, table->yes_ranges,
+				    table->n_yes_ranges);
+}
+
 bool regmap_writeable(struct regmap *map, unsigned int reg)
 {
 	if (map->max_register && reg > map->max_register)
@@ -42,6 +72,9 @@ bool regmap_writeable(struct regmap *map, unsigned int reg)
 	if (map->writeable_reg)
 		return map->writeable_reg(map->dev, reg);
 
+	if (map->wr_table)
+		return _regmap_check_range_table(map, reg, map->wr_table);
+
 	return true;
 }
 
@@ -56,6 +89,9 @@ bool regmap_readable(struct regmap *map, unsigned int reg)
 	if (map->readable_reg)
 		return map->readable_reg(map->dev, reg);
 
+	if (map->rd_table)
+		return _regmap_check_range_table(map, reg, map->rd_table);
+
 	return true;
 }
 
@@ -67,6 +103,9 @@ bool regmap_volatile(struct regmap *map, unsigned int reg)
 	if (map->volatile_reg)
 		return map->volatile_reg(map->dev, reg);
 
+	if (map->volatile_table)
+		return _regmap_check_range_table(map, reg, map->volatile_table);
+
 	return true;
 }
 
@@ -78,6 +117,9 @@ bool regmap_precious(struct regmap *map, unsigned int reg)
 	if (map->precious_reg)
 		return map->precious_reg(map->dev, reg);
 
+	if (map->precious_table)
+		return _regmap_check_range_table(map, reg, map->precious_table);
+
 	return false;
 }
 
@@ -370,6 +412,10 @@ struct regmap *regmap_init(struct device *dev,
 	map->bus = bus;
 	map->bus_context = bus_context;
 	map->max_register = config->max_register;
+	map->wr_table = config->wr_table;
+	map->rd_table = config->rd_table;
+	map->volatile_table = config->volatile_table;
+	map->precious_table = config->precious_table;
 	map->writeable_reg = config->writeable_reg;
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 0e819e3cebce..3bdb1f9a5b43 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -53,6 +53,36 @@ enum regmap_endian {
 	REGMAP_ENDIAN_NATIVE,
 };
 
+/**
+ * A register range, used for access related checks
+ * (readable/writeable/volatile/precious checks)
+ *
+ * @range_min: address of first register
+ * @range_max: address of last register
+ */
+struct regmap_range {
+	unsigned int range_min;
+	unsigned int range_max;
+};
+
+/*
+ * A table of ranges including some yes ranges and some no ranges.
+ * If a register belongs to a no_range, the corresponding check function
+ * will return false. If a register belongs to a yes range, the corresponding
+ * check function will return true. "no_ranges" are searched first.
+ *
+ * @yes_ranges : pointer to an array of regmap ranges used as "yes ranges"
+ * @n_yes_ranges: size of the above array
+ * @no_ranges: pointer to an array of regmap ranges used as "no ranges"
+ * @n_no_ranges: size of the above array
+ */
+struct regmap_access_table {
+	const struct regmap_range *yes_ranges;
+	unsigned int n_yes_ranges;
+	const struct regmap_range *no_ranges;
+	unsigned int n_no_ranges;
+};
+
 typedef void (*regmap_lock)(void *);
 typedef void (*regmap_unlock)(void *);
 
@@ -70,22 +100,39 @@ typedef void (*regmap_unlock)(void *);
  * @val_bits: Number of bits in a register value, mandatory.
  *
  * @writeable_reg: Optional callback returning true if the register
- *                 can be written to.
+ *		   can be written to. If this field is NULL but wr_table
+ *		   (see below) is not, the check is performed on such table
+ *                 (a register is writeable if it belongs to one of the ranges
+ *                  specified by wr_table).
  * @readable_reg: Optional callback returning true if the register
- *                can be read from.
+ *		  can be read from. If this field is NULL but rd_table
+ *		   (see below) is not, the check is performed on such table
+ *                 (a register is readable if it belongs to one of the ranges
+ *                  specified by rd_table).
  * @volatile_reg: Optional callback returning true if the register
- *                value can't be cached.
+ *		  value can't be cached. If this field is NULL but
+ *		  volatile_table (see below) is not, the check is performed on
+ *                such table (a register is volatile if it belongs to one of
+ *                the ranges specified by volatile_table).
  * @precious_reg: Optional callback returning true if the rgister
- *                should not be read outside of a call from the driver
- *                (eg, a clear on read interrupt status register).
- * @lock:         Optional lock callback (overrides regmap's default lock
- *                function, based on spinlock or mutex).
- * @unlock:       As above for unlocking.
- * @lock_arg:     this field is passed as the only argument of lock/unlock
- *                functions (ignored in case regular lock/unlock functions
- *                are not overridden).
+ *		  should not be read outside of a call from the driver
+ *		  (eg, a clear on read interrupt status register). If this
+ *                field is NULL but precious_table (see below) is not, the
+ *                check is performed on such table (a register is precious if
+ *                it belongs to one of the ranges specified by precious_table).
+ * @lock:	  Optional lock callback (overrides regmap's default lock
+ *		  function, based on spinlock or mutex).
+ * @unlock:	  As above for unlocking.
+ * @lock_arg:	  this field is passed as the only argument of lock/unlock
+ *		  functions (ignored in case regular lock/unlock functions
+ *		  are not overridden).
  *
  * @max_register: Optional, specifies the maximum valid register index.
+ * @wr_table:     Optional, points to a struct regmap_access_table specifying
+ *                valid ranges for write access.
+ * @rd_table:     As above, for read access.
+ * @volatile_table: As above, for volatile registers.
+ * @precious_table: As above, for precious registers.
  * @reg_defaults: Power on reset values for registers (for use with
  *                register cache support).
  * @num_reg_defaults: Number of elements in reg_defaults.
@@ -130,6 +177,10 @@ struct regmap_config {
 	void *lock_arg;
 
 	unsigned int max_register;
+	const struct regmap_access_table *wr_table;
+	const struct regmap_access_table *rd_table;
+	const struct regmap_access_table *volatile_table;
+	const struct regmap_access_table *precious_table;
 	const struct reg_default *reg_defaults;
 	unsigned int num_reg_defaults;
 	enum regcache_type cache_type;
@@ -280,6 +331,16 @@ void regcache_mark_dirty(struct regmap *map);
 int regmap_register_patch(struct regmap *map, const struct reg_default *regs,
 			  int num_regs);
 
+static inline bool regmap_reg_in_range(unsigned int reg,
+				       const struct regmap_range *range)
+{
+	return reg >= range->range_min && reg <= range->range_max;
+}
+
+bool regmap_reg_in_ranges(unsigned int reg,
+			  const struct regmap_range *ranges,
+			  unsigned int nranges);
+
 /**
  * Description of an IRQ for the generic regmap irq_chip.
  *
-- 
cgit v1.2.3-55-g7522


From be193249b4178158c0f697cb452b2bbf0cb16361 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Tue, 20 Nov 2012 10:15:19 +0530
Subject: dt: add helper function to read u8 & u16 variables & arrays

This adds following helper routines:
- of_property_read_u8_array()
- of_property_read_u16_array()
- of_property_read_u8()
- of_property_read_u16()

This expects arrays from DT to be passed as:
- u8 array:
	property = /bits/ 8 <0x50 0x60 0x70>;
- u16 array:
	property = /bits/ 16 <0x5000 0x6000 0x7000>;

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 drivers/of/base.c  | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of.h | 30 +++++++++++++++++++++
 2 files changed, 107 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index af3b22ac7627..f564e3107b3e 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -670,6 +670,82 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 }
 EXPORT_SYMBOL(of_find_node_by_phandle);
 
+/**
+ * of_property_read_u8_array - Find and read an array of u8 from a property.
+ *
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_value:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
+ *
+ * Search for a property in a device node and read 8-bit value(s) from
+ * it. Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * dts entry of array should be like:
+ *	property = /bits/ 8 <0x50 0x60 0x70>;
+ *
+ * The out_value is modified only if a valid u8 value can be decoded.
+ */
+int of_property_read_u8_array(const struct device_node *np,
+			const char *propname, u8 *out_values, size_t sz)
+{
+	struct property *prop = of_find_property(np, propname, NULL);
+	const u8 *val;
+
+	if (!prop)
+		return -EINVAL;
+	if (!prop->value)
+		return -ENODATA;
+	if ((sz * sizeof(*out_values)) > prop->length)
+		return -EOVERFLOW;
+
+	val = prop->value;
+	while (sz--)
+		*out_values++ = *val++;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_property_read_u8_array);
+
+/**
+ * of_property_read_u16_array - Find and read an array of u16 from a property.
+ *
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_value:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
+ *
+ * Search for a property in a device node and read 16-bit value(s) from
+ * it. Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * dts entry of array should be like:
+ *	property = /bits/ 16 <0x5000 0x6000 0x7000>;
+ *
+ * The out_value is modified only if a valid u16 value can be decoded.
+ */
+int of_property_read_u16_array(const struct device_node *np,
+			const char *propname, u16 *out_values, size_t sz)
+{
+	struct property *prop = of_find_property(np, propname, NULL);
+	const __be16 *val;
+
+	if (!prop)
+		return -EINVAL;
+	if (!prop->value)
+		return -ENODATA;
+	if ((sz * sizeof(*out_values)) > prop->length)
+		return -EOVERFLOW;
+
+	val = prop->value;
+	while (sz--)
+		*out_values++ = be16_to_cpup(val++);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_property_read_u16_array);
+
 /**
  * of_property_read_u32_array - Find and read an array of 32 bit integers
  * from a property.
@@ -677,6 +753,7 @@ EXPORT_SYMBOL(of_find_node_by_phandle);
  * @np:		device node from which the property value is to be read.
  * @propname:	name of the property to be searched.
  * @out_value:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
  *
  * Search for a property in a device node and read 32-bit value(s) from
  * it. Returns 0 on success, -EINVAL if the property does not exist,
diff --git a/include/linux/of.h b/include/linux/of.h
index 857dde984a6e..ab1af0e14659 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -223,6 +223,10 @@ extern struct device_node *of_find_node_with_property(
 extern struct property *of_find_property(const struct device_node *np,
 					 const char *name,
 					 int *lenp);
+extern int of_property_read_u8_array(const struct device_node *np,
+			const char *propname, u8 *out_values, size_t sz);
+extern int of_property_read_u16_array(const struct device_node *np,
+			const char *propname, u16 *out_values, size_t sz);
 extern int of_property_read_u32_array(const struct device_node *np,
 				      const char *propname,
 				      u32 *out_values,
@@ -364,6 +368,18 @@ static inline struct device_node *of_find_compatible_node(
 	return NULL;
 }
 
+static inline int of_property_read_u8_array(const struct device_node *np,
+			const char *propname, u8 *out_values, size_t sz)
+{
+	return -ENOSYS;
+}
+
+static inline int of_property_read_u16_array(const struct device_node *np,
+			const char *propname, u16 *out_values, size_t sz)
+{
+	return -ENOSYS;
+}
+
 static inline int of_property_read_u32_array(const struct device_node *np,
 					     const char *propname,
 					     u32 *out_values, size_t sz)
@@ -470,6 +486,20 @@ static inline bool of_property_read_bool(const struct device_node *np,
 	return prop ? true : false;
 }
 
+static inline int of_property_read_u8(const struct device_node *np,
+				       const char *propname,
+				       u8 *out_value)
+{
+	return of_property_read_u8_array(np, propname, out_value, 1);
+}
+
+static inline int of_property_read_u16(const struct device_node *np,
+				       const char *propname,
+				       u16 *out_value)
+{
+	return of_property_read_u16_array(np, propname, out_value, 1);
+}
+
 static inline int of_property_read_u32(const struct device_node *np,
 				       const char *propname,
 				       u32 *out_value)
-- 
cgit v1.2.3-55-g7522


From 50c8af4cf98fd97d6779f244215154e4c89699c7 Mon Sep 17 00:00:00 2001
From: Stephen Warren
Date: Tue, 20 Nov 2012 16:12:20 -0700
Subject: of: introduce for_each_matching_node_and_match()

The following pattern of code is tempting:

    for_each_matching_node(np, table) {
        match = of_match_node(table, np);

However, this results in iterating over table twice; the second time
inside of_match_node(). The implementation of for_each_matching_node()
already found the match, so this is redundant. Invent new function
of_find_matching_node_and_match() and macro
for_each_matching_node_and_match() to remove the double iteration,
thus transforming the above code to:

    for_each_matching_node_and_match(np, table, &match)

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 drivers/of/base.c  | 18 +++++++++++++-----
 include/linux/of.h | 15 +++++++++++++--
 2 files changed, 26 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index f564e3107b3e..0ceb26a16050 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -594,27 +594,35 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches,
 EXPORT_SYMBOL(of_match_node);
 
 /**
- *	of_find_matching_node - Find a node based on an of_device_id match
- *				table.
+ *	of_find_matching_node_and_match - Find a node based on an of_device_id
+ *					  match table.
  *	@from:		The node to start searching from or NULL, the node
  *			you pass will not be searched, only the next one
  *			will; typically, you pass what the previous call
  *			returned. of_node_put() will be called on it
  *	@matches:	array of of device match structures to search in
+ *	@match		Updated to point at the matches entry which matched
  *
  *	Returns a node pointer with refcount incremented, use
  *	of_node_put() on it when done.
  */
-struct device_node *of_find_matching_node(struct device_node *from,
-					  const struct of_device_id *matches)
+struct device_node *of_find_matching_node_and_match(struct device_node *from,
+					const struct of_device_id *matches,
+					const struct of_device_id **match)
 {
 	struct device_node *np;
 
+	if (match)
+		*match = NULL;
+
 	read_lock(&devtree_lock);
 	np = from ? from->allnext : allnodes;
 	for (; np; np = np->allnext) {
-		if (of_match_node(matches, np) && of_node_get(np))
+		if (of_match_node(matches, np) && of_node_get(np)) {
+			if (match)
+				*match = matches;
 			break;
+		}
 	}
 	of_node_put(from);
 	read_unlock(&devtree_lock);
diff --git a/include/linux/of.h b/include/linux/of.h
index ab1af0e14659..13e0aacb4d9f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -179,11 +179,22 @@ extern struct device_node *of_find_compatible_node(struct device_node *from,
 #define for_each_compatible_node(dn, type, compatible) \
 	for (dn = of_find_compatible_node(NULL, type, compatible); dn; \
 	     dn = of_find_compatible_node(dn, type, compatible))
-extern struct device_node *of_find_matching_node(struct device_node *from,
-	const struct of_device_id *matches);
+extern struct device_node *of_find_matching_node_and_match(
+	struct device_node *from,
+	const struct of_device_id *matches,
+	const struct of_device_id **match);
+static inline struct device_node *of_find_matching_node(
+	struct device_node *from,
+	const struct of_device_id *matches)
+{
+	return of_find_matching_node_and_match(from, matches, NULL);
+}
 #define for_each_matching_node(dn, matches) \
 	for (dn = of_find_matching_node(NULL, matches); dn; \
 	     dn = of_find_matching_node(dn, matches))
+#define for_each_matching_node_and_match(dn, matches, match) \
+	for (dn = of_find_matching_node_and_match(NULL, matches, match); \
+	     dn; dn = of_find_matching_node_and_match(dn, matches, match))
 extern struct device_node *of_find_node_by_path(const char *path);
 extern struct device_node *of_find_node_by_phandle(phandle handle);
 extern struct device_node *of_get_parent(const struct device_node *node);
-- 
cgit v1.2.3-55-g7522


From 1a8f85d402a2bf3b86c08c696a0adf36656f770a Mon Sep 17 00:00:00 2001
From: Philip Rakity
Date: Tue, 20 Nov 2012 18:07:41 +0100
Subject: regulator: add missing prototype for regulator_is_supported_voltage

avoids needs for CONFIG_REGULATOR in sdhci.c

Signed-off-by: Philip Rakity <prakity@nvidia.com>
Reviewed-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/regulator/consumer.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index c43cd3556b1f..4e3ec91bc639 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -358,6 +358,10 @@ static inline void regulator_set_drvdata(struct regulator *regulator,
 {
 }
 
+static inline int regulator_count_voltages(struct regulator *regulator)
+{
+	return 0;
+}
 #endif
 
 static inline int regulator_set_voltage_tol(struct regulator *regulator,
-- 
cgit v1.2.3-55-g7522


From ae72ae676045274c82f3c25159a9dd7cfcf5ffae Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 20 Nov 2012 11:02:55 -0500
Subject: NFSv4.1: Don't confuse CREATE_SESSION arguments and results

Don't store the target request and response sizes in the same
variables used to store the server's replies to those targets.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c         | 30 +++++++++++++++++-------------
 include/linux/nfs_fs_sb.h |  3 +++
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a32d953b08de..3e572dc316e4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5807,8 +5807,8 @@ void nfs4_destroy_session(struct nfs4_session *session)
 static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 {
 	struct nfs4_session *session = args->client->cl_session;
-	unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz,
-		     mxresp_sz = session->fc_attrs.max_resp_sz;
+	unsigned int mxrqst_sz = session->fc_target_max_rqst_sz,
+		     mxresp_sz = session->fc_target_max_resp_sz;
 
 	if (mxrqst_sz == 0)
 		mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
@@ -6015,24 +6015,28 @@ int nfs4_init_session(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_session *session;
-	unsigned int rsize, wsize;
+	unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE;
+	unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE;
 
 	if (!nfs4_has_session(clp))
 		return 0;
 
+	if (server->rsize != 0)
+		target_max_resp_sz = server->rsize;
+	target_max_resp_sz += nfs41_maxread_overhead;
+
+	if (server->wsize != 0)
+		target_max_rqst_sz = server->wsize;
+	target_max_rqst_sz += nfs41_maxwrite_overhead;
+
 	session = clp->cl_session;
 	spin_lock(&clp->cl_lock);
 	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
-
-		rsize = server->rsize;
-		if (rsize == 0)
-			rsize = NFS_MAX_FILE_IO_SIZE;
-		wsize = server->wsize;
-		if (wsize == 0)
-			wsize = NFS_MAX_FILE_IO_SIZE;
-
-		session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
-		session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+		/* Initialise targets and channel attributes */
+		session->fc_target_max_rqst_sz = target_max_rqst_sz;
+		session->fc_attrs.max_rqst_sz = target_max_rqst_sz;
+		session->fc_target_max_resp_sz = target_max_resp_sz;
+		session->fc_attrs.max_resp_sz = target_max_resp_sz;
 	}
 	spin_unlock(&clp->cl_lock);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a9e76ee1adca..97c8f9191880 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -242,6 +242,9 @@ struct nfs4_session {
 	struct nfs4_channel_attrs	bc_attrs;
 	struct nfs4_slot_table		bc_slot_table;
 	struct nfs_client		*clp;
+	/* Create session arguments */
+	unsigned int			fc_target_max_rqst_sz;
+	unsigned int			fc_target_max_resp_sz;
 };
 
 #endif /* CONFIG_NFS_V4 */
-- 
cgit v1.2.3-55-g7522


From 933602e368c4452260c9bff4fbb3baba35cf987a Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 16 Nov 2012 12:12:38 -0500
Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving sr_renewal_time

Store the renewal time inside the session slot instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 15 +++++++++------
 include/linux/nfs_xdr.h |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5e5cc5a5065f..14b39742b6e4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -486,6 +486,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
+	struct nfs4_slot *slot;
 	unsigned long timestamp;
 	struct nfs_client *clp;
 
@@ -502,12 +503,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 	if (!RPC_WAS_SENT(task))
 		goto out;
 
+	slot = res->sr_slot;
+
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
 	case 0:
 		/* Update the slot's sequence and clientid lease timer */
-		++res->sr_slot->seq_nr;
-		timestamp = res->sr_renewal_time;
+		++slot->seq_nr;
+		timestamp = slot->renewal_time;
 		clp = res->sr_session->clp;
 		do_renew_lease(clp, timestamp);
 		/* Check sequence flags */
@@ -521,12 +524,12 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 */
 		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
 			__func__,
-			res->sr_slot - res->sr_session->fc_slot_table.slots,
-			res->sr_slot->seq_nr);
+			slot - res->sr_session->fc_slot_table.slots,
+			slot->seq_nr);
 		goto out_retry;
 	default:
 		/* Just update the slot sequence no. */
-		++res->sr_slot->seq_nr;
+		++slot->seq_nr;
 	}
 out:
 	/* The session may be reset by one of the error handlers. */
@@ -637,6 +640,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
 	slot = tbl->slots + slotid;
+	slot->renewal_time = jiffies;
 	args->sa_session = session;
 	args->sa_slotid = slotid;
 
@@ -644,7 +648,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	res->sr_session = session;
 	res->sr_slot = slot;
-	res->sr_renewal_time = jiffies;
 	res->sr_status_flags = 0;
 	/*
 	 * sr_status is only set in decode_sequence, and so will remain
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a73ea89789d1..9cb1c63a70c2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,7 @@ struct nfs4_channel_attrs {
 
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
+	unsigned long		renewal_time;
 	u32		 	seq_nr;
 };
 
@@ -200,7 +201,6 @@ struct nfs4_sequence_res {
 	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
-	unsigned long		sr_renewal_time;
 	u32			sr_status_flags;
 };
 
-- 
cgit v1.2.3-55-g7522


From 22a8578fca5a47e643bb4f70c232d0ec84db9e4e Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Sat, 10 Nov 2012 13:08:20 -0300
Subject: mtd: mtd_blkdevs: Replace request handler kthread with a workqueue

By replacing a kthread with a workqueue, the code is now a bit clearer.
There's also a slight reduction of code size (numbers apply for x86):
Before:
   text	   data	    bss	    dec	    hex	filename
   3248	     36	      0	   3284	    cd4	drivers/mtd/mtd_blkdevs.o

After:
   text	   data	    bss	    dec	    hex	filename
   3150	     36	      0	   3186	    c72	drivers/mtd/mtd_blkdevs.o

Due to lack of real hardware, tests have been performed on an emulated
environment with mtdswap and mtdblock over nandsim devices.
Some real testing should be done, before merging this patch.

Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/mtd_blkdevs.c    | 47 ++++++++++++++------------------------------
 include/linux/mtd/blktrans.h |  4 +++-
 2 files changed, 18 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 26ef2a72bdae..5ad39bb5ab4c 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,7 +32,6 @@
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
-#include <linux/kthread.h>
 #include <asm/uaccess.h>
 
 #include "mtdcore.h"
@@ -121,16 +120,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
 {
-	if (kthread_should_stop())
-		return 1;
-
 	return dev->bg_stop;
 }
 EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
 
-static int mtd_blktrans_thread(void *arg)
+static void mtd_blktrans_work(struct work_struct *work)
 {
-	struct mtd_blktrans_dev *dev = arg;
+	struct mtd_blktrans_dev *dev =
+		container_of(work, struct mtd_blktrans_dev, work);
 	struct mtd_blktrans_ops *tr = dev->tr;
 	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
@@ -138,7 +135,7 @@ static int mtd_blktrans_thread(void *arg)
 
 	spin_lock_irq(rq->queue_lock);
 
-	while (!kthread_should_stop()) {
+	while (1) {
 		int res;
 
 		dev->bg_stop = false;
@@ -156,15 +153,7 @@ static int mtd_blktrans_thread(void *arg)
 				background_done = !dev->bg_stop;
 				continue;
 			}
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			if (kthread_should_stop())
-				set_current_state(TASK_RUNNING);
-
-			spin_unlock_irq(rq->queue_lock);
-			schedule();
-			spin_lock_irq(rq->queue_lock);
-			continue;
+			break;
 		}
 
 		spin_unlock_irq(rq->queue_lock);
@@ -185,8 +174,6 @@ static int mtd_blktrans_thread(void *arg)
 		__blk_end_request_all(req, -EIO);
 
 	spin_unlock_irq(rq->queue_lock);
-
-	return 0;
 }
 
 static void mtd_blktrans_request(struct request_queue *rq)
@@ -199,10 +186,8 @@ static void mtd_blktrans_request(struct request_queue *rq)
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-	else {
-		dev->bg_stop = true;
-		wake_up_process(dev->thread);
-	}
+	else
+		queue_work(dev->wq, &dev->work);
 }
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
@@ -437,14 +422,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 
 	gd->queue = new->rq;
 
-	/* Create processing thread */
-	/* TODO: workqueue ? */
-	new->thread = kthread_run(mtd_blktrans_thread, new,
-			"%s%d", tr->name, new->mtd->index);
-	if (IS_ERR(new->thread)) {
-		ret = PTR_ERR(new->thread);
+	/* Create processing workqueue */
+	new->wq = alloc_workqueue("%s%d", 0, 0,
+				  tr->name, new->mtd->index);
+	if (!new->wq)
 		goto error4;
-	}
+	INIT_WORK(&new->work, mtd_blktrans_work);
+
 	gd->driverfs_dev = &new->mtd->dev;
 
 	if (new->readonly)
@@ -484,9 +468,8 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 	/* Stop new requests to arrive */
 	del_gendisk(old->disk);
 
-
-	/* Stop the thread */
-	kthread_stop(old->thread);
+	/* Stop workqueue. This will perform any pending request. */
+	destroy_workqueue(old->wq);
 
 	/* Kill current requests */
 	spin_lock_irqsave(&old->queue_lock, flags);
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index ed270bd2e4df..4eb0a50d0c55 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/kref.h>
 #include <linux/sysfs.h>
+#include <linux/workqueue.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -43,7 +44,8 @@ struct mtd_blktrans_dev {
 	struct kref ref;
 	struct gendisk *disk;
 	struct attribute_group *disk_attributes;
-	struct task_struct *thread;
+	struct workqueue_struct *wq;
+	struct work_struct work;
 	struct request_queue *rq;
 	spinlock_t queue_lock;
 	void *priv;
-- 
cgit v1.2.3-55-g7522


From edd7eabc85e2f8d76a933b9639bebfe7f98861e4 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Wed, 14 Nov 2012 21:09:28 +0530
Subject: mfd: Add TI TPS80031 mfd core driver

TPS80031/ TPS80032 Fully Integrated Power Management with Power
Path and Battery Charger. The device provides five configurable
step-down converters, 11 general purpose LDOs, USB OTG Module,
ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
Power Path from USB, 32K clock generator.

Add the mfd core driver for TPS80031/TPS80032.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviwed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig          |  14 +
 drivers/mfd/Makefile         |   1 +
 drivers/mfd/tps80031.c       | 573 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps80031.h | 637 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1225 insertions(+)
 create mode 100644 drivers/mfd/tps80031.c
 create mode 100644 include/linux/mfd/tps80031.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 59359a7a2493..ca633df7c330 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -265,6 +265,20 @@ config MFD_TPS65912_SPI
 	  If you say yes here you get support for the TPS65912 series of
 	  PM chips with SPI interface.
 
+config MFD_TPS80031
+	bool "TI TPS80031/TPS80032 Power Management chips"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	select IRQ_DOMAIN
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  TPS80031/ TPS80032 Fully Integrated Power Management with Power
+	  Path and Battery Charger. The device provides five configurable
+	  step-down converters, 11 general purpose LDOs, USB OTG Module,
+	  ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
+	  Power Path from USB, 32K clock generator.
+
 config MENELAUS
 	bool "Texas Instruments TWL92330/Menelaus PM chip"
 	depends on I2C=y && ARCH_OMAP2
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 1c3ee7c76906..8072460e99d2 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -61,6 +61,7 @@ tps65912-objs                   := tps65912-core.o tps65912-irq.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
 obj-$(CONFIG_MFD_TPS65912_SPI)  += tps65912-spi.o
+obj-$(CONFIG_MFD_TPS80031)	+= tps80031.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
diff --git a/drivers/mfd/tps80031.c b/drivers/mfd/tps80031.c
new file mode 100644
index 000000000000..f64005efa6fa
--- /dev/null
+++ b/drivers/mfd/tps80031.c
@@ -0,0 +1,573 @@
+/*
+ * tps80031.c -- TI TPS80031/TPS80032 mfd core driver.
+ *
+ * MFD core driver for TI TPS80031/TPS80032 Fully Integrated
+ * Power Management with Power Path and Battery Charger
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/tps80031.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+static struct resource tps80031_rtc_resources[] = {
+	{
+		.start = TPS80031_INT_RTC_ALARM,
+		.end = TPS80031_INT_RTC_ALARM,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/* TPS80031 sub mfd devices */
+static struct mfd_cell tps80031_cell[] = {
+	{
+		.name = "tps80031-pmic",
+	},
+	{
+		.name = "tps80031-clock",
+	},
+	{
+		.name = "tps80031-rtc",
+		.num_resources = ARRAY_SIZE(tps80031_rtc_resources),
+		.resources = tps80031_rtc_resources,
+	},
+	{
+		.name = "tps80031-gpadc",
+	},
+	{
+		.name = "tps80031-fuel-gauge",
+	},
+	{
+		.name = "tps80031-charger",
+	},
+};
+
+static int tps80031_slave_address[TPS80031_NUM_SLAVES] = {
+	TPS80031_I2C_ID0_ADDR,
+	TPS80031_I2C_ID1_ADDR,
+	TPS80031_I2C_ID2_ADDR,
+	TPS80031_I2C_ID3_ADDR,
+};
+
+struct tps80031_pupd_data {
+	u8	reg;
+	u8	pullup_bit;
+	u8	pulldown_bit;
+};
+
+#define TPS80031_IRQ(_reg, _mask)			\
+	{							\
+		.reg_offset = (TPS80031_INT_MSK_LINE_##_reg) -	\
+				TPS80031_INT_MSK_LINE_A,	\
+		.mask = BIT(_mask),				\
+	}
+
+static const struct regmap_irq tps80031_main_irqs[] = {
+	[TPS80031_INT_PWRON]		= TPS80031_IRQ(A, 0),
+	[TPS80031_INT_RPWRON]		= TPS80031_IRQ(A, 1),
+	[TPS80031_INT_SYS_VLOW]		= TPS80031_IRQ(A, 2),
+	[TPS80031_INT_RTC_ALARM]	= TPS80031_IRQ(A, 3),
+	[TPS80031_INT_RTC_PERIOD]	= TPS80031_IRQ(A, 4),
+	[TPS80031_INT_HOT_DIE]		= TPS80031_IRQ(A, 5),
+	[TPS80031_INT_VXX_SHORT]	= TPS80031_IRQ(A, 6),
+	[TPS80031_INT_SPDURATION]	= TPS80031_IRQ(A, 7),
+	[TPS80031_INT_WATCHDOG]		= TPS80031_IRQ(B, 0),
+	[TPS80031_INT_BAT]		= TPS80031_IRQ(B, 1),
+	[TPS80031_INT_SIM]		= TPS80031_IRQ(B, 2),
+	[TPS80031_INT_MMC]		= TPS80031_IRQ(B, 3),
+	[TPS80031_INT_RES]		= TPS80031_IRQ(B, 4),
+	[TPS80031_INT_GPADC_RT]		= TPS80031_IRQ(B, 5),
+	[TPS80031_INT_GPADC_SW2_EOC]	= TPS80031_IRQ(B, 6),
+	[TPS80031_INT_CC_AUTOCAL]	= TPS80031_IRQ(B, 7),
+	[TPS80031_INT_ID_WKUP]		= TPS80031_IRQ(C, 0),
+	[TPS80031_INT_VBUSS_WKUP]	= TPS80031_IRQ(C, 1),
+	[TPS80031_INT_ID]		= TPS80031_IRQ(C, 2),
+	[TPS80031_INT_VBUS]		= TPS80031_IRQ(C, 3),
+	[TPS80031_INT_CHRG_CTRL]	= TPS80031_IRQ(C, 4),
+	[TPS80031_INT_EXT_CHRG]		= TPS80031_IRQ(C, 5),
+	[TPS80031_INT_INT_CHRG]		= TPS80031_IRQ(C, 6),
+	[TPS80031_INT_RES2]		= TPS80031_IRQ(C, 7),
+};
+
+static struct regmap_irq_chip tps80031_irq_chip = {
+	.name = "tps80031",
+	.irqs = tps80031_main_irqs,
+	.num_irqs = ARRAY_SIZE(tps80031_main_irqs),
+	.num_regs = 3,
+	.status_base = TPS80031_INT_STS_A,
+	.mask_base = TPS80031_INT_MSK_LINE_A,
+};
+
+#define PUPD_DATA(_reg, _pulldown_bit, _pullup_bit)	\
+	{						\
+		.reg = TPS80031_CFG_INPUT_PUPD##_reg,	\
+		.pulldown_bit = _pulldown_bit,		\
+		.pullup_bit = _pullup_bit,		\
+	}
+
+static const struct tps80031_pupd_data tps80031_pupds[] = {
+	[TPS80031_PREQ1]		= PUPD_DATA(1, BIT(0),	BIT(1)),
+	[TPS80031_PREQ2A]		= PUPD_DATA(1, BIT(2),	BIT(3)),
+	[TPS80031_PREQ2B]		= PUPD_DATA(1, BIT(4),	BIT(5)),
+	[TPS80031_PREQ2C]		= PUPD_DATA(1, BIT(6),	BIT(7)),
+	[TPS80031_PREQ3]		= PUPD_DATA(2, BIT(0),	BIT(1)),
+	[TPS80031_NRES_WARM]		= PUPD_DATA(2, 0,	BIT(2)),
+	[TPS80031_PWM_FORCE]		= PUPD_DATA(2, BIT(5),	0),
+	[TPS80031_CHRG_EXT_CHRG_STATZ]	= PUPD_DATA(2, 0,	BIT(6)),
+	[TPS80031_SIM]			= PUPD_DATA(3, BIT(0),	BIT(1)),
+	[TPS80031_MMC]			= PUPD_DATA(3, BIT(2),	BIT(3)),
+	[TPS80031_GPADC_START]		= PUPD_DATA(3, BIT(4),	0),
+	[TPS80031_DVSI2C_SCL]		= PUPD_DATA(4, 0,	BIT(0)),
+	[TPS80031_DVSI2C_SDA]		= PUPD_DATA(4, 0,	BIT(1)),
+	[TPS80031_CTLI2C_SCL]		= PUPD_DATA(4, 0,	BIT(2)),
+	[TPS80031_CTLI2C_SDA]		= PUPD_DATA(4, 0,	BIT(3)),
+};
+static struct tps80031 *tps80031_power_off_dev;
+
+int tps80031_ext_power_req_config(struct device *dev,
+		unsigned long ext_ctrl_flag, int preq_bit,
+		int state_reg_add, int trans_reg_add)
+{
+	u8 res_ass_reg = 0;
+	int preq_mask_bit = 0;
+	int ret;
+
+	if (!(ext_ctrl_flag & TPS80031_EXT_PWR_REQ))
+		return 0;
+
+	if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ1) {
+		res_ass_reg = TPS80031_PREQ1_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 5;
+	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ2) {
+		res_ass_reg = TPS80031_PREQ2_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 6;
+	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ3) {
+		res_ass_reg = TPS80031_PREQ3_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 7;
+	}
+
+	/* Configure REQ_ASS registers */
+	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, res_ass_reg,
+					BIT(preq_bit & 0x7));
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x setbit failed, err = %d\n",
+				res_ass_reg, ret);
+		return ret;
+	}
+
+	/* Unmask the PREQ */
+	ret = tps80031_clr_bits(dev, TPS80031_SLAVE_ID1,
+			TPS80031_PHOENIX_MSK_TRANSITION, BIT(preq_mask_bit));
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x clrbit failed, err = %d\n",
+			TPS80031_PHOENIX_MSK_TRANSITION, ret);
+		return ret;
+	}
+
+	/* Switch regulator control to resource now */
+	if (ext_ctrl_flag & (TPS80031_PWR_REQ_INPUT_PREQ2 |
+					TPS80031_PWR_REQ_INPUT_PREQ3)) {
+		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, state_reg_add,
+						0x0, TPS80031_STATE_MASK);
+		if (ret < 0)
+			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
+				state_reg_add, ret);
+	} else {
+		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, trans_reg_add,
+				TPS80031_TRANS_SLEEP_OFF,
+				TPS80031_TRANS_SLEEP_MASK);
+		if (ret < 0)
+			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
+				trans_reg_add, ret);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tps80031_ext_power_req_config);
+
+static void tps80031_power_off(void)
+{
+	dev_info(tps80031_power_off_dev->dev, "switching off PMU\n");
+	tps80031_write(tps80031_power_off_dev->dev, TPS80031_SLAVE_ID1,
+				TPS80031_PHOENIX_DEV_ON, TPS80031_DEVOFF);
+}
+
+static void tps80031_pupd_init(struct tps80031 *tps80031,
+			       struct tps80031_platform_data *pdata)
+{
+	struct tps80031_pupd_init_data *pupd_init_data = pdata->pupd_init_data;
+	int data_size = pdata->pupd_init_data_size;
+	int i;
+
+	for (i = 0; i < data_size; ++i) {
+		struct tps80031_pupd_init_data *pupd_init = &pupd_init_data[i];
+		const struct tps80031_pupd_data *pupd =
+			&tps80031_pupds[pupd_init->input_pin];
+		u8 update_value = 0;
+		u8 update_mask = pupd->pulldown_bit | pupd->pullup_bit;
+
+		if (pupd_init->setting == TPS80031_PUPD_PULLDOWN)
+			update_value = pupd->pulldown_bit;
+		else if (pupd_init->setting == TPS80031_PUPD_PULLUP)
+			update_value = pupd->pullup_bit;
+
+		tps80031_update(tps80031->dev, TPS80031_SLAVE_ID1, pupd->reg,
+				update_value, update_mask);
+	}
+}
+
+static int tps80031_init_ext_control(struct tps80031 *tps80031,
+			struct tps80031_platform_data *pdata)
+{
+	struct device *dev = tps80031->dev;
+	int ret;
+	int i;
+
+	/* Clear all external control for this rail */
+	for (i = 0; i < 9; ++i) {
+		ret = tps80031_write(dev, TPS80031_SLAVE_ID1,
+				TPS80031_PREQ1_RES_ASS_A + i, 0);
+		if (ret < 0) {
+			dev_err(dev, "reg 0x%02x write failed, err = %d\n",
+				TPS80031_PREQ1_RES_ASS_A + i, ret);
+			return ret;
+		}
+	}
+
+	/* Mask the PREQ */
+	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1,
+			TPS80031_PHOENIX_MSK_TRANSITION, 0x7 << 5);
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x set_bits failed, err = %d\n",
+			TPS80031_PHOENIX_MSK_TRANSITION, ret);
+		return ret;
+	}
+	return ret;
+}
+
+static int __devinit tps80031_irq_init(struct tps80031 *tps80031, int irq,
+				int irq_base)
+{
+	struct device *dev = tps80031->dev;
+	int i, ret;
+
+	/*
+	 * The MASK register used for updating status register when
+	 * interrupt occurs and LINE register used to pass the status
+	 * to actual interrupt line.  As per datasheet:
+	 * When INT_MSK_LINE [i] is set to 1, the associated interrupt
+	 * number i is INT line masked, which means that no interrupt is
+	 * generated on the INT line.
+	 * When INT_MSK_LINE [i] is set to 0, the associated interrupt
+	 * number i is  line enabled: An interrupt is generated on the
+	 * INT line.
+	 * In any case, the INT_STS [i] status bit may or may not be updated,
+	 * only linked to the INT_MSK_STS [i] configuration register bit.
+	 *
+	 * When INT_MSK_STS [i] is set to 1, the associated interrupt number
+	 * i is status masked, which means that no interrupt is stored in
+	 * the INT_STS[i] status bit. Note that no interrupt number i is
+	 * generated on the INT line, even if the INT_MSK_LINE [i] register
+	 * bit is set to 0.
+	 * When INT_MSK_STS [i] is set to 0, the associated interrupt number i
+	 * is status enabled: An interrupt status is updated in the INT_STS [i]
+	 * register. The interrupt may or may not be generated on the INT line,
+	 * depending on the INT_MSK_LINE [i] configuration register bit.
+	 */
+	for (i = 0; i < 3; i++)
+		tps80031_write(dev, TPS80031_SLAVE_ID2,
+				TPS80031_INT_MSK_STS_A + i, 0x00);
+
+	ret = regmap_add_irq_chip(tps80031->regmap[TPS80031_SLAVE_ID2], irq,
+			IRQF_ONESHOT, irq_base,
+			&tps80031_irq_chip, &tps80031->irq_data);
+	if (ret < 0) {
+		dev_err(dev, "add irq failed, err = %d\n", ret);
+		return ret;
+	}
+	return ret;
+}
+
+static bool rd_wr_reg_id0(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SMPS1_CFG_FORCE ... TPS80031_SMPS2_CFG_VOLTAGE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id1(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SECONDS_REG ... TPS80031_RTC_RESET_STATUS_REG:
+	case TPS80031_VALIDITY0 ... TPS80031_VALIDITY7:
+	case TPS80031_PHOENIX_START_CONDITION ... TPS80031_KEY_PRESS_DUR_CFG:
+	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
+	case TPS80031_BROADCAST_ADDR_ALL ... TPS80031_BROADCAST_ADDR_CLK_RST:
+	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
+	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
+	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
+	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
+	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
+	case TPS80031_BACKUP_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_volatile_reg_id1(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
+	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
+	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
+	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
+	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
+	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id2(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_USB_VENDOR_ID_LSB ... TPS80031_USB_OTG_REVISION:
+	case TPS80031_GPADC_CTRL ... TPS80031_CTRL_P1:
+	case TPS80031_RTCH0_LSB ... TPS80031_GPCH0_MSB:
+	case TPS80031_TOGGLE1 ... TPS80031_VIBMODE:
+	case TPS80031_PWM1ON ... TPS80031_PWM2OFF:
+	case TPS80031_FG_REG_00 ... TPS80031_FG_REG_11:
+	case TPS80031_INT_STS_A ... TPS80031_INT_MSK_STS_C:
+	case TPS80031_CONTROLLER_CTRL2 ... TPS80031_LED_PWM_CTRL2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id3(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_GPADC_TRIM0 ... TPS80031_GPADC_TRIM18:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config tps80031_regmap_configs[] = {
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id0,
+		.readable_reg = rd_wr_reg_id0,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id1,
+		.readable_reg = rd_wr_reg_id1,
+		.volatile_reg = is_volatile_reg_id1,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id2,
+		.readable_reg = rd_wr_reg_id2,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id3,
+		.readable_reg = rd_wr_reg_id3,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+};
+
+static int __devinit tps80031_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct tps80031_platform_data *pdata = client->dev.platform_data;
+	struct tps80031 *tps80031;
+	int ret;
+	uint8_t es_version;
+	uint8_t ep_ver;
+	int i;
+
+	if (!pdata) {
+		dev_err(&client->dev, "tps80031 requires platform data\n");
+		return -EINVAL;
+	}
+
+	tps80031 = devm_kzalloc(&client->dev, sizeof(*tps80031), GFP_KERNEL);
+	if (!tps80031) {
+		dev_err(&client->dev, "Malloc failed for tps80031\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031_slave_address[i] == client->addr)
+			tps80031->clients[i] = client;
+		else
+			tps80031->clients[i] = i2c_new_dummy(client->adapter,
+						tps80031_slave_address[i]);
+		if (!tps80031->clients[i]) {
+			dev_err(&client->dev, "can't attach client %d\n", i);
+			ret = -ENOMEM;
+			goto fail_client_reg;
+		}
+
+		i2c_set_clientdata(tps80031->clients[i], tps80031);
+		tps80031->regmap[i] = devm_regmap_init_i2c(tps80031->clients[i],
+					&tps80031_regmap_configs[i]);
+		if (IS_ERR(tps80031->regmap[i])) {
+			ret = PTR_ERR(tps80031->regmap[i]);
+			dev_err(&client->dev,
+				"regmap %d init failed, err %d\n", i, ret);
+			goto fail_client_reg;
+		}
+	}
+
+	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
+			TPS80031_JTAGVERNUM, &es_version);
+	if (ret < 0) {
+		dev_err(&client->dev,
+			"Silicon version number read failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
+			TPS80031_EPROM_REV, &ep_ver);
+	if (ret < 0) {
+		dev_err(&client->dev,
+			"Silicon eeprom version read failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n",
+					es_version, ep_ver);
+	tps80031->es_version = es_version;
+	tps80031->dev = &client->dev;
+	i2c_set_clientdata(client, tps80031);
+	tps80031->chip_info = id->driver_data;
+
+	ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base);
+	if (ret) {
+		dev_err(&client->dev, "IRQ init failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	tps80031_pupd_init(tps80031, pdata);
+
+	tps80031_init_ext_control(tps80031, pdata);
+
+	ret = mfd_add_devices(tps80031->dev, -1,
+			tps80031_cell, ARRAY_SIZE(tps80031_cell),
+			NULL, 0,
+			regmap_irq_get_domain(tps80031->irq_data));
+	if (ret < 0) {
+		dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret);
+		goto fail_mfd_add;
+	}
+
+	if (pdata->use_power_off && !pm_power_off) {
+		tps80031_power_off_dev = tps80031;
+		pm_power_off = tps80031_power_off;
+	}
+	return 0;
+
+fail_mfd_add:
+	regmap_del_irq_chip(client->irq, tps80031->irq_data);
+
+fail_client_reg:
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031->clients[i]  && (tps80031->clients[i] != client))
+			i2c_unregister_device(tps80031->clients[i]);
+	}
+	return ret;
+}
+
+static int __devexit tps80031_remove(struct i2c_client *client)
+{
+	struct tps80031 *tps80031 = i2c_get_clientdata(client);
+	int i;
+
+	if (tps80031_power_off_dev == tps80031) {
+		tps80031_power_off_dev = NULL;
+		pm_power_off = NULL;
+	}
+
+	mfd_remove_devices(tps80031->dev);
+
+	regmap_del_irq_chip(client->irq, tps80031->irq_data);
+
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031->clients[i] != client)
+			i2c_unregister_device(tps80031->clients[i]);
+	}
+	return 0;
+}
+
+static const struct i2c_device_id tps80031_id_table[] = {
+	{ "tps80031", TPS80031 },
+	{ "tps80032", TPS80032 },
+};
+MODULE_DEVICE_TABLE(i2c, tps80031_id_table);
+
+static struct i2c_driver tps80031_driver = {
+	.driver	= {
+		.name	= "tps80031",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= tps80031_probe,
+	.remove		= __devexit_p(tps80031_remove),
+	.id_table	= tps80031_id_table,
+};
+
+static int __init tps80031_init(void)
+{
+	return i2c_add_driver(&tps80031_driver);
+}
+subsys_initcall(tps80031_init);
+
+static void __exit tps80031_exit(void)
+{
+	i2c_del_driver(&tps80031_driver);
+}
+module_exit(tps80031_exit);
+
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_DESCRIPTION("TPS80031 core driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h
new file mode 100644
index 000000000000..2c75c9c9318f
--- /dev/null
+++ b/include/linux/mfd/tps80031.h
@@ -0,0 +1,637 @@
+/*
+ * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver.
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __LINUX_MFD_TPS80031_H
+#define __LINUX_MFD_TPS80031_H
+
+#include <linux/device.h>
+#include <linux/regmap.h>
+
+/* Pull-ups/Pull-downs */
+#define TPS80031_CFG_INPUT_PUPD1			0xF0
+#define TPS80031_CFG_INPUT_PUPD2			0xF1
+#define TPS80031_CFG_INPUT_PUPD3			0xF2
+#define TPS80031_CFG_INPUT_PUPD4			0xF3
+#define TPS80031_CFG_LDO_PD1				0xF4
+#define TPS80031_CFG_LDO_PD2				0xF5
+#define TPS80031_CFG_SMPS_PD				0xF6
+
+/* Real Time Clock */
+#define TPS80031_SECONDS_REG				0x00
+#define TPS80031_MINUTES_REG				0x01
+#define TPS80031_HOURS_REG				0x02
+#define TPS80031_DAYS_REG				0x03
+#define TPS80031_MONTHS_REG				0x04
+#define TPS80031_YEARS_REG				0x05
+#define TPS80031_WEEKS_REG				0x06
+#define TPS80031_ALARM_SECONDS_REG			0x08
+#define TPS80031_ALARM_MINUTES_REG			0x09
+#define TPS80031_ALARM_HOURS_REG			0x0A
+#define TPS80031_ALARM_DAYS_REG				0x0B
+#define TPS80031_ALARM_MONTHS_REG			0x0C
+#define TPS80031_ALARM_YEARS_REG			0x0D
+#define TPS80031_RTC_CTRL_REG				0x10
+#define TPS80031_RTC_STATUS_REG				0x11
+#define TPS80031_RTC_INTERRUPTS_REG			0x12
+#define TPS80031_RTC_COMP_LSB_REG			0x13
+#define TPS80031_RTC_COMP_MSB_REG			0x14
+#define TPS80031_RTC_RESET_STATUS_REG			0x16
+
+/*PMC Master Module */
+#define TPS80031_PHOENIX_START_CONDITION		0x1F
+#define TPS80031_PHOENIX_MSK_TRANSITION			0x20
+#define TPS80031_STS_HW_CONDITIONS			0x21
+#define TPS80031_PHOENIX_LAST_TURNOFF_STS		0x22
+#define TPS80031_VSYSMIN_LO_THRESHOLD			0x23
+#define TPS80031_VSYSMIN_HI_THRESHOLD			0x24
+#define TPS80031_PHOENIX_DEV_ON				0x25
+#define TPS80031_STS_PWR_GRP_STATE			0x27
+#define TPS80031_PH_CFG_VSYSLOW				0x28
+#define TPS80031_PH_STS_BOOT				0x29
+#define TPS80031_PHOENIX_SENS_TRANSITION		0x2A
+#define TPS80031_PHOENIX_SEQ_CFG			0x2B
+#define TPS80031_PRIMARY_WATCHDOG_CFG			0X2C
+#define TPS80031_KEY_PRESS_DUR_CFG			0X2D
+#define TPS80031_SMPS_LDO_SHORT_STS			0x2E
+
+/* PMC Slave Module - Broadcast */
+#define TPS80031_BROADCAST_ADDR_ALL			0x31
+#define TPS80031_BROADCAST_ADDR_REF			0x32
+#define TPS80031_BROADCAST_ADDR_PROV			0x33
+#define TPS80031_BROADCAST_ADDR_CLK_RST			0x34
+
+/* PMC Slave Module  SMPS Regulators */
+#define TPS80031_SMPS4_CFG_TRANS			0x41
+#define TPS80031_SMPS4_CFG_STATE			0x42
+#define TPS80031_SMPS4_CFG_VOLTAGE			0x44
+#define TPS80031_VIO_CFG_TRANS				0x47
+#define TPS80031_VIO_CFG_STATE				0x48
+#define TPS80031_VIO_CFG_FORCE				0x49
+#define TPS80031_VIO_CFG_VOLTAGE			0x4A
+#define TPS80031_VIO_CFG_STEP				0x48
+#define TPS80031_SMPS1_CFG_TRANS			0x53
+#define TPS80031_SMPS1_CFG_STATE			0x54
+#define TPS80031_SMPS1_CFG_FORCE			0x55
+#define TPS80031_SMPS1_CFG_VOLTAGE			0x56
+#define TPS80031_SMPS1_CFG_STEP				0x57
+#define TPS80031_SMPS2_CFG_TRANS			0x59
+#define TPS80031_SMPS2_CFG_STATE			0x5A
+#define TPS80031_SMPS2_CFG_FORCE			0x5B
+#define TPS80031_SMPS2_CFG_VOLTAGE			0x5C
+#define TPS80031_SMPS2_CFG_STEP				0x5D
+#define TPS80031_SMPS3_CFG_TRANS			0x65
+#define TPS80031_SMPS3_CFG_STATE			0x66
+#define TPS80031_SMPS3_CFG_VOLTAGE			0x68
+
+/* PMC Slave Module  LDO Regulators */
+#define TPS80031_VANA_CFG_TRANS				0x81
+#define TPS80031_VANA_CFG_STATE				0x82
+#define TPS80031_VANA_CFG_VOLTAGE			0x83
+#define TPS80031_LDO2_CFG_TRANS				0x85
+#define TPS80031_LDO2_CFG_STATE				0x86
+#define TPS80031_LDO2_CFG_VOLTAGE			0x87
+#define TPS80031_LDO4_CFG_TRANS				0x89
+#define TPS80031_LDO4_CFG_STATE				0x8A
+#define TPS80031_LDO4_CFG_VOLTAGE			0x8B
+#define TPS80031_LDO3_CFG_TRANS				0x8D
+#define TPS80031_LDO3_CFG_STATE				0x8E
+#define TPS80031_LDO3_CFG_VOLTAGE			0x8F
+#define TPS80031_LDO6_CFG_TRANS				0x91
+#define TPS80031_LDO6_CFG_STATE				0x92
+#define TPS80031_LDO6_CFG_VOLTAGE			0x93
+#define TPS80031_LDOLN_CFG_TRANS			0x95
+#define TPS80031_LDOLN_CFG_STATE			0x96
+#define TPS80031_LDOLN_CFG_VOLTAGE			0x97
+#define TPS80031_LDO5_CFG_TRANS				0x99
+#define TPS80031_LDO5_CFG_STATE				0x9A
+#define TPS80031_LDO5_CFG_VOLTAGE			0x9B
+#define TPS80031_LDO1_CFG_TRANS				0x9D
+#define TPS80031_LDO1_CFG_STATE				0x9E
+#define TPS80031_LDO1_CFG_VOLTAGE			0x9F
+#define TPS80031_LDOUSB_CFG_TRANS			0xA1
+#define TPS80031_LDOUSB_CFG_STATE			0xA2
+#define TPS80031_LDOUSB_CFG_VOLTAGE			0xA3
+#define TPS80031_LDO7_CFG_TRANS				0xA5
+#define TPS80031_LDO7_CFG_STATE				0xA6
+#define TPS80031_LDO7_CFG_VOLTAGE			0xA7
+
+/* PMC Slave Module  External Control */
+#define TPS80031_REGEN1_CFG_TRANS			0xAE
+#define TPS80031_REGEN1_CFG_STATE			0xAF
+#define TPS80031_REGEN2_CFG_TRANS			0xB1
+#define TPS80031_REGEN2_CFG_STATE			0xB2
+#define TPS80031_SYSEN_CFG_TRANS			0xB4
+#define TPS80031_SYSEN_CFG_STATE			0xB5
+
+/* PMC Slave Module  Internal Control */
+#define TPS80031_NRESPWRON_CFG_TRANS			0xB7
+#define TPS80031_NRESPWRON_CFG_STATE			0xB8
+#define TPS80031_CLK32KAO_CFG_TRANS			0xBA
+#define TPS80031_CLK32KAO_CFG_STATE			0xBB
+#define TPS80031_CLK32KG_CFG_TRANS			0xBD
+#define TPS80031_CLK32KG_CFG_STATE			0xBE
+#define TPS80031_CLK32KAUDIO_CFG_TRANS			0xC0
+#define TPS80031_CLK32KAUDIO_CFG_STATE			0xC1
+#define TPS80031_VRTC_CFG_TRANS				0xC3
+#define TPS80031_VRTC_CFG_STATE				0xC4
+#define TPS80031_BIAS_CFG_TRANS				0xC6
+#define TPS80031_BIAS_CFG_STATE				0xC7
+#define TPS80031_VSYSMIN_HI_CFG_TRANS			0xC9
+#define TPS80031_VSYSMIN_HI_CFG_STATE			0xCA
+#define TPS80031_RC6MHZ_CFG_TRANS			0xCC
+#define TPS80031_RC6MHZ_CFG_STATE			0xCD
+#define TPS80031_TMP_CFG_TRANS				0xCF
+#define TPS80031_TMP_CFG_STATE				0xD0
+
+/* PMC Slave Module  resources assignment */
+#define TPS80031_PREQ1_RES_ASS_A			0xD7
+#define TPS80031_PREQ1_RES_ASS_B			0xD8
+#define TPS80031_PREQ1_RES_ASS_C			0xD9
+#define TPS80031_PREQ2_RES_ASS_A			0xDA
+#define TPS80031_PREQ2_RES_ASS_B			0xDB
+#define TPS80031_PREQ2_RES_ASS_C			0xDC
+#define TPS80031_PREQ3_RES_ASS_A			0xDD
+#define TPS80031_PREQ3_RES_ASS_B			0xDE
+#define TPS80031_PREQ3_RES_ASS_C			0xDF
+
+/* PMC Slave Module  Miscellaneous */
+#define TPS80031_SMPS_OFFSET				0xE0
+#define TPS80031_SMPS_MULT				0xE3
+#define TPS80031_MISC1					0xE4
+#define TPS80031_MISC2					0xE5
+#define TPS80031_BBSPOR_CFG				0xE6
+#define TPS80031_TMP_CFG				0xE7
+
+/* Battery Charging Controller and Indicator LED */
+#define TPS80031_CONTROLLER_CTRL2			0xDA
+#define TPS80031_CONTROLLER_VSEL_COMP			0xDB
+#define TPS80031_CHARGERUSB_VSYSREG			0xDC
+#define TPS80031_CHARGERUSB_VICHRG_PC			0xDD
+#define TPS80031_LINEAR_CHRG_STS			0xDE
+#define TPS80031_CONTROLLER_INT_MASK			0xE0
+#define TPS80031_CONTROLLER_CTRL1			0xE1
+#define TPS80031_CONTROLLER_WDG				0xE2
+#define TPS80031_CONTROLLER_STAT1			0xE3
+#define TPS80031_CHARGERUSB_INT_STATUS			0xE4
+#define TPS80031_CHARGERUSB_INT_MASK			0xE5
+#define TPS80031_CHARGERUSB_STATUS_INT1			0xE6
+#define TPS80031_CHARGERUSB_STATUS_INT2			0xE7
+#define TPS80031_CHARGERUSB_CTRL1			0xE8
+#define TPS80031_CHARGERUSB_CTRL2			0xE9
+#define TPS80031_CHARGERUSB_CTRL3			0xEA
+#define TPS80031_CHARGERUSB_STAT1			0xEB
+#define TPS80031_CHARGERUSB_VOREG			0xEC
+#define TPS80031_CHARGERUSB_VICHRG			0xED
+#define TPS80031_CHARGERUSB_CINLIMIT			0xEE
+#define TPS80031_CHARGERUSB_CTRLLIMIT1			0xEF
+#define TPS80031_CHARGERUSB_CTRLLIMIT2			0xF0
+#define TPS80031_LED_PWM_CTRL1				0xF4
+#define TPS80031_LED_PWM_CTRL2				0xF5
+
+/* USB On-The-Go  */
+#define TPS80031_BACKUP_REG				0xFA
+#define TPS80031_USB_VENDOR_ID_LSB			0x00
+#define TPS80031_USB_VENDOR_ID_MSB			0x01
+#define TPS80031_USB_PRODUCT_ID_LSB			0x02
+#define TPS80031_USB_PRODUCT_ID_MSB			0x03
+#define TPS80031_USB_VBUS_CTRL_SET			0x04
+#define TPS80031_USB_VBUS_CTRL_CLR			0x05
+#define TPS80031_USB_ID_CTRL_SET			0x06
+#define TPS80031_USB_ID_CTRL_CLR			0x07
+#define TPS80031_USB_VBUS_INT_SRC			0x08
+#define TPS80031_USB_VBUS_INT_LATCH_SET			0x09
+#define TPS80031_USB_VBUS_INT_LATCH_CLR			0x0A
+#define TPS80031_USB_VBUS_INT_EN_LO_SET			0x0B
+#define TPS80031_USB_VBUS_INT_EN_LO_CLR			0x0C
+#define TPS80031_USB_VBUS_INT_EN_HI_SET			0x0D
+#define TPS80031_USB_VBUS_INT_EN_HI_CLR			0x0E
+#define TPS80031_USB_ID_INT_SRC				0x0F
+#define TPS80031_USB_ID_INT_LATCH_SET			0x10
+#define TPS80031_USB_ID_INT_LATCH_CLR			0x11
+#define TPS80031_USB_ID_INT_EN_LO_SET			0x12
+#define TPS80031_USB_ID_INT_EN_LO_CLR			0x13
+#define TPS80031_USB_ID_INT_EN_HI_SET			0x14
+#define TPS80031_USB_ID_INT_EN_HI_CLR			0x15
+#define TPS80031_USB_OTG_ADP_CTRL			0x16
+#define TPS80031_USB_OTG_ADP_HIGH			0x17
+#define TPS80031_USB_OTG_ADP_LOW			0x18
+#define TPS80031_USB_OTG_ADP_RISE			0x19
+#define TPS80031_USB_OTG_REVISION			0x1A
+
+/* Gas Gauge */
+#define TPS80031_FG_REG_00				0xC0
+#define TPS80031_FG_REG_01				0xC1
+#define TPS80031_FG_REG_02				0xC2
+#define TPS80031_FG_REG_03				0xC3
+#define TPS80031_FG_REG_04				0xC4
+#define TPS80031_FG_REG_05				0xC5
+#define TPS80031_FG_REG_06				0xC6
+#define TPS80031_FG_REG_07				0xC7
+#define TPS80031_FG_REG_08				0xC8
+#define TPS80031_FG_REG_09				0xC9
+#define TPS80031_FG_REG_10				0xCA
+#define TPS80031_FG_REG_11				0xCB
+
+/* General Purpose ADC */
+#define TPS80031_GPADC_CTRL				0x2E
+#define TPS80031_GPADC_CTRL2				0x2F
+#define TPS80031_RTSELECT_LSB				0x32
+#define TPS80031_RTSELECT_ISB				0x33
+#define TPS80031_RTSELECT_MSB				0x34
+#define TPS80031_GPSELECT_ISB				0x35
+#define TPS80031_CTRL_P1				0x36
+#define TPS80031_RTCH0_LSB				0x37
+#define TPS80031_RTCH0_MSB				0x38
+#define TPS80031_RTCH1_LSB				0x39
+#define TPS80031_RTCH1_MSB				0x3A
+#define TPS80031_GPCH0_LSB				0x3B
+#define TPS80031_GPCH0_MSB				0x3C
+
+/* SIM, MMC and Battery Detection */
+#define TPS80031_SIMDEBOUNCING				0xEB
+#define TPS80031_SIMCTRL				0xEC
+#define TPS80031_MMCDEBOUNCING				0xED
+#define TPS80031_MMCCTRL				0xEE
+#define TPS80031_BATDEBOUNCING				0xEF
+
+/* Vibrator Driver and PWMs */
+#define TPS80031_VIBCTRL				0x9B
+#define TPS80031_VIBMODE				0x9C
+#define TPS80031_PWM1ON					0xBA
+#define TPS80031_PWM1OFF				0xBB
+#define TPS80031_PWM2ON					0xBD
+#define TPS80031_PWM2OFF				0xBE
+
+/* Control Interface */
+#define TPS80031_INT_STS_A				0xD0
+#define TPS80031_INT_STS_B				0xD1
+#define TPS80031_INT_STS_C				0xD2
+#define TPS80031_INT_MSK_LINE_A				0xD3
+#define TPS80031_INT_MSK_LINE_B				0xD4
+#define TPS80031_INT_MSK_LINE_C				0xD5
+#define TPS80031_INT_MSK_STS_A				0xD6
+#define TPS80031_INT_MSK_STS_B				0xD7
+#define TPS80031_INT_MSK_STS_C				0xD8
+#define TPS80031_TOGGLE1				0x90
+#define TPS80031_TOGGLE2				0x91
+#define TPS80031_TOGGLE3				0x92
+#define TPS80031_PWDNSTATUS1				0x93
+#define TPS80031_PWDNSTATUS2				0x94
+#define TPS80031_VALIDITY0				0x17
+#define TPS80031_VALIDITY1				0x18
+#define TPS80031_VALIDITY2				0x19
+#define TPS80031_VALIDITY3				0x1A
+#define TPS80031_VALIDITY4				0x1B
+#define TPS80031_VALIDITY5				0x1C
+#define TPS80031_VALIDITY6				0x1D
+#define TPS80031_VALIDITY7				0x1E
+
+/* Version number related register */
+#define TPS80031_JTAGVERNUM				0x87
+#define TPS80031_EPROM_REV				0xDF
+
+/* GPADC Trimming Bits. */
+#define TPS80031_GPADC_TRIM0				0xCC
+#define TPS80031_GPADC_TRIM1				0xCD
+#define TPS80031_GPADC_TRIM2				0xCE
+#define TPS80031_GPADC_TRIM3				0xCF
+#define TPS80031_GPADC_TRIM4				0xD0
+#define TPS80031_GPADC_TRIM5				0xD1
+#define TPS80031_GPADC_TRIM6				0xD2
+#define TPS80031_GPADC_TRIM7				0xD3
+#define TPS80031_GPADC_TRIM8				0xD4
+#define TPS80031_GPADC_TRIM9				0xD5
+#define TPS80031_GPADC_TRIM10				0xD6
+#define TPS80031_GPADC_TRIM11				0xD7
+#define TPS80031_GPADC_TRIM12				0xD8
+#define TPS80031_GPADC_TRIM13				0xD9
+#define TPS80031_GPADC_TRIM14				0xDA
+#define TPS80031_GPADC_TRIM15				0xDB
+#define TPS80031_GPADC_TRIM16				0xDC
+#define TPS80031_GPADC_TRIM17				0xDD
+#define TPS80031_GPADC_TRIM18				0xDE
+
+/* TPS80031_CONTROLLER_STAT1 bit fields */
+#define TPS80031_CONTROLLER_STAT1_BAT_TEMP		0
+#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED		1
+#define TPS80031_CONTROLLER_STAT1_VBUS_DET		2
+#define TPS80031_CONTROLLER_STAT1_VAC_DET		3
+#define TPS80031_CONTROLLER_STAT1_FAULT_WDG		4
+#define TPS80031_CONTROLLER_STAT1_LINCH_GATED		6
+/* TPS80031_CONTROLLER_INT_MASK bit filed */
+#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET		0
+#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET		1
+#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP		2
+#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG		3
+#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED	4
+#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED	5
+
+#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK		0x3F
+
+/* TPS80031_PHOENIX_DEV_ON bit field */
+#define TPS80031_DEVOFF					0x1
+
+#define TPS80031_EXT_CONTROL_CFG_TRANS			0
+#define TPS80031_EXT_CONTROL_CFG_STATE			1
+
+/* State register field */
+#define TPS80031_STATE_OFF				0x00
+#define TPS80031_STATE_ON				0x01
+#define TPS80031_STATE_MASK				0x03
+
+/* Trans register field */
+#define TPS80031_TRANS_ACTIVE_OFF			0x00
+#define TPS80031_TRANS_ACTIVE_ON			0x01
+#define TPS80031_TRANS_ACTIVE_MASK			0x03
+#define TPS80031_TRANS_SLEEP_OFF			0x00
+#define TPS80031_TRANS_SLEEP_ON				0x04
+#define TPS80031_TRANS_SLEEP_MASK			0x0C
+#define TPS80031_TRANS_OFF_OFF				0x00
+#define TPS80031_TRANS_OFF_ACTIVE			0x10
+#define TPS80031_TRANS_OFF_MASK				0x30
+
+#define TPS80031_EXT_PWR_REQ		(TPS80031_PWR_REQ_INPUT_PREQ1 | \
+					TPS80031_PWR_REQ_INPUT_PREQ2 | \
+					TPS80031_PWR_REQ_INPUT_PREQ3)
+
+/* TPS80031_BBSPOR_CFG bit field */
+#define TPS80031_BBSPOR_CHG_EN				0x8
+#define TPS80031_MAX_REGISTER				0xFF
+
+struct i2c_client;
+
+/* Supported chips */
+enum chips {
+	TPS80031 = 0x00000001,
+	TPS80032 = 0x00000002,
+};
+
+enum {
+	TPS80031_INT_PWRON,
+	TPS80031_INT_RPWRON,
+	TPS80031_INT_SYS_VLOW,
+	TPS80031_INT_RTC_ALARM,
+	TPS80031_INT_RTC_PERIOD,
+	TPS80031_INT_HOT_DIE,
+	TPS80031_INT_VXX_SHORT,
+	TPS80031_INT_SPDURATION,
+	TPS80031_INT_WATCHDOG,
+	TPS80031_INT_BAT,
+	TPS80031_INT_SIM,
+	TPS80031_INT_MMC,
+	TPS80031_INT_RES,
+	TPS80031_INT_GPADC_RT,
+	TPS80031_INT_GPADC_SW2_EOC,
+	TPS80031_INT_CC_AUTOCAL,
+	TPS80031_INT_ID_WKUP,
+	TPS80031_INT_VBUSS_WKUP,
+	TPS80031_INT_ID,
+	TPS80031_INT_VBUS,
+	TPS80031_INT_CHRG_CTRL,
+	TPS80031_INT_EXT_CHRG,
+	TPS80031_INT_INT_CHRG,
+	TPS80031_INT_RES2,
+	TPS80031_INT_BAT_TEMP_OVRANGE,
+	TPS80031_INT_BAT_REMOVED,
+	TPS80031_INT_VBUS_DET,
+	TPS80031_INT_VAC_DET,
+	TPS80031_INT_FAULT_WDG,
+	TPS80031_INT_LINCH_GATED,
+
+	/* Last interrupt id to get the end number */
+	TPS80031_INT_NR,
+};
+
+/* TPS80031 Slave IDs */
+#define TPS80031_NUM_SLAVES				4
+#define TPS80031_SLAVE_ID0				0
+#define TPS80031_SLAVE_ID1				1
+#define TPS80031_SLAVE_ID2				2
+#define TPS80031_SLAVE_ID3				3
+
+/* TPS80031 I2C addresses */
+#define TPS80031_I2C_ID0_ADDR				0x12
+#define TPS80031_I2C_ID1_ADDR				0x48
+#define TPS80031_I2C_ID2_ADDR				0x49
+#define TPS80031_I2C_ID3_ADDR				0x4A
+
+enum {
+	TPS80031_REGULATOR_VIO,
+	TPS80031_REGULATOR_SMPS1,
+	TPS80031_REGULATOR_SMPS2,
+	TPS80031_REGULATOR_SMPS3,
+	TPS80031_REGULATOR_SMPS4,
+	TPS80031_REGULATOR_VANA,
+	TPS80031_REGULATOR_LDO1,
+	TPS80031_REGULATOR_LDO2,
+	TPS80031_REGULATOR_LDO3,
+	TPS80031_REGULATOR_LDO4,
+	TPS80031_REGULATOR_LDO5,
+	TPS80031_REGULATOR_LDO6,
+	TPS80031_REGULATOR_LDO7,
+	TPS80031_REGULATOR_LDOLN,
+	TPS80031_REGULATOR_LDOUSB,
+	TPS80031_REGULATOR_VBUS,
+	TPS80031_REGULATOR_REGEN1,
+	TPS80031_REGULATOR_REGEN2,
+	TPS80031_REGULATOR_SYSEN,
+	TPS80031_REGULATOR_MAX,
+};
+
+/* Different configurations for the rails */
+enum {
+	/* USBLDO input selection */
+	TPS80031_USBLDO_INPUT_VSYS		= 0x00000001,
+	TPS80031_USBLDO_INPUT_PMID		= 0x00000002,
+
+	/* LDO3 output mode */
+	TPS80031_LDO3_OUTPUT_VIB		= 0x00000004,
+
+	/* VBUS configuration */
+	TPS80031_VBUS_DISCHRG_EN_PDN		= 0x00000004,
+	TPS80031_VBUS_SW_ONLY			= 0x00000008,
+	TPS80031_VBUS_SW_N_ID			= 0x00000010,
+};
+
+/* External controls requests */
+enum tps80031_ext_control {
+	TPS80031_PWR_REQ_INPUT_NONE		= 0x00000000,
+	TPS80031_PWR_REQ_INPUT_PREQ1		= 0x00000001,
+	TPS80031_PWR_REQ_INPUT_PREQ2		= 0x00000002,
+	TPS80031_PWR_REQ_INPUT_PREQ3		= 0x00000004,
+	TPS80031_PWR_OFF_ON_SLEEP		= 0x00000008,
+	TPS80031_PWR_ON_ON_SLEEP		= 0x00000010,
+};
+
+enum tps80031_pupd_pins {
+	TPS80031_PREQ1 = 0,
+	TPS80031_PREQ2A,
+	TPS80031_PREQ2B,
+	TPS80031_PREQ2C,
+	TPS80031_PREQ3,
+	TPS80031_NRES_WARM,
+	TPS80031_PWM_FORCE,
+	TPS80031_CHRG_EXT_CHRG_STATZ,
+	TPS80031_SIM,
+	TPS80031_MMC,
+	TPS80031_GPADC_START,
+	TPS80031_DVSI2C_SCL,
+	TPS80031_DVSI2C_SDA,
+	TPS80031_CTLI2C_SCL,
+	TPS80031_CTLI2C_SDA,
+};
+
+enum tps80031_pupd_settings {
+	TPS80031_PUPD_NORMAL,
+	TPS80031_PUPD_PULLDOWN,
+	TPS80031_PUPD_PULLUP,
+};
+
+struct tps80031 {
+	struct device		*dev;
+	unsigned long		chip_info;
+	int			es_version;
+	struct i2c_client	*clients[TPS80031_NUM_SLAVES];
+	struct regmap		*regmap[TPS80031_NUM_SLAVES];
+	struct regmap_irq_chip_data *irq_data;
+};
+
+struct tps80031_pupd_init_data {
+	int input_pin;
+	int setting;
+};
+
+/*
+ * struct tps80031_regulator_platform_data - tps80031 regulator platform data.
+ *
+ * @reg_init_data: The regulator init data.
+ * @ext_ctrl_flag: External control flag for sleep/power request control.
+ * @config_flags: Configuration flag to configure the rails.
+ *		  It should be ORed of config enums.
+ */
+
+struct tps80031_regulator_platform_data {
+	struct regulator_init_data *reg_init_data;
+	unsigned int ext_ctrl_flag;
+	unsigned int config_flags;
+};
+
+struct tps80031_platform_data {
+	int irq_base;
+	bool use_power_off;
+	struct tps80031_pupd_init_data *pupd_init_data;
+	int pupd_init_data_size;
+	struct tps80031_regulator_platform_data
+			*regulator_pdata[TPS80031_REGULATOR_MAX];
+};
+
+static inline int tps80031_write(struct device *dev, int sid,
+		int reg, uint8_t val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_write(tps80031->regmap[sid], reg, val);
+}
+
+static inline int tps80031_writes(struct device *dev, int sid, int reg,
+		int len, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_bulk_write(tps80031->regmap[sid], reg, val, len);
+}
+
+static inline int tps80031_read(struct device *dev, int sid,
+		int reg, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+	unsigned int ival;
+	int ret;
+
+	ret = regmap_read(tps80031->regmap[sid], reg, &ival);
+	if (ret < 0) {
+		dev_err(dev, "failed reading from reg 0x%02x\n", reg);
+		return ret;
+	}
+
+	*val = ival;
+	return ret;
+}
+
+static inline int tps80031_reads(struct device *dev, int sid,
+		int reg, int len, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_bulk_read(tps80031->regmap[sid], reg, val, len);
+}
+
+static inline int tps80031_set_bits(struct device *dev, int sid,
+		int reg, uint8_t bit_mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg,
+				bit_mask, bit_mask);
+}
+
+static inline int tps80031_clr_bits(struct device *dev, int sid,
+		int reg, uint8_t bit_mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0);
+}
+
+static inline int tps80031_update(struct device *dev, int sid,
+		int reg, uint8_t val, uint8_t mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg, mask, val);
+}
+
+static inline unsigned long tps80031_get_chip_info(struct device *dev)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return tps80031->chip_info;
+}
+
+static inline int tps80031_get_pmu_version(struct device *dev)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return tps80031->es_version;
+}
+
+static inline int tps80031_irq_get_virq(struct device *dev, int irq)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_irq_get_virq(tps80031->irq_data, irq);
+}
+
+extern int tps80031_ext_power_req_config(struct device *dev,
+		unsigned long ext_ctrl_flag, int preq_bit,
+		int state_reg_add, int trans_reg_add);
+#endif /*__LINUX_MFD_TPS80031_H */
-- 
cgit v1.2.3-55-g7522


From 3863db3e800c64e21e4effcc3de0f72cdb9b0d77 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 20 Nov 2012 08:44:47 +0530
Subject: mfd: tps65090: Remove unused member of struct tps65090

Remove unused member from tps65090 data structure as
these are not used.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65090.c       |  6 +-----
 include/linux/mfd/tps65090.h | 11 -----------
 2 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index f95f7f6b846c..3cfc9dcbe9dc 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -269,12 +269,9 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 		return -ENOMEM;
 	}
 
-	tps65090->client = client;
 	tps65090->dev = &client->dev;
 	i2c_set_clientdata(client, tps65090);
 
-	mutex_init(&tps65090->lock);
-
 	if (client->irq) {
 		ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base);
 		if (ret) {
@@ -284,8 +281,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 		}
 	}
 
-	tps65090->rmap = devm_regmap_init_i2c(tps65090->client,
-					      &tps65090_regmap_config);
+	tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config);
 	if (IS_ERR(tps65090->rmap)) {
 		ret = PTR_ERR(tps65090->rmap);
 		dev_err(&client->dev, "regmap_init failed with err: %d\n", ret);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 6bc31d854626..6c576224f637 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -25,26 +25,15 @@
 #include <linux/irq.h>
 
 struct tps65090 {
-	struct mutex		lock;
 	struct device		*dev;
-	struct i2c_client	*client;
 	struct regmap		*rmap;
 	struct irq_chip		irq_chip;
 	struct mutex		irq_lock;
 	int			irq_base;
-	unsigned int		id;
-};
-
-struct tps65090_subdev_info {
-	int		id;
-	const char	*name;
-	void		*platform_data;
 };
 
 struct tps65090_platform_data {
 	int irq_base;
-	int num_subdevs;
-	struct tps65090_subdev_info *subdevs;
 };
 
 /*
-- 
cgit v1.2.3-55-g7522


From b9c79323166530a14c1fa8c10337eeaa54e3f98d Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 20 Nov 2012 08:44:48 +0530
Subject: mfd: tps65090: Move register access APIs to header

Since tps65090 register is accessed via regmap, moving
the register access APIs to header and making it as inline.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65090.c       | 34 ----------------------------------
 include/linux/mfd/tps65090.h | 39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 35 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 3cfc9dcbe9dc..355a07749454 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -25,7 +25,6 @@
 #include <linux/i2c.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps65090.h>
-#include <linux/regmap.h>
 #include <linux/err.h>
 
 #define NUM_INT_REG 2
@@ -78,39 +77,6 @@ static struct mfd_cell tps65090s[] = {
 	},
 };
 
-int tps65090_write(struct device *dev, int reg, uint8_t val)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_write(tps->rmap, reg, val);
-}
-EXPORT_SYMBOL_GPL(tps65090_write);
-
-int tps65090_read(struct device *dev, int reg, uint8_t *val)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	unsigned int temp_val;
-	int ret;
-	ret = regmap_read(tps->rmap, reg, &temp_val);
-	if (!ret)
-		*val = temp_val;
-	return ret;
-}
-EXPORT_SYMBOL_GPL(tps65090_read);
-
-int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u);
-}
-EXPORT_SYMBOL_GPL(tps65090_set_bits);
-
-int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u);
-}
-EXPORT_SYMBOL_GPL(tps65090_clr_bits);
-
 static void tps65090_irq_lock(struct irq_data *data)
 {
 	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 6c576224f637..1a5f916b7383 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -23,6 +23,7 @@
 #define __LINUX_MFD_TPS65090_H
 
 #include <linux/irq.h>
+#include <linux/regmap.h>
 
 struct tps65090 {
 	struct device		*dev;
@@ -40,9 +41,39 @@ struct tps65090_platform_data {
  * NOTE: the functions below are not intended for use outside
  * of the TPS65090 sub-device drivers
  */
-extern int tps65090_write(struct device *dev, int reg, uint8_t val);
-extern int tps65090_read(struct device *dev, int reg, uint8_t *val);
-extern int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num);
-extern int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num);
+static inline int tps65090_write(struct device *dev, int reg, uint8_t val)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_write(tps->rmap, reg, val);
+}
+
+static inline int tps65090_read(struct device *dev, int reg, uint8_t *val)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+	unsigned int temp_val;
+	int ret;
+
+	ret = regmap_read(tps->rmap, reg, &temp_val);
+	if (!ret)
+		*val = temp_val;
+	return ret;
+}
+
+static inline int tps65090_set_bits(struct device *dev, int reg,
+		uint8_t bit_num)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u);
+}
+
+static inline int tps65090_clr_bits(struct device *dev, int reg,
+		uint8_t bit_num)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u);
+}
 
 #endif /*__LINUX_MFD_TPS65090_H */
-- 
cgit v1.2.3-55-g7522


From 759f2598ef3876637e40d99a4ceb7a3d83a4d8d3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan
Date: Tue, 20 Nov 2012 08:44:49 +0530
Subject: mfd: tps65090: Use regmap irq framework for interrupt support

Use the regmap irq framework for implementing TPS65090 interrupt
support in place of implementing it locally.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65090.c       | 263 ++++++++++++++++---------------------------
 include/linux/mfd/tps65090.h |  23 +++-
 2 files changed, 118 insertions(+), 168 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 355a07749454..2eaae52cb5b8 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -38,35 +38,21 @@
 #define TPS65090_INT_MSK	0x2
 #define TPS65090_INT_MSK2	0x3
 
-struct tps65090_irq_data {
-	u8		mask_reg;
-	u8		mask_pos;
-};
-
-#define TPS65090_IRQ(_reg, _mask_pos)		\
-	{					\
-		.mask_reg	= (_reg),	\
-		.mask_pos	= (_mask_pos),	\
-	}
-
-static const struct tps65090_irq_data tps65090_irqs[] = {
-	[0]		= TPS65090_IRQ(0, 0),
-	[1]		= TPS65090_IRQ(0, 1),
-	[2]		= TPS65090_IRQ(0, 2),
-	[3]		= TPS65090_IRQ(0, 3),
-	[4]		= TPS65090_IRQ(0, 4),
-	[5]		= TPS65090_IRQ(0, 5),
-	[6]		= TPS65090_IRQ(0, 6),
-	[7]		= TPS65090_IRQ(0, 7),
-	[8]		= TPS65090_IRQ(1, 0),
-	[9]		= TPS65090_IRQ(1, 1),
-	[10]		= TPS65090_IRQ(1, 2),
-	[11]		= TPS65090_IRQ(1, 3),
-	[12]		= TPS65090_IRQ(1, 4),
-	[13]		= TPS65090_IRQ(1, 5),
-	[14]		= TPS65090_IRQ(1, 6),
-	[15]		= TPS65090_IRQ(1, 7),
-};
+#define TPS65090_INT1_MASK_VAC_STATUS_CHANGE		1
+#define TPS65090_INT1_MASK_VSYS_STATUS_CHANGE		2
+#define TPS65090_INT1_MASK_BAT_STATUS_CHANGE		3
+#define TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE	4
+#define TPS65090_INT1_MASK_CHARGING_COMPLETE		5
+#define TPS65090_INT1_MASK_OVERLOAD_DCDC1		6
+#define TPS65090_INT1_MASK_OVERLOAD_DCDC2		7
+#define TPS65090_INT2_MASK_OVERLOAD_DCDC3		0
+#define TPS65090_INT2_MASK_OVERLOAD_FET1		1
+#define TPS65090_INT2_MASK_OVERLOAD_FET2		2
+#define TPS65090_INT2_MASK_OVERLOAD_FET3		3
+#define TPS65090_INT2_MASK_OVERLOAD_FET4		4
+#define TPS65090_INT2_MASK_OVERLOAD_FET5		5
+#define TPS65090_INT2_MASK_OVERLOAD_FET6		6
+#define TPS65090_INT2_MASK_OVERLOAD_FET7		7
 
 static struct mfd_cell tps65090s[] = {
 	{
@@ -77,132 +63,77 @@ static struct mfd_cell tps65090s[] = {
 	},
 };
 
-static void tps65090_irq_lock(struct irq_data *data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&tps65090->irq_lock);
-}
-
-static void tps65090_irq_mask(struct irq_data *irq_data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->hwirq;
-	const struct tps65090_irq_data *data = &tps65090_irqs[__irq];
-
-	tps65090_set_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg),
-		data->mask_pos);
-}
-
-static void tps65090_irq_unmask(struct irq_data *irq_data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->irq - tps65090->irq_base;
-	const struct tps65090_irq_data *data = &tps65090_irqs[__irq];
-
-	tps65090_clr_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg),
-		data->mask_pos);
-}
-
-static void tps65090_irq_sync_unlock(struct irq_data *data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
-
-	mutex_unlock(&tps65090->irq_lock);
-}
-
-static irqreturn_t tps65090_irq(int irq, void *data)
-{
-	struct tps65090 *tps65090 = data;
-	int ret = 0;
-	u8 status, mask;
-	unsigned long int acks = 0;
-	int i;
-
-	for (i = 0; i < NUM_INT_REG; i++) {
-		ret = tps65090_read(tps65090->dev, TPS65090_INT_MSK + i, &mask);
-		if (ret < 0) {
-			dev_err(tps65090->dev,
-				"failed to read mask reg [addr:%d]\n",
-				TPS65090_INT_MSK + i);
-			return IRQ_NONE;
-		}
-		ret = tps65090_read(tps65090->dev, TPS65090_INT_STS + i,
-			&status);
-		if (ret < 0) {
-			dev_err(tps65090->dev,
-				"failed to read status reg [addr:%d]\n",
-				 TPS65090_INT_STS + i);
-			return IRQ_NONE;
-		}
-		if (status) {
-			/* Ack only those interrupts which are not masked */
-			status &= (~mask);
-			ret = tps65090_write(tps65090->dev,
-					TPS65090_INT_STS + i, status);
-			if (ret < 0) {
-				dev_err(tps65090->dev,
-					"failed to write interrupt status\n");
-				return IRQ_NONE;
-			}
-			acks |= (status << (i * 8));
-		}
-	}
-
-	for_each_set_bit(i, &acks, ARRAY_SIZE(tps65090_irqs))
-		handle_nested_irq(tps65090->irq_base + i);
-	return acks ? IRQ_HANDLED : IRQ_NONE;
-}
-
-static int __devinit tps65090_irq_init(struct tps65090 *tps65090, int irq,
-	int irq_base)
-{
-	int i, ret;
-
-	if (!irq_base) {
-		dev_err(tps65090->dev, "IRQ base not set\n");
-		return -EINVAL;
-	}
-
-	mutex_init(&tps65090->irq_lock);
-
-	for (i = 0; i < NUM_INT_REG; i++)
-		tps65090_write(tps65090->dev, TPS65090_INT_MSK + i, 0xFF);
-
-	for (i = 0; i < NUM_INT_REG; i++)
-		tps65090_write(tps65090->dev, TPS65090_INT_STS + i, 0xff);
-
-	tps65090->irq_base = irq_base;
-	tps65090->irq_chip.name = "tps65090";
-	tps65090->irq_chip.irq_mask = tps65090_irq_mask;
-	tps65090->irq_chip.irq_unmask = tps65090_irq_unmask;
-	tps65090->irq_chip.irq_bus_lock = tps65090_irq_lock;
-	tps65090->irq_chip.irq_bus_sync_unlock = tps65090_irq_sync_unlock;
-
-	for (i = 0; i < ARRAY_SIZE(tps65090_irqs); i++) {
-		int __irq = i + tps65090->irq_base;
-		irq_set_chip_data(__irq, tps65090);
-		irq_set_chip_and_handler(__irq, &tps65090->irq_chip,
-					 handle_simple_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#endif
-	}
-
-	ret = request_threaded_irq(irq, NULL, tps65090_irq, IRQF_ONESHOT,
-				"tps65090", tps65090);
-	if (!ret) {
-		device_init_wakeup(tps65090->dev, 1);
-		enable_irq_wake(irq);
-	}
+static const struct regmap_irq tps65090_irqs[] = {
+	/* INT1 IRQs*/
+	[TPS65090_IRQ_VAC_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_VAC_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_VSYS_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_VSYS_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_BAT_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_BAT_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_CHARGING_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_CHARGING_COMPLETE] = {
+			.mask = TPS65090_INT1_MASK_CHARGING_COMPLETE,
+	},
+	[TPS65090_IRQ_OVERLOAD_DCDC1] = {
+			.mask = TPS65090_INT1_MASK_OVERLOAD_DCDC1,
+	},
+	[TPS65090_IRQ_OVERLOAD_DCDC2] = {
+			.mask = TPS65090_INT1_MASK_OVERLOAD_DCDC2,
+	},
+	/* INT2 IRQs*/
+	[TPS65090_IRQ_OVERLOAD_DCDC3] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_DCDC3,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET1] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET1,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET2] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET2,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET3] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET3,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET4] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET4,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET5] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET5,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET6] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET6,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET7] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET7,
+	},
+};
 
-	return ret;
-}
+static struct regmap_irq_chip tps65090_irq_chip = {
+	.name = "tps65090",
+	.irqs = tps65090_irqs,
+	.num_irqs = ARRAY_SIZE(tps65090_irqs),
+	.num_regs = NUM_INT_REG,
+	.status_base = TPS65090_INT_STS,
+	.mask_base = TPS65090_INT_MSK,
+	.mask_invert = true,
+};
 
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
-	if (reg == TPS65090_INT_STS)
+	if ((reg == TPS65090_INT_STS) || (reg == TPS65090_INT_STS2))
 		return true;
 	else
 		return false;
@@ -238,24 +169,27 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 	tps65090->dev = &client->dev;
 	i2c_set_clientdata(client, tps65090);
 
-	if (client->irq) {
-		ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base);
-		if (ret) {
-			dev_err(&client->dev, "IRQ init failed with err: %d\n",
-				ret);
-			goto err_exit;
-		}
-	}
-
 	tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config);
 	if (IS_ERR(tps65090->rmap)) {
 		ret = PTR_ERR(tps65090->rmap);
 		dev_err(&client->dev, "regmap_init failed with err: %d\n", ret);
-		goto err_irq_exit;
+		return ret;
+	}
+
+	if (client->irq) {
+		ret = regmap_add_irq_chip(tps65090->rmap, client->irq,
+			IRQF_ONESHOT | IRQF_TRIGGER_LOW, pdata->irq_base,
+			&tps65090_irq_chip, &tps65090->irq_data);
+			if (ret) {
+				dev_err(&client->dev,
+					"IRQ init failed with err: %d\n", ret);
+			return ret;
+		}
 	}
 
 	ret = mfd_add_devices(tps65090->dev, -1, tps65090s,
-			      ARRAY_SIZE(tps65090s), NULL, 0, NULL);
+		ARRAY_SIZE(tps65090s), NULL,
+		regmap_irq_chip_get_base(tps65090->irq_data), NULL);
 	if (ret) {
 		dev_err(&client->dev, "add mfd devices failed with err: %d\n",
 			ret);
@@ -266,8 +200,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 
 err_irq_exit:
 	if (client->irq)
-		free_irq(client->irq, tps65090);
-err_exit:
+		regmap_del_irq_chip(client->irq, tps65090->irq_data);
 	return ret;
 }
 
@@ -277,7 +210,7 @@ static int __devexit tps65090_i2c_remove(struct i2c_client *client)
 
 	mfd_remove_devices(tps65090->dev);
 	if (client->irq)
-		free_irq(client->irq, tps65090);
+		regmap_del_irq_chip(client->irq, tps65090->irq_data);
 
 	return 0;
 }
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 1a5f916b7383..4bbbb1350b91 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -25,12 +25,29 @@
 #include <linux/irq.h>
 #include <linux/regmap.h>
 
+/* TPS65090 IRQs */
+enum {
+	TPS65090_IRQ_VAC_STATUS_CHANGE,
+	TPS65090_IRQ_VSYS_STATUS_CHANGE,
+	TPS65090_IRQ_BAT_STATUS_CHANGE,
+	TPS65090_IRQ_CHARGING_STATUS_CHANGE,
+	TPS65090_IRQ_CHARGING_COMPLETE,
+	TPS65090_IRQ_OVERLOAD_DCDC1,
+	TPS65090_IRQ_OVERLOAD_DCDC2,
+	TPS65090_IRQ_OVERLOAD_DCDC3,
+	TPS65090_IRQ_OVERLOAD_FET1,
+	TPS65090_IRQ_OVERLOAD_FET2,
+	TPS65090_IRQ_OVERLOAD_FET3,
+	TPS65090_IRQ_OVERLOAD_FET4,
+	TPS65090_IRQ_OVERLOAD_FET5,
+	TPS65090_IRQ_OVERLOAD_FET6,
+	TPS65090_IRQ_OVERLOAD_FET7,
+};
+
 struct tps65090 {
 	struct device		*dev;
 	struct regmap		*rmap;
-	struct irq_chip		irq_chip;
-	struct mutex		irq_lock;
-	int			irq_base;
+	struct regmap_irq_chip_data *irq_data;
 };
 
 struct tps65090_platform_data {
-- 
cgit v1.2.3-55-g7522


From c7b76dce8ac95fd464bfae741b830d407884c274 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Sun, 18 Nov 2012 18:36:20 +0200
Subject: mfd: Introduce retu-mfd driver

Retu is a multi-function device found on Nokia Internet Tablets
implementing at least watchdog, RTC, headset detection and power button
functionality.

This patch implements minimum functionality providing register access,
IRQ handling and power off functions.

Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig      |   9 ++
 drivers/mfd/Makefile     |   1 +
 drivers/mfd/retu-mfd.c   | 264 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/retu.h |  22 ++++
 4 files changed, 296 insertions(+)
 create mode 100644 drivers/mfd/retu-mfd.c
 create mode 100644 include/linux/mfd/retu.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ca633df7c330..f5b839b718aa 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1094,6 +1094,15 @@ config MFD_VIPERBOARD
 	  You need to select the mfd cell drivers separately.
 	  The drivers do not support all features the board exposes.
 
+config MFD_RETU
+	tristate "Support for Retu multi-function device"
+	select MFD_CORE
+	depends on I2C
+	select REGMAP_IRQ
+	help
+	  Retu is a multi-function device found on Nokia Internet Tablets
+	  (770, N800 and N810).
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8072460e99d2..2689c8a0d781 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -145,3 +145,4 @@ obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
+obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c
new file mode 100644
index 000000000000..7ff4a37ab0c0
--- /dev/null
+++ b/drivers/mfd/retu-mfd.c
@@ -0,0 +1,264 @@
+/*
+ * Retu MFD driver
+ *
+ * Copyright (C) 2004, 2005 Nokia Corporation
+ *
+ * Based on code written by Juha Yrjölä, David Weinehall and Mikko Ylinen.
+ * Rewritten by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/retu.h>
+#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
+
+/* Registers */
+#define RETU_REG_ASICR		0x00		/* ASIC ID and revision */
+#define RETU_REG_ASICR_VILMA	(1 << 7)	/* Bit indicating Vilma */
+#define RETU_REG_IDR		0x01		/* Interrupt ID */
+#define RETU_REG_IMR		0x02		/* Interrupt mask */
+
+/* Interrupt sources */
+#define RETU_INT_PWR		0		/* Power button */
+
+struct retu_dev {
+	struct regmap			*regmap;
+	struct device			*dev;
+	struct mutex			mutex;
+	struct regmap_irq_chip_data	*irq_data;
+};
+
+static struct resource retu_pwrbutton_res[] = {
+	{
+		.name	= "retu-pwrbutton",
+		.start	= RETU_INT_PWR,
+		.end	= RETU_INT_PWR,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell retu_devs[] = {
+	{
+		.name		= "retu-wdt"
+	},
+	{
+		.name		= "retu-pwrbutton",
+		.resources	= retu_pwrbutton_res,
+		.num_resources	= ARRAY_SIZE(retu_pwrbutton_res),
+	}
+};
+
+static struct regmap_irq retu_irqs[] = {
+	[RETU_INT_PWR] = {
+		.mask = 1 << RETU_INT_PWR,
+	}
+};
+
+static struct regmap_irq_chip retu_irq_chip = {
+	.name		= "RETU",
+	.irqs		= retu_irqs,
+	.num_irqs	= ARRAY_SIZE(retu_irqs),
+	.num_regs	= 1,
+	.status_base	= RETU_REG_IDR,
+	.mask_base	= RETU_REG_IMR,
+	.ack_base	= RETU_REG_IDR,
+};
+
+/* Retu device registered for the power off. */
+static struct retu_dev *retu_pm_power_off;
+
+int retu_read(struct retu_dev *rdev, u8 reg)
+{
+	int ret;
+	int value;
+
+	mutex_lock(&rdev->mutex);
+	ret = regmap_read(rdev->regmap, reg, &value);
+	mutex_unlock(&rdev->mutex);
+
+	return ret ? ret : value;
+}
+EXPORT_SYMBOL_GPL(retu_read);
+
+int retu_write(struct retu_dev *rdev, u8 reg, u16 data)
+{
+	int ret;
+
+	mutex_lock(&rdev->mutex);
+	ret = regmap_write(rdev->regmap, reg, data);
+	mutex_unlock(&rdev->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(retu_write);
+
+static void retu_power_off(void)
+{
+	struct retu_dev *rdev = retu_pm_power_off;
+	int reg;
+
+	mutex_lock(&retu_pm_power_off->mutex);
+
+	/* Ignore power button state */
+	regmap_read(rdev->regmap, RETU_REG_CC1, &reg);
+	regmap_write(rdev->regmap, RETU_REG_CC1, reg | 2);
+
+	/* Expire watchdog immediately */
+	regmap_write(rdev->regmap, RETU_REG_WATCHDOG, 0);
+
+	/* Wait for poweroff */
+	for (;;)
+		cpu_relax();
+
+	mutex_unlock(&retu_pm_power_off->mutex);
+}
+
+static int retu_regmap_read(void *context, const void *reg, size_t reg_size,
+			    void *val, size_t val_size)
+{
+	int ret;
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	BUG_ON(reg_size != 1 || val_size != 2);
+
+	ret = i2c_smbus_read_word_data(i2c, *(u8 const *)reg);
+	if (ret < 0)
+		return ret;
+
+	*(u16 *)val = ret;
+	return 0;
+}
+
+static int retu_regmap_write(void *context, const void *data, size_t count)
+{
+	u8 reg;
+	u16 val;
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	BUG_ON(count != sizeof(reg) + sizeof(val));
+	memcpy(&reg, data, sizeof(reg));
+	memcpy(&val, data + sizeof(reg), sizeof(val));
+	return i2c_smbus_write_word_data(i2c, reg, val);
+}
+
+static struct regmap_bus retu_bus = {
+	.read = retu_regmap_read,
+	.write = retu_regmap_write,
+	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
+};
+
+static struct regmap_config retu_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+};
+
+static int __devinit retu_probe(struct i2c_client *i2c,
+				const struct i2c_device_id *id)
+{
+	struct retu_dev *rdev;
+	int ret;
+
+	rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL);
+	if (rdev == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, rdev);
+	rdev->dev = &i2c->dev;
+	mutex_init(&rdev->mutex);
+	rdev->regmap = devm_regmap_init(&i2c->dev, &retu_bus, &i2c->dev,
+					&retu_config);
+	if (IS_ERR(rdev->regmap))
+		return PTR_ERR(rdev->regmap);
+
+	ret = retu_read(rdev, RETU_REG_ASICR);
+	if (ret < 0) {
+		dev_err(rdev->dev, "could not read Retu revision: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(rdev->dev, "Retu%s v%d.%d found\n",
+		 (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "",
+		 (ret >> 4) & 0x7, ret & 0xf);
+
+	/* Mask all RETU interrupts. */
+	ret = retu_write(rdev, RETU_REG_IMR, 0xffff);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1,
+				  &retu_irq_chip, &rdev->irq_data);
+	if (ret < 0)
+		return ret;
+
+	ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs),
+			      NULL, regmap_irq_chip_get_base(rdev->irq_data),
+			      NULL);
+	if (ret < 0) {
+		regmap_del_irq_chip(i2c->irq, rdev->irq_data);
+		return ret;
+	}
+
+	if (!pm_power_off) {
+		retu_pm_power_off = rdev;
+		pm_power_off	  = retu_power_off;
+	}
+
+	return 0;
+}
+
+static int __devexit retu_remove(struct i2c_client *i2c)
+{
+	struct retu_dev *rdev = i2c_get_clientdata(i2c);
+
+	if (retu_pm_power_off == rdev) {
+		pm_power_off	  = NULL;
+		retu_pm_power_off = NULL;
+	}
+	mfd_remove_devices(rdev->dev);
+	regmap_del_irq_chip(i2c->irq, rdev->irq_data);
+
+	return 0;
+}
+
+static const struct i2c_device_id retu_id[] = {
+	{ "retu-mfd", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, retu_id);
+
+static struct i2c_driver retu_driver = {
+	.driver		= {
+		.name = "retu-mfd",
+		.owner = THIS_MODULE,
+	},
+	.probe		= retu_probe,
+	.remove		= retu_remove,
+	.id_table	= retu_id,
+};
+module_i2c_driver(retu_driver);
+
+MODULE_DESCRIPTION("Retu MFD driver");
+MODULE_AUTHOR("Juha Yrjölä");
+MODULE_AUTHOR("David Weinehall");
+MODULE_AUTHOR("Mikko Ylinen");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h
new file mode 100644
index 000000000000..1e2715d5b836
--- /dev/null
+++ b/include/linux/mfd/retu.h
@@ -0,0 +1,22 @@
+/*
+ * Retu MFD driver interface
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ */
+
+#ifndef __LINUX_MFD_RETU_H
+#define __LINUX_MFD_RETU_H
+
+struct retu_dev;
+
+int retu_read(struct retu_dev *, u8);
+int retu_write(struct retu_dev *, u8, u16);
+
+/* Registers */
+#define RETU_REG_WATCHDOG	0x17		/* Watchdog */
+#define RETU_REG_CC1		0x0d		/* Common control register 1 */
+#define RETU_REG_STATUS		0x16		/* Status register */
+
+#endif /* __LINUX_MFD_RETU_H */
-- 
cgit v1.2.3-55-g7522


From 0e8f1398a388bbaa5ca965711b9ed5ac4794332d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Tue, 13 Nov 2012 09:28:47 +0100
Subject: mfd: twl: Remove unused TWL_MODULE definitions

AUDIO and MADC only available on twl4030 series and the TWL_MODULE_* mapping
is not needed.

Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/i2c/twl.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 9a5e28462324..7278c72479d1 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -73,9 +73,7 @@
 #define TWL4030_MODULE_SECURED_REG	0x17
 
 #define TWL_MODULE_USB		TWL4030_MODULE_USB
-#define TWL_MODULE_AUDIO_VOICE	TWL4030_MODULE_AUDIO_VOICE
 #define TWL_MODULE_PIH		TWL4030_MODULE_PIH
-#define TWL_MODULE_MADC		TWL4030_MODULE_MADC
 #define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
 #define TWL_MODULE_PM_MASTER	TWL4030_MODULE_PM_MASTER
 #define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
-- 
cgit v1.2.3-55-g7522


From da059ecfc9f9d98556607c6d6db065aa3b7f162d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Tue, 13 Nov 2012 09:28:48 +0100
Subject: mfd: twl: Convert module id definitions to enums

Use enum list for the module definitions (TWL4030_MODULE_*) which will ease
up future work with the IDs.
At the same time group the IDs in block of five so it is easier to find the
ID we are looking for (to count the number they stand for).

At the same time define TWL_MODULE_LED so client drivers can switch to use
it as soon as it is possible.

Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  |  9 +++----
 include/linux/i2c/twl.h | 65 +++++++++++++++++++++++++------------------------
 2 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index d666c9d454cb..bb33b52cfa57 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -66,9 +66,6 @@
 
 /* Triton Core internal information (BEGIN) */
 
-/* Last - for index max*/
-#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
-
 #define TWL_NUM_SLAVES		4
 
 #define SUB_CHIP_ID0 0
@@ -184,7 +181,7 @@ struct twl_mapping {
 };
 static struct twl_mapping *twl_map;
 
-static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+static struct twl_mapping twl4030_map[] = {
 	/*
 	 * NOTE:  don't change this table without updating the
 	 * <linux/i2c/twl.h> defines for TWL4030_MODULE_*
@@ -327,7 +324,7 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	int sid;
 	struct twl_client *twl;
 
-	if (unlikely(mod_no > TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
@@ -369,7 +366,7 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	int sid;
 	struct twl_client *twl;
 
-	if (unlikely(mod_no > TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 7278c72479d1..b1c44cccef31 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -39,39 +39,39 @@
  * address each module uses within a given i2c slave.
  */
 
-/* Slave 0 (i2c address 0x48) */
-#define TWL4030_MODULE_USB		0x00
-
-/* Slave 1 (i2c address 0x49) */
-#define TWL4030_MODULE_AUDIO_VOICE	0x01
-#define TWL4030_MODULE_GPIO		0x02
-#define TWL4030_MODULE_INTBR		0x03
-#define TWL4030_MODULE_PIH		0x04
-#define TWL4030_MODULE_TEST		0x05
-
-/* Slave 2 (i2c address 0x4a) */
-#define TWL4030_MODULE_KEYPAD		0x06
-#define TWL4030_MODULE_MADC		0x07
-#define TWL4030_MODULE_INTERRUPTS	0x08
-#define TWL4030_MODULE_LED		0x09
-#define TWL4030_MODULE_MAIN_CHARGE	0x0A
-#define TWL4030_MODULE_PRECHARGE	0x0B
-#define TWL4030_MODULE_PWM0		0x0C
-#define TWL4030_MODULE_PWM1		0x0D
-#define TWL4030_MODULE_PWMA		0x0E
-#define TWL4030_MODULE_PWMB		0x0F
-
-#define TWL5031_MODULE_ACCESSORY	0x10
-#define TWL5031_MODULE_INTERRUPTS	0x11
-
-/* Slave 3 (i2c address 0x4b) */
-#define TWL4030_MODULE_BACKUP		0x12
-#define TWL4030_MODULE_INT		0x13
-#define TWL4030_MODULE_PM_MASTER	0x14
-#define TWL4030_MODULE_PM_RECEIVER	0x15
-#define TWL4030_MODULE_RTC		0x16
-#define TWL4030_MODULE_SECURED_REG	0x17
+enum twl4030_module_ids {
+	TWL4030_MODULE_USB = 0,		/* Slave 0 (i2c address 0x48) */
+	TWL4030_MODULE_AUDIO_VOICE,	/* Slave 1 (i2c address 0x49) */
+	TWL4030_MODULE_GPIO,
+	TWL4030_MODULE_INTBR,
+	TWL4030_MODULE_PIH,
+
+	TWL4030_MODULE_TEST,
+	TWL4030_MODULE_KEYPAD,		/* Slave 2 (i2c address 0x4a) */
+	TWL4030_MODULE_MADC,
+	TWL4030_MODULE_INTERRUPTS,
+	TWL4030_MODULE_LED,
+
+	TWL4030_MODULE_MAIN_CHARGE,
+	TWL4030_MODULE_PRECHARGE,
+	TWL4030_MODULE_PWM0,
+	TWL4030_MODULE_PWM1,
+	TWL4030_MODULE_PWMA,
+
+	TWL4030_MODULE_PWMB,
+	TWL5031_MODULE_ACCESSORY,
+	TWL5031_MODULE_INTERRUPTS,
+	TWL4030_MODULE_BACKUP,		/* Slave 3 (i2c address 0x4b) */
+	TWL4030_MODULE_INT,
+
+	TWL4030_MODULE_PM_MASTER,
+	TWL4030_MODULE_PM_RECEIVER,
+	TWL4030_MODULE_RTC,
+	TWL4030_MODULE_SECURED_REG,
+	TWL4030_MODULE_LAST,
+};
 
+/* Similar functionalities implemented in TWL4030/6030 */
 #define TWL_MODULE_USB		TWL4030_MODULE_USB
 #define TWL_MODULE_PIH		TWL4030_MODULE_PIH
 #define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
@@ -79,6 +79,7 @@
 #define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
 #define TWL_MODULE_RTC		TWL4030_MODULE_RTC
 #define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
+#define TWL_MODULE_LED		TWL4030_MODULE_LED
 
 #define TWL6030_MODULE_ID0	0x0D
 #define TWL6030_MODULE_ID1	0x0E
-- 
cgit v1.2.3-55-g7522


From 04575d5a3dc15d762c7428d2745d30b2d448e443 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Tue, 13 Nov 2012 09:28:49 +0100
Subject: mfd: twl: Use decimal numbers for TWL6030_MODULE_IDs

It is easier fro humans to understand decimal numbers than hexadecimals when
they are used as indexes.

Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/i2c/twl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index b1c44cccef31..1ff54b114efc 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -81,9 +81,9 @@ enum twl4030_module_ids {
 #define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
 #define TWL_MODULE_LED		TWL4030_MODULE_LED
 
-#define TWL6030_MODULE_ID0	0x0D
-#define TWL6030_MODULE_ID1	0x0E
-#define TWL6030_MODULE_ID2	0x0F
+#define TWL6030_MODULE_ID0	13
+#define TWL6030_MODULE_ID1	14
+#define TWL6030_MODULE_ID2	15
 
 #define GPIO_INTR_OFFSET	0
 #define KEYPAD_INTR_OFFSET	1
-- 
cgit v1.2.3-55-g7522


From ef85fb28305fad7617f307383ebba554a3a891a2 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 20 Nov 2012 22:24:27 +0000
Subject: bcma: add locking around GPIO register accesses

The GPIOs are access through some registers in the chip common core.
We need locking around these GPIO accesses, all GPIOs are accessed
through the same registers and parallel writes will cause problems.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4585
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/bcma/driver_chipcommon.c            | 47 ++++++++++++++++++++++++++---
 include/linux/bcma/bcma_driver_chipcommon.h |  3 ++
 2 files changed, 45 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index a4c3ebcc4c86..c9b63d9ff61d 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -30,6 +30,8 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc)
 	if (cc->setup_done)
 		return;
 
+	spin_lock_init(&cc->gpio_lock);
+
 	if (cc->core->id.rev >= 11)
 		cc->status = bcma_cc_read32(cc, BCMA_CC_CHIPSTAT);
 	cc->capabilities = bcma_cc_read32(cc, BCMA_CC_CAP);
@@ -84,28 +86,63 @@ u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask)
 
 u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 EXPORT_SYMBOL_GPL(bcma_chipco_gpio_control);
 
 u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 #ifdef CONFIG_BCMA_DRIVER_MIPS
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 1cf1749440ac..a085d986c804 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -567,6 +567,9 @@ struct bcma_drv_cc {
 	int nr_serial_ports;
 	struct bcma_serial_port serial_ports[4];
 #endif /* CONFIG_BCMA_DRIVER_MIPS */
+
+	/* Lock for GPIO register access. */
+	spinlock_t gpio_lock;
 };
 
 /* Register access */
-- 
cgit v1.2.3-55-g7522


From ea3488f4696fe22811eb19bee7088b732d3f8fe0 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 20 Nov 2012 22:24:28 +0000
Subject: bcma: add bcma_chipco_gpio_pull{up,down}

Add functions to access the GPIO registers for pullup and pulldown.
These are needed for handling gpio registration.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4586
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/bcma/driver_chipcommon.c            | 30 +++++++++++++++++++++++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |  2 ++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index c9b63d9ff61d..0945383c2271 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -145,6 +145,36 @@ u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value)
 	return res;
 }
 
+u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value)
+{
+	unsigned long flags;
+	u32 res;
+
+	if (cc->core->id.rev < 20)
+		return 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPULLUP, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
+}
+
+u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value)
+{
+	unsigned long flags;
+	u32 res;
+
+	if (cc->core->id.rev < 20)
+		return 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPULLDOWN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
+}
+
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
 {
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index a085d986c804..256702663b68 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -606,6 +606,8 @@ u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value);
+u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value);
+u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value);
 
 /* PMU support */
 extern void bcma_pmu_init(struct bcma_drv_cc *cc);
-- 
cgit v1.2.3-55-g7522


From cf0936b06d8e98a157630e99f647e2ff6d29d7ad Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 20 Nov 2012 22:24:30 +0000
Subject: bcma: add GPIO driver

Register a GPIO driver to access the GPIOs provided by the chip.
The GPIOs of the SoC should always start at 0 and the other GPIOs could
start at a random position. There is just one SoC in a system and when
they start at 0 the number is predictable.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4587
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/bcma/Kconfig                        |  9 +++
 drivers/bcma/Makefile                       |  1 +
 drivers/bcma/bcma_private.h                 | 10 +++
 drivers/bcma/driver_gpio.c                  | 98 +++++++++++++++++++++++++++++
 drivers/bcma/main.c                         |  5 ++
 include/linux/bcma/bcma_driver_chipcommon.h |  5 ++
 6 files changed, 128 insertions(+)
 create mode 100644 drivers/bcma/driver_gpio.c

(limited to 'include/linux')

diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index a533af218368..d7b56a88c9f4 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -65,6 +65,15 @@ config BCMA_DRIVER_GMAC_CMN
 
 	  If unsure, say N
 
+config BCMA_DRIVER_GPIO
+	bool "BCMA GPIO driver"
+	depends on BCMA
+	select GPIOLIB
+	help
+	  Driver to provide access to the GPIO pins of the bcma bus.
+
+	  If unsure, say N
+
 config BCMA_DEBUG
 	bool "BCMA debugging"
 	depends on BCMA
diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile
index 8ad42d41b2f2..734b32f09c0a 100644
--- a/drivers/bcma/Makefile
+++ b/drivers/bcma/Makefile
@@ -6,6 +6,7 @@ bcma-y					+= driver_pci.o
 bcma-$(CONFIG_BCMA_DRIVER_PCI_HOSTMODE)	+= driver_pci_host.o
 bcma-$(CONFIG_BCMA_DRIVER_MIPS)		+= driver_mips.o
 bcma-$(CONFIG_BCMA_DRIVER_GMAC_CMN)	+= driver_gmac_cmn.o
+bcma-$(CONFIG_BCMA_DRIVER_GPIO)		+= driver_gpio.o
 bcma-$(CONFIG_BCMA_HOST_PCI)		+= host_pci.o
 bcma-$(CONFIG_BCMA_HOST_SOC)		+= host_soc.o
 obj-$(CONFIG_BCMA)			+= bcma.o
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 169fc58427d3..8cd80bff8e91 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -89,4 +89,14 @@ bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
 void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
 #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */
 
+#ifdef CONFIG_BCMA_DRIVER_GPIO
+/* driver_gpio.c */
+int bcma_gpio_init(struct bcma_drv_cc *cc);
+#else
+static inline int bcma_gpio_init(struct bcma_drv_cc *cc)
+{
+	return -ENOTSUPP;
+}
+#endif /* CONFIG_BCMA_DRIVER_GPIO */
+
 #endif
diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
new file mode 100644
index 000000000000..9a6f585da2d9
--- /dev/null
+++ b/drivers/bcma/driver_gpio.c
@@ -0,0 +1,98 @@
+/*
+ * Broadcom specific AMBA
+ * GPIO driver
+ *
+ * Copyright 2011, Broadcom Corporation
+ * Copyright 2012, Hauke Mehrtens <hauke@hauke-m.de>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <linux/gpio.h>
+#include <linux/export.h>
+#include <linux/bcma/bcma.h>
+
+#include "bcma_private.h"
+
+static inline struct bcma_drv_cc *bcma_gpio_get_cc(struct gpio_chip *chip)
+{
+	return container_of(chip, struct bcma_drv_cc, gpio);
+}
+
+static int bcma_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	return !!bcma_chipco_gpio_in(cc, 1 << gpio);
+}
+
+static void bcma_gpio_set_value(struct gpio_chip *chip, unsigned gpio,
+				int value)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_out(cc, 1 << gpio, value ? 1 << gpio : 0);
+}
+
+static int bcma_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_outen(cc, 1 << gpio, 0);
+	return 0;
+}
+
+static int bcma_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
+				      int value)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_outen(cc, 1 << gpio, 1 << gpio);
+	bcma_chipco_gpio_out(cc, 1 << gpio, value ? 1 << gpio : 0);
+	return 0;
+}
+
+static int bcma_gpio_request(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_control(cc, 1 << gpio, 0);
+	/* clear pulldown */
+	bcma_chipco_gpio_pulldown(cc, 1 << gpio, 0);
+	/* Set pullup */
+	bcma_chipco_gpio_pullup(cc, 1 << gpio, 1 << gpio);
+
+	return 0;
+}
+
+static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	/* clear pullup */
+	bcma_chipco_gpio_pullup(cc, 1 << gpio, 0);
+}
+
+int bcma_gpio_init(struct bcma_drv_cc *cc)
+{
+	struct gpio_chip *chip = &cc->gpio;
+
+	chip->label		= "bcma_gpio";
+	chip->owner		= THIS_MODULE;
+	chip->request		= bcma_gpio_request;
+	chip->free		= bcma_gpio_free;
+	chip->get		= bcma_gpio_get_value;
+	chip->set		= bcma_gpio_set_value;
+	chip->direction_input	= bcma_gpio_direction_input;
+	chip->direction_output	= bcma_gpio_direction_output;
+	chip->ngpio		= 16;
+	/* There is just one SoC in one device and its GPIO addresses should be
+	 * deterministic to address them more easily. The other buses could get
+	 * a random base number. */
+	if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC)
+		chip->base		= 0;
+	else
+		chip->base		= -1;
+
+	return gpiochip_add(chip);
+}
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index d865470bc951..478ba01ca0c2 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -152,6 +152,11 @@ static int bcma_register_cores(struct bcma_bus *bus)
 			bcma_err(bus, "Error registering NAND flash\n");
 	}
 #endif
+	err = bcma_gpio_init(&bus->drv_cc);
+	if (err == -ENOTSUPP)
+		bcma_debug(bus, "GPIO driver not activated\n");
+	else if (err)
+		bcma_err(bus, "Error registering GPIO driver: %i\n", err);
 
 	return 0;
 }
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 256702663b68..7d662a988d24 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -1,6 +1,8 @@
 #ifndef LINUX_BCMA_DRIVER_CC_H_
 #define LINUX_BCMA_DRIVER_CC_H_
 
+#include <linux/gpio.h>
+
 /** ChipCommon core registers. **/
 #define BCMA_CC_ID			0x0000
 #define  BCMA_CC_ID_ID			0x0000FFFF
@@ -570,6 +572,9 @@ struct bcma_drv_cc {
 
 	/* Lock for GPIO register access. */
 	spinlock_t gpio_lock;
+#ifdef CONFIG_BCMA_DRIVER_GPIO
+	struct gpio_chip gpio;
+#endif
 };
 
 /* Register access */
-- 
cgit v1.2.3-55-g7522


From da22f22e91f0d14d996c7258101575a5a06ddf85 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 20 Nov 2012 22:24:31 +0000
Subject: ssb: add ssb_chipco_gpio_pull{up,down}

Add functions to access the GPIO registers for pullup and pulldown.
These are needed for handling gpio registration.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4589
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/ssb/driver_chipcommon.c           | 16 ++++++++++++++++
 include/linux/ssb/ssb_driver_chipcommon.h |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index e9d2ca11283b..4df492665565 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -442,6 +442,22 @@ u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value)
 	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value);
 }
 
+u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value)
+{
+	if (cc->dev->id.revision < 20)
+		return 0xffffffff;
+
+	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value);
+}
+
+u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value)
+{
+	if (cc->dev->id.revision < 20)
+		return 0xffffffff;
+
+	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value);
+}
+
 #ifdef CONFIG_SSB_SERIAL
 int ssb_chipco_serial_init(struct ssb_chipcommon *cc,
 			   struct ssb_serial_port *ports)
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index c2b02a5c86ae..c8d07c95d76e 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -644,6 +644,8 @@ u32 ssb_chipco_gpio_outen(struct ssb_chipcommon *cc, u32 mask, u32 value);
 u32 ssb_chipco_gpio_control(struct ssb_chipcommon *cc, u32 mask, u32 value);
 u32 ssb_chipco_gpio_intmask(struct ssb_chipcommon *cc, u32 mask, u32 value);
 u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value);
+u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value);
+u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value);
 
 #ifdef CONFIG_SSB_SERIAL
 extern int ssb_chipco_serial_init(struct ssb_chipcommon *cc,
-- 
cgit v1.2.3-55-g7522


From 394bc7e38be79987ed15de203920c3cddb724cc1 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 20 Nov 2012 22:24:32 +0000
Subject: ssb: add locking around gpio register accesses

The GPIOs are access through some registers in the chip common core or
over extif. We need locking around these GPIO accesses, all GPIOs are
accessed through the same registers and parallel writes will cause
problems.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4590
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/ssb/driver_chipcommon.c           | 66 +++++++++++++++++++++++++++----
 drivers/ssb/driver_extif.c                | 43 ++++++++++++++++++--
 drivers/ssb/main.c                        |  1 +
 drivers/ssb/ssb_private.h                 |  8 ++++
 include/linux/ssb/ssb_driver_chipcommon.h |  1 +
 include/linux/ssb/ssb_driver_extif.h      |  1 +
 6 files changed, 109 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 4df492665565..24e02bb2ecd8 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -284,6 +284,9 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc)
 {
 	if (!cc->dev)
 		return; /* We don't have a ChipCommon */
+
+	spin_lock_init(&cc->gpio_lock);
+
 	if (cc->dev->id.revision >= 11)
 		cc->status = chipco_read32(cc, SSB_CHIPCO_CHIPSTAT);
 	ssb_dprintk(KERN_INFO PFX "chipcommon status is 0x%x\n", cc->status);
@@ -418,44 +421,93 @@ u32 ssb_chipco_gpio_in(struct ssb_chipcommon *cc, u32 mask)
 
 u32 ssb_chipco_gpio_out(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUT, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUT, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_outen(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUTEN, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUTEN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_control(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOCTL, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOCTL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 EXPORT_SYMBOL(ssb_chipco_gpio_control);
 
 u32 ssb_chipco_gpio_intmask(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOIRQ, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOIRQ, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
+	unsigned long flags;
+	u32 res = 0;
+
 	if (cc->dev->id.revision < 20)
 		return 0xffffffff;
 
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value);
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
+	unsigned long flags;
+	u32 res = 0;
+
 	if (cc->dev->id.revision < 20)
 		return 0xffffffff;
 
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value);
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 #ifdef CONFIG_SSB_SERIAL
diff --git a/drivers/ssb/driver_extif.c b/drivers/ssb/driver_extif.c
index dc47f30e9cf7..e1d0bb8ad725 100644
--- a/drivers/ssb/driver_extif.c
+++ b/drivers/ssb/driver_extif.c
@@ -118,6 +118,13 @@ void ssb_extif_watchdog_timer_set(struct ssb_extif *extif,
 	extif_write32(extif, SSB_EXTIF_WATCHDOG, ticks);
 }
 
+void ssb_extif_init(struct ssb_extif *extif)
+{
+	if (!extif->dev)
+		return; /* We don't have a Extif core */
+	spin_lock_init(&extif->gpio_lock);
+}
+
 u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
 {
 	return extif_read32(extif, SSB_EXTIF_GPIO_IN) & mask;
@@ -125,22 +132,50 @@ u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
 
 u32 ssb_extif_gpio_out(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_OUT(0),
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_OUT(0),
 				   mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_extif_gpio_outen(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_OUTEN(0),
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_OUTEN(0),
 				   mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_extif_gpio_polarity(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_INTPOL, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_INTPOL, mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_extif_gpio_intmask(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_INTMASK, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_INTMASK, mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index df0f145c22fc..6fe2d102734a 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -796,6 +796,7 @@ static int __devinit ssb_bus_register(struct ssb_bus *bus,
 	if (err)
 		goto err_pcmcia_exit;
 	ssb_chipcommon_init(&bus->chipco);
+	ssb_extif_init(&bus->extif);
 	ssb_mipscore_init(&bus->mipscore);
 	err = ssb_fetch_invariants(bus, get_invariants);
 	if (err) {
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index a305550b4b65..d6a1ba9394d9 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -211,4 +211,12 @@ static inline void b43_pci_ssb_bridge_exit(void)
 extern u32 ssb_pmu_get_cpu_clock(struct ssb_chipcommon *cc);
 extern u32 ssb_pmu_get_controlclock(struct ssb_chipcommon *cc);
 
+#ifdef CONFIG_SSB_DRIVER_EXTIF
+extern void ssb_extif_init(struct ssb_extif *extif);
+#else
+static inline void ssb_extif_init(struct ssb_extif *extif)
+{
+}
+#endif
+
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index c8d07c95d76e..30b694345d47 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -590,6 +590,7 @@ struct ssb_chipcommon {
 	u32 status;
 	/* Fast Powerup Delay constant */
 	u16 fast_pwrup_delay;
+	spinlock_t gpio_lock;
 	struct ssb_chipcommon_pmu pmu;
 };
 
diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h
index 91161f0aa22b..bd2306854a91 100644
--- a/include/linux/ssb/ssb_driver_extif.h
+++ b/include/linux/ssb/ssb_driver_extif.h
@@ -158,6 +158,7 @@
 
 struct ssb_extif {
 	struct ssb_device *dev;
+	spinlock_t gpio_lock;
 };
 
 static inline bool ssb_extif_available(struct ssb_extif *extif)
-- 
cgit v1.2.3-55-g7522


From ec43b08b5733494ad88aa618ecdf534320dd8207 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 20 Nov 2012 22:24:33 +0000
Subject: ssb: add GPIO driver

Register a GPIO driver to access the GPIOs provided by the chip.
The GPIOs of the SoC should always start at 0 and the other GPIOs could
start at a random position. There is just one SoC in a system and when
they start at 0 the number is predictable.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4591
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/ssb/Kconfig       |   9 +++
 drivers/ssb/Makefile      |   1 +
 drivers/ssb/driver_gpio.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ssb/main.c        |   6 ++
 drivers/ssb/ssb_private.h |   9 +++
 include/linux/ssb/ssb.h   |   4 ++
 6 files changed, 205 insertions(+)
 create mode 100644 drivers/ssb/driver_gpio.c

(limited to 'include/linux')

diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 42cdaa9a4d8a..ff3c8a21f10d 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -160,4 +160,13 @@ config SSB_DRIVER_GIGE
 
 	  If unsure, say N
 
+config SSB_DRIVER_GPIO
+	bool "SSB GPIO driver"
+	depends on SSB
+	select GPIOLIB
+	help
+	  Driver to provide access to the GPIO pins on the bus.
+
+	  If unsure, say N
+
 endmenu
diff --git a/drivers/ssb/Makefile b/drivers/ssb/Makefile
index 656e58b92618..9159ba77c388 100644
--- a/drivers/ssb/Makefile
+++ b/drivers/ssb/Makefile
@@ -15,6 +15,7 @@ ssb-$(CONFIG_SSB_DRIVER_MIPS)		+= driver_mipscore.o
 ssb-$(CONFIG_SSB_DRIVER_EXTIF)		+= driver_extif.o
 ssb-$(CONFIG_SSB_DRIVER_PCICORE)	+= driver_pcicore.o
 ssb-$(CONFIG_SSB_DRIVER_GIGE)		+= driver_gige.o
+ssb-$(CONFIG_SSB_DRIVER_GPIO)		+= driver_gpio.o
 
 # b43 pci-ssb-bridge driver
 # Not strictly a part of SSB, but kept here for convenience
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
new file mode 100644
index 000000000000..97ac0a38e3d0
--- /dev/null
+++ b/drivers/ssb/driver_gpio.c
@@ -0,0 +1,176 @@
+/*
+ * Sonics Silicon Backplane
+ * GPIO driver
+ *
+ * Copyright 2011, Broadcom Corporation
+ * Copyright 2012, Hauke Mehrtens <hauke@hauke-m.de>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <linux/gpio.h>
+#include <linux/export.h>
+#include <linux/ssb/ssb.h>
+
+#include "ssb_private.h"
+
+static struct ssb_bus *ssb_gpio_get_bus(struct gpio_chip *chip)
+{
+	return container_of(chip, struct ssb_bus, gpio);
+}
+
+static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	return !!ssb_chipco_gpio_in(&bus->chipco, 1 << gpio);
+}
+
+static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio,
+				      int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_out(&bus->chipco, 1 << gpio, value ? 1 << gpio : 0);
+}
+
+static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip,
+					   unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_outen(&bus->chipco, 1 << gpio, 0);
+	return 0;
+}
+
+static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip,
+					    unsigned gpio, int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_outen(&bus->chipco, 1 << gpio, 1 << gpio);
+	ssb_chipco_gpio_out(&bus->chipco, 1 << gpio, value ? 1 << gpio : 0);
+	return 0;
+}
+
+static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_control(&bus->chipco, 1 << gpio, 0);
+	/* clear pulldown */
+	ssb_chipco_gpio_pulldown(&bus->chipco, 1 << gpio, 0);
+	/* Set pullup */
+	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 1 << gpio);
+
+	return 0;
+}
+
+static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	/* clear pullup */
+	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0);
+}
+
+static int ssb_gpio_chipco_init(struct ssb_bus *bus)
+{
+	struct gpio_chip *chip = &bus->gpio;
+
+	chip->label		= "ssb_chipco_gpio";
+	chip->owner		= THIS_MODULE;
+	chip->request		= ssb_gpio_chipco_request;
+	chip->free		= ssb_gpio_chipco_free;
+	chip->get		= ssb_gpio_chipco_get_value;
+	chip->set		= ssb_gpio_chipco_set_value;
+	chip->direction_input	= ssb_gpio_chipco_direction_input;
+	chip->direction_output	= ssb_gpio_chipco_direction_output;
+	chip->ngpio		= 16;
+	/* There is just one SoC in one device and its GPIO addresses should be
+	 * deterministic to address them more easily. The other buses could get
+	 * a random base number. */
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		chip->base		= 0;
+	else
+		chip->base		= -1;
+
+	return gpiochip_add(chip);
+}
+
+#ifdef CONFIG_SSB_DRIVER_EXTIF
+
+static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	return !!ssb_extif_gpio_in(&bus->extif, 1 << gpio);
+}
+
+static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio,
+				     int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_extif_gpio_out(&bus->extif, 1 << gpio, value ? 1 << gpio : 0);
+}
+
+static int ssb_gpio_extif_direction_input(struct gpio_chip *chip,
+					  unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_extif_gpio_outen(&bus->extif, 1 << gpio, 0);
+	return 0;
+}
+
+static int ssb_gpio_extif_direction_output(struct gpio_chip *chip,
+					   unsigned gpio, int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_extif_gpio_outen(&bus->extif, 1 << gpio, 1 << gpio);
+	ssb_extif_gpio_out(&bus->extif, 1 << gpio, value ? 1 << gpio : 0);
+	return 0;
+}
+
+static int ssb_gpio_extif_init(struct ssb_bus *bus)
+{
+	struct gpio_chip *chip = &bus->gpio;
+
+	chip->label		= "ssb_extif_gpio";
+	chip->owner		= THIS_MODULE;
+	chip->get		= ssb_gpio_extif_get_value;
+	chip->set		= ssb_gpio_extif_set_value;
+	chip->direction_input	= ssb_gpio_extif_direction_input;
+	chip->direction_output	= ssb_gpio_extif_direction_output;
+	chip->ngpio		= 5;
+	/* There is just one SoC in one device and its GPIO addresses should be
+	 * deterministic to address them more easily. The other buses could get
+	 * a random base number. */
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		chip->base		= 0;
+	else
+		chip->base		= -1;
+
+	return gpiochip_add(chip);
+}
+
+#else
+static int ssb_gpio_extif_init(struct ssb_bus *bus)
+{
+	return -ENOTSUPP;
+}
+#endif
+
+int ssb_gpio_init(struct ssb_bus *bus)
+{
+	if (ssb_chipco_available(&bus->chipco))
+		return ssb_gpio_chipco_init(bus);
+	else if (ssb_extif_available(&bus->extif))
+		return ssb_gpio_extif_init(bus);
+	else
+		SSB_WARN_ON(1);
+
+	return -1;
+}
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 6fe2d102734a..87f0ddf4f3f3 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -798,6 +798,12 @@ static int __devinit ssb_bus_register(struct ssb_bus *bus,
 	ssb_chipcommon_init(&bus->chipco);
 	ssb_extif_init(&bus->extif);
 	ssb_mipscore_init(&bus->mipscore);
+	err = ssb_gpio_init(bus);
+	if (err == -ENOTSUPP)
+		ssb_dprintk(KERN_DEBUG PFX "GPIO driver not activated\n");
+	else if (err)
+		ssb_dprintk(KERN_ERR PFX
+			   "Error registering GPIO driver: %i\n", err);
 	err = ssb_fetch_invariants(bus, get_invariants);
 	if (err) {
 		ssb_bus_may_powerdown(bus);
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index d6a1ba9394d9..463b76a2140b 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -219,4 +219,13 @@ static inline void ssb_extif_init(struct ssb_extif *extif)
 }
 #endif
 
+#ifdef CONFIG_SSB_DRIVER_GPIO
+extern int ssb_gpio_init(struct ssb_bus *bus);
+#else /* CONFIG_SSB_DRIVER_GPIO */
+static inline int ssb_gpio_init(struct ssb_bus *bus)
+{
+	return -ENOTSUPP;
+}
+#endif /* CONFIG_SSB_DRIVER_GPIO */
+
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index bb674c02f306..3862a5bbd73b 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <linux/gpio.h>
 #include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
 
@@ -433,6 +434,9 @@ struct ssb_bus {
 	/* Lock for GPIO register access. */
 	spinlock_t gpio_lock;
 #endif /* EMBEDDED */
+#ifdef CONFIG_SSB_DRIVER_GPIO
+	struct gpio_chip gpio;
+#endif /* DRIVER_GPIO */
 
 	/* Internal-only stuff follows. Do not touch. */
 	struct list_head list;
-- 
cgit v1.2.3-55-g7522


From fbfddae696572e57a441252abbd65f7220e06030 Mon Sep 17 00:00:00 2001
From: Toshi Kani
Date: Wed, 21 Nov 2012 01:36:28 +0000
Subject: ACPI: Add acpi_handle_<level>() interfaces

This patch introduces acpi_handle_<level>(), where <level> is
a kernel message level such as err/warn/info, to support improved
logging messages for ACPI, esp. hot-plug operations.
acpi_handle_<level>() appends "ACPI" prefix and ACPI object path
to the messages.  This improves diagnosis of hotplug operations
since an error message in a log file identifies an object that
caused an issue.  This interface acquires the global namespace
mutex to obtain an object path.  In interrupt context, it shows
the object path as <n/a>.

acpi_handle_<level>() takes acpi_handle as an argument, which is
passed to ACPI hotplug notify handlers from the ACPICA.  Therefore,
it is always available unlike other kernel objects, such as device.

For example:
  acpi_handle_err(handle, "Device don't exist, dropping EJECT\n");
logs an error message like this at KERN_ERR.
  ACPI: \_SB_.SCK4.CPU4: Device don't exist, dropping EJECT

ACPI hot-plug drivers can use acpi_handle_<level>() when they need
to identify a target ACPI object path in their messages, such as
error cases.  The usage model is similar to dev_<level>().
acpi_handle_<level>() can be used when a device is not created or
is invalid during hot-plug operations.  ACPI object path is also
consistent on the platform, unlike device name that gets incremented
over hotplug operations.

ACPI drivers should use dev_<level>() when a device object is valid.
Device name provides more user friendly information, and avoids
acquiring the global ACPI namespace mutex.  ACPI drivers also
continue to use pr_<level>() when they do not need to specify device
information, such as boot-up messages.

Note: ACPI_[WARNING|INFO|ERROR]() are intended for the ACPICA and
are not associated with the kernel message level.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Tested-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/utils.c | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 462f7e300363..744371304313 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -28,6 +28,8 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/hardirq.h>
+#include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
@@ -457,3 +459,39 @@ acpi_evaluate_hotplug_ost(acpi_handle handle, u32 source_event,
 #endif
 }
 EXPORT_SYMBOL(acpi_evaluate_hotplug_ost);
+
+/**
+ * acpi_handle_printk: Print message with ACPI prefix and object path
+ *
+ * This function is called through acpi_handle_<level> macros and prints
+ * a message with ACPI prefix and object path.  This function acquires
+ * the global namespace mutex to obtain an object path.  In interrupt
+ * context, it shows the object path as <n/a>.
+ */
+void
+acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	struct acpi_buffer buffer = {
+		.length = ACPI_ALLOCATE_BUFFER,
+		.pointer = NULL
+	};
+	const char *path;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	if (in_interrupt() ||
+	    acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer) != AE_OK)
+		path = "<n/a>";
+	else
+		path = buffer.pointer;
+
+	printk("%sACPI: %s: %pV", level, path, &vaf);
+
+	va_end(args);
+	kfree(buffer.pointer);
+}
+EXPORT_SYMBOL(acpi_handle_printk);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 87edfcc0ab62..9201ac1f0511 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -434,4 +434,47 @@ acpi_status acpi_os_prepare_sleep(u8 sleep_state,
 #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
 #endif
 
+#ifdef CONFIG_ACPI
+__printf(3, 4)
+void acpi_handle_printk(const char *level, acpi_handle handle,
+			const char *fmt, ...);
+#else	/* !CONFIG_ACPI */
+static inline __printf(3, 4) void
+acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {}
+#endif	/* !CONFIG_ACPI */
+
+/*
+ * acpi_handle_<level>: Print message with ACPI prefix and object path
+ *
+ * These interfaces acquire the global namespace mutex to obtain an object
+ * path.  In interrupt context, it shows the object path as <n/a>.
+ */
+#define acpi_handle_emerg(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_EMERG, handle, fmt, ##__VA_ARGS__)
+#define acpi_handle_alert(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_ALERT, handle, fmt, ##__VA_ARGS__)
+#define acpi_handle_crit(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_CRIT, handle, fmt, ##__VA_ARGS__)
+#define acpi_handle_err(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_ERR, handle, fmt, ##__VA_ARGS__)
+#define acpi_handle_warn(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_WARNING, handle, fmt, ##__VA_ARGS__)
+#define acpi_handle_notice(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_NOTICE, handle, fmt, ##__VA_ARGS__)
+#define acpi_handle_info(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__)
+
+/* REVISIT: Support CONFIG_DYNAMIC_DEBUG when necessary */
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+#define acpi_handle_debug(handle, fmt, ...)				\
+	acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__)
+#else
+#define acpi_handle_debug(handle, fmt, ...)				\
+({									\
+	if (0)								\
+		acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); \
+	0;								\
+})
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3-55-g7522


From d79251f0fc2fdb7587c99617aae0bf52ff6c5510 Mon Sep 17 00:00:00 2001
From: Bill Pemberton
Date: Mon, 19 Nov 2012 13:25:25 -0500
Subject: ipack: remove use of __devinitconst

CONFIG_HOTPLUG is going away as an option so __devinitconst is no
longer needed.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ipack.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipack.h b/include/linux/ipack.h
index a360c73129a6..fea12cbb2aeb 100644
--- a/include/linux/ipack.h
+++ b/include/linux/ipack.h
@@ -229,8 +229,7 @@ void ipack_device_unregister(struct ipack_device *dev);
  * in a generic manner.
  */
 #define DEFINE_IPACK_DEVICE_TABLE(_table) \
-	const struct ipack_device_id _table[] __devinitconst
-
+	const struct ipack_device_id _table[]
 /**
  * IPACK_DEVICE - macro used to describe a specific IndustryPack device
  * @_format: the format version (currently either 1 or 2, 8 bit value)
-- 
cgit v1.2.3-55-g7522


From 743179849015dc71bb2ea63d8cd4bfa7fdfb4bc6 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD
Date: Thu, 15 Nov 2012 20:19:57 +0100
Subject: of_spi: add generic binding support to specify cs gpio

This will allow to use gpio for chip select with no modification in the
driver binding

When use the cs-gpios, the gpio number will be passed via the cs_gpio field
and the number of chip select will automatically increased with max(hw cs, gpio cs).

So if for example the controller has 2 CS lines, and the cs-gpios
property looks like this:

cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>;

Then it should be configured so that num_chipselect = 4 with the
following mapping:

cs0 : &gpio1 0 0
cs1 : native
cs2 : &gpio1 1 0
cs3 : &gpio1 2 0

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: devicetree-discuss@lists.ozlabs.org
Cc: spi-devel-general@lists.sourceforge.net
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
[grant.likely: fixed up type of cs count so min() can do type checking]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 Documentation/devicetree/bindings/spi/spi-bus.txt | 20 +++++++++
 drivers/spi/spi.c                                 | 54 +++++++++++++++++++++--
 include/linux/spi/spi.h                           |  3 ++
 3 files changed, 74 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/spi-bus.txt b/Documentation/devicetree/bindings/spi/spi-bus.txt
index d2c33d0f533e..77a8b0d39b54 100644
--- a/Documentation/devicetree/bindings/spi/spi-bus.txt
+++ b/Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -12,6 +12,7 @@ The SPI master node requires the following properties:
 - #size-cells     - should be zero.
 - compatible      - name of SPI bus controller following generic names
     		recommended practice.
+- cs-gpios	  - (optional) gpios chip select.
 No other properties are required in the SPI bus node.  It is assumed
 that a driver for an SPI bus device will understand that it is an SPI bus.
 However, the binding does not attempt to define the specific method for
@@ -24,6 +25,22 @@ support describing the chip select layout.
 Optional property:
 - num-cs : total number of chipselects
 
+If cs-gpios is used the number of chip select will automatically increased
+with max(cs-gpios > hw cs)
+
+So if for example the controller has 2 CS lines, and the cs-gpios
+property looks like this:
+
+cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>;
+
+Then it should be configured so that num_chipselect = 4 with the
+following mapping:
+
+cs0 : &gpio1 0 0
+cs1 : native
+cs2 : &gpio1 1 0
+cs3 : &gpio1 2 0
+
 SPI slave nodes must be children of the SPI master node and can
 contain the following properties.
 - reg             - (required) chip select address of device.
@@ -37,6 +54,9 @@ contain the following properties.
 - spi-cs-high     - (optional) Empty property indicating device requires
     		chip select active high
 
+If a gpio chipselect is used for the SPI slave the gpio number will be passed
+via the cs_gpio
+
 SPI example for an MPC5200 SPI bus:
 	spi@f00 {
 		#address-cells = <1>;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 84c2861d6f4d..1587a4a5ff41 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
+#include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -327,6 +328,7 @@ struct spi_device *spi_alloc_device(struct spi_master *master)
 	spi->dev.parent = &master->dev;
 	spi->dev.bus = &spi_bus_type;
 	spi->dev.release = spidev_release;
+	spi->cs_gpio = -EINVAL;
 	device_initialize(&spi->dev);
 	return spi;
 }
@@ -344,15 +346,16 @@ EXPORT_SYMBOL_GPL(spi_alloc_device);
 int spi_add_device(struct spi_device *spi)
 {
 	static DEFINE_MUTEX(spi_add_lock);
-	struct device *dev = spi->master->dev.parent;
+	struct spi_master *master = spi->master;
+	struct device *dev = master->dev.parent;
 	struct device *d;
 	int status;
 
 	/* Chipselects are numbered 0..max; validate. */
-	if (spi->chip_select >= spi->master->num_chipselect) {
+	if (spi->chip_select >= master->num_chipselect) {
 		dev_err(dev, "cs%d >= max %d\n",
 			spi->chip_select,
-			spi->master->num_chipselect);
+			master->num_chipselect);
 		return -EINVAL;
 	}
 
@@ -376,6 +379,9 @@ int spi_add_device(struct spi_device *spi)
 		goto done;
 	}
 
+	if (master->cs_gpios)
+		spi->cs_gpio = master->cs_gpios[spi->chip_select];
+
 	/* Drivers may modify this initial i/o setup, but will
 	 * normally rely on the device being setup.  Devices
 	 * using SPI_CS_HIGH can't coexist well otherwise...
@@ -946,6 +952,44 @@ struct spi_master *spi_alloc_master(struct device *dev, unsigned size)
 }
 EXPORT_SYMBOL_GPL(spi_alloc_master);
 
+#ifdef CONFIG_OF
+static int of_spi_register_master(struct spi_master *master)
+{
+	u16 nb;
+	int i, *cs;
+	struct device_node *np = master->dev.of_node;
+
+	if (!np)
+		return 0;
+
+	nb = of_gpio_named_count(np, "cs-gpios");
+	master->num_chipselect = max(nb, master->num_chipselect);
+
+	if (nb < 1)
+		return 0;
+
+	cs = devm_kzalloc(&master->dev,
+			  sizeof(int) * master->num_chipselect,
+			  GFP_KERNEL);
+	master->cs_gpios = cs;
+
+	if (!master->cs_gpios)
+		return -ENOMEM;
+
+	memset(cs, -EINVAL, master->num_chipselect);
+
+	for (i = 0; i < nb; i++)
+		cs[i] = of_get_named_gpio(np, "cs-gpios", i);
+
+	return 0;
+}
+#else
+static int of_spi_register_master(struct spi_master *master)
+{
+	return 0;
+}
+#endif
+
 /**
  * spi_register_master - register SPI master controller
  * @master: initialized master, originally from spi_alloc_master()
@@ -977,6 +1021,10 @@ int spi_register_master(struct spi_master *master)
 	if (!dev)
 		return -ENODEV;
 
+	status = of_spi_register_master(master);
+	if (status)
+		return status;
+
 	/* even if it's just one always-selected device, there must
 	 * be at least one chipselect
 	 */
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index fa702aeb5038..f62918946d86 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -90,6 +90,7 @@ struct spi_device {
 	void			*controller_state;
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
+	int			cs_gpio;	/* chip select gpio */
 
 	/*
 	 * likely need more hooks for more protocol options affecting how
@@ -362,6 +363,8 @@ struct spi_master {
 	int (*transfer_one_message)(struct spi_master *master,
 				    struct spi_message *mesg);
 	int (*unprepare_transfer_hardware)(struct spi_master *master);
+	/* gpio chip select */
+	int			*cs_gpios;
 };
 
 static inline void *spi_master_get_devdata(struct spi_master *master)
-- 
cgit v1.2.3-55-g7522


From dc96efb72054985c0912f831da009a2da4e9f6dd Mon Sep 17 00:00:00 2001
From: Matt Schulte
Date: Mon, 19 Nov 2012 09:12:04 -0600
Subject: Serial: Add support for new devices: Exar's XR17V35x family of
 multi-port PCIe UARTs

Add support for new devices: Exar's XR17V35x family of multi-port PCIe UARTs.

Signed-off-by: Matt Schulte <matts@commtech-fastcom.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.c     | 71 ++++++++++++++++++++++++++++
 drivers/tty/serial/8250/8250_pci.c | 96 ++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h            |  3 ++
 include/uapi/linux/serial_core.h   |  3 +-
 include/uapi/linux/serial_reg.h    |  6 +++
 5 files changed, 178 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c
index 2af83a246499..3624df674a31 100644
--- a/drivers/tty/serial/8250/8250.c
+++ b/drivers/tty/serial/8250/8250.c
@@ -282,6 +282,15 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR,
 	},
+	[PORT_XR17V35X] = {
+		.name		= "XR17V35X",
+		.fifo_size	= 256,
+		.tx_loadsz	= 256,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 |
+				  UART_FCR_T_TRIG_11,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR |
+				  UART_CAP_SLEEP,
+	},
 	[PORT_LPC3220] = {
 		.name		= "LPC3220",
 		.fifo_size	= 64,
@@ -455,6 +464,7 @@ static void io_serial_out(struct uart_port *p, int offset, int value)
 }
 
 static int serial8250_default_handle_irq(struct uart_port *port);
+static int exar_handle_irq(struct uart_port *port);
 
 static void set_io_from_upio(struct uart_port *p)
 {
@@ -574,6 +584,18 @@ EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos);
  */
 static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
 {
+	/*
+	 * Exar UARTs have a SLEEP register that enables or disables
+	 * each UART to enter sleep mode separately.  On the XR17V35x the
+	 * register is accessible to each UART at the UART_EXAR_SLEEP
+	 * offset but the UART channel may only write to the corresponding
+	 * bit.
+	 */
+	if (p->port.type == PORT_XR17V35X) {
+		serial_out(p, UART_EXAR_SLEEP, 0xff);
+		return;
+	}
+
 	if (p->capabilities & UART_CAP_SLEEP) {
 		if (p->capabilities & UART_CAP_EFR) {
 			serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
@@ -881,6 +903,27 @@ static void autoconfig_16550a(struct uart_8250_port *up)
 	up->port.type = PORT_16550A;
 	up->capabilities |= UART_CAP_FIFO;
 
+	/*
+	 * XR17V35x UARTs have an extra divisor register, DLD
+	 * that gets enabled with when DLAB is set which will
+	 * cause the device to incorrectly match and assign
+	 * port type to PORT_16650.  The EFR for this UART is
+	 * found at offset 0x09. Instead check the Deice ID (DVID)
+	 * register for a 2, 4 or 8 port UART.
+	 */
+	status1 = serial_in(up, UART_EXAR_DVID);
+	if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) {
+		if (up->port.flags & UPF_EXAR_EFR) {
+			DEBUG_AUTOCONF("Exar XR17V35x ");
+			up->port.type = PORT_XR17V35X;
+			up->capabilities |= UART_CAP_AFE | UART_CAP_EFR |
+						UART_CAP_SLEEP;
+
+			return;
+		}
+
+	}
+
 	/*
 	 * Check for presence of the EFR when DLAB is set.
 	 * Only ST16C650V1 UARTs pass this test.
@@ -1515,6 +1558,30 @@ static int serial8250_default_handle_irq(struct uart_port *port)
 	return serial8250_handle_irq(port, iir);
 }
 
+/*
+ * These Exar UARTs have an extra interrupt indicator that could
+ * fire for a few unimplemented interrupts.  One of which is a
+ * wakeup event when coming out of sleep.  Put this here just
+ * to be on the safe side that these interrupts don't go unhandled.
+ */
+static int exar_handle_irq(struct uart_port *port)
+{
+	unsigned char int0, int1, int2, int3;
+	unsigned int iir = serial_port_in(port, UART_IIR);
+	int ret;
+
+	ret = serial8250_handle_irq(port, iir);
+
+	if (port->type == PORT_XR17V35X) {
+		int0 = serial_port_in(port, 0x80);
+		int1 = serial_port_in(port, 0x81);
+		int2 = serial_port_in(port, 0x82);
+		int3 = serial_port_in(port, 0x83);
+	}
+
+	return ret;
+}
+
 /*
  * This is the serial driver's interrupt routine.
  *
@@ -2614,6 +2681,10 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 		serial8250_release_rsa_resource(up);
 	if (port->type == PORT_UNKNOWN)
 		serial8250_release_std_resource(up);
+
+	/* Fixme: probably not the best place for this */
+	if (port->type == PORT_XR17V35X)
+		port->handle_irq = exar_handle_irq;
 }
 
 static int
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 97058c1d7d45..2285d3283b3b 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1164,6 +1164,39 @@ pci_xr17c154_setup(struct serial_private *priv,
 	return pci_default_setup(priv, board, port, idx);
 }
 
+static int
+pci_xr17v35x_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
+		  struct uart_8250_port *port, int idx)
+{
+	u8 __iomem *p;
+
+	p = pci_ioremap_bar(priv->dev, 0);
+
+	port->port.flags |= UPF_EXAR_EFR;
+
+	/*
+	 * Setup Multipurpose Input/Output pins.
+	 */
+	if (idx == 0) {
+		writeb(0x00, p + 0x8f); /*MPIOINT[7:0]*/
+		writeb(0x00, p + 0x90); /*MPIOLVL[7:0]*/
+		writeb(0x00, p + 0x91); /*MPIO3T[7:0]*/
+		writeb(0x00, p + 0x92); /*MPIOINV[7:0]*/
+		writeb(0x00, p + 0x93); /*MPIOSEL[7:0]*/
+		writeb(0x00, p + 0x94); /*MPIOOD[7:0]*/
+		writeb(0x00, p + 0x95); /*MPIOINT[15:8]*/
+		writeb(0x00, p + 0x96); /*MPIOLVL[15:8]*/
+		writeb(0x00, p + 0x97); /*MPIO3T[15:8]*/
+		writeb(0x00, p + 0x98); /*MPIOINV[15:8]*/
+		writeb(0x00, p + 0x99); /*MPIOSEL[15:8]*/
+		writeb(0x00, p + 0x9a); /*MPIOOD[15:8]*/
+	}
+	iounmap(p);
+
+	return pci_default_setup(priv, board, port, idx);
+}
+
 static int
 pci_wch_ch353_setup(struct serial_private *priv,
                     const struct pciserial_board *board,
@@ -1622,6 +1655,27 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.subdevice	= PCI_ANY_ID,
 		.setup		= pci_xr17c154_setup,
 	},
+	{
+		.vendor = PCI_VENDOR_ID_EXAR,
+		.device = PCI_DEVICE_ID_EXAR_XR17V352,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_xr17v35x_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_EXAR,
+		.device = PCI_DEVICE_ID_EXAR_XR17V354,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_xr17v35x_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_EXAR,
+		.device = PCI_DEVICE_ID_EXAR_XR17V358,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_xr17v35x_setup,
+	},
 	/*
 	 * Xircom cards
 	 */
@@ -1962,6 +2016,9 @@ enum pci_board_num_t {
 	pbn_exar_XR17C152,
 	pbn_exar_XR17C154,
 	pbn_exar_XR17C158,
+	pbn_exar_XR17V352,
+	pbn_exar_XR17V354,
+	pbn_exar_XR17V358,
 	pbn_exar_ibm_saturn,
 	pbn_pasemi_1682M,
 	pbn_ni8430_2,
@@ -2580,6 +2637,30 @@ static struct pciserial_board pci_boards[] = {
 		.base_baud	= 921600,
 		.uart_offset	= 0x200,
 	},
+	[pbn_exar_XR17V352] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 2,
+		.base_baud	= 7812500,
+		.uart_offset	= 0x400,
+		.reg_shift	= 0,
+		.first_offset	= 0,
+	},
+	[pbn_exar_XR17V354] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 4,
+		.base_baud	= 7812500,
+		.uart_offset	= 0x400,
+		.reg_shift	= 0,
+		.first_offset	= 0,
+	},
+	[pbn_exar_XR17V358] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 8,
+		.base_baud	= 7812500,
+		.uart_offset	= 0x400,
+		.reg_shift	= 0,
+		.first_offset	= 0,
+	},
 	[pbn_exar_ibm_saturn] = {
 		.flags		= FL_BASE0,
 		.num_ports	= 1,
@@ -3826,6 +3907,21 @@ static struct pci_device_id serial_pci_tbl[] = {
 		PCI_ANY_ID, PCI_ANY_ID,
 		0,
 		0, pbn_exar_XR17C158 },
+	/*
+	 * Exar Corp. XR17V35[248] Dual/Quad/Octal PCIe UARTs
+	 */
+	{	PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V352,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_exar_XR17V352 },
+	{	PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V354,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_exar_XR17V354 },
+	{	PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V358,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_exar_XR17V358 },
 
 	/*
 	 * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9d36b829533a..0199a7a76fcb 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1985,6 +1985,9 @@
 #define PCI_DEVICE_ID_EXAR_XR17C152	0x0152
 #define PCI_DEVICE_ID_EXAR_XR17C154	0x0154
 #define PCI_DEVICE_ID_EXAR_XR17C158	0x0158
+#define PCI_DEVICE_ID_EXAR_XR17V352	0x0352
+#define PCI_DEVICE_ID_EXAR_XR17V354	0x0354
+#define PCI_DEVICE_ID_EXAR_XR17V358	0x0358
 
 #define PCI_VENDOR_ID_MICROGATE		0x13c0
 #define PCI_DEVICE_ID_MICROGATE_USC	0x0010
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index ebcc73f0418a..78f99d97475b 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -49,7 +49,8 @@
 #define PORT_XR17D15X	21	/* Exar XR17D15x UART */
 #define PORT_LPC3220	22	/* NXP LPC32xx SoC "Standard" UART */
 #define PORT_8250_CIR	23	/* CIR infrared port, has its own driver */
-#define PORT_MAX_8250	23	/* max port ID */
+#define PORT_XR17V35X	24	/* Exar XR17V35x UARTs */
+#define PORT_MAX_8250	24	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 5ed325e88a81..d0b47607b90b 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -367,5 +367,11 @@
 #define UART_OMAP_MDR1_CIR_MODE		0x06	/* CIR mode */
 #define UART_OMAP_MDR1_DISABLE		0x07	/* Disable (default state) */
 
+/*
+ * These are definitions for the XR17V35X and XR17D15X
+ */
+#define UART_EXAR_SLEEP		0x8b	/* Sleep mode */
+#define UART_EXAR_DVID		0x8d	/* Device identification */
+
 #endif /* _LINUX_SERIAL_REG_H */
 
-- 
cgit v1.2.3-55-g7522


From 14faa8cce88e18ed4daf5c3643f0faa4198863f9 Mon Sep 17 00:00:00 2001
From: Matt Schulte
Date: Wed, 21 Nov 2012 10:35:15 -0600
Subject: tty/8250 Add support for Commtech's Fastcom Async-335 and Fastcom
 Async-PCIe cards

Add support for Commtech's Fastcom Async-335 and Fastcom Async-PCIe cards

Signed-off-by: Matt Schulte <matts@commtech-fastcom.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 163 +++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h            |   2 +
 2 files changed, 165 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 089fd43e3784..a795bd971eaa 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1201,6 +1201,55 @@ pci_xr17v35x_setup(struct serial_private *priv,
 	return pci_default_setup(priv, board, port, idx);
 }
 
+#define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004
+#define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002
+#define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a
+#define PCI_DEVICE_ID_COMMTECH_2328PCI335 0x000b
+
+static int
+pci_fastcom335_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
+		  struct uart_8250_port *port, int idx)
+{
+	u8 __iomem *p;
+
+	p = pci_ioremap_bar(priv->dev, 0);
+	if (p == NULL)
+		return -ENOMEM;
+
+	port->port.flags |= UPF_EXAR_EFR;
+
+	/*
+	 * Setup Multipurpose Input/Output pins.
+	 */
+	if (idx == 0) {
+		switch (priv->dev->device) {
+		case PCI_DEVICE_ID_COMMTECH_4222PCI335:
+		case PCI_DEVICE_ID_COMMTECH_4224PCI335:
+			writeb(0x78, p + 0x90); /* MPIOLVL[7:0] */
+			writeb(0x00, p + 0x92); /* MPIOINV[7:0] */
+			writeb(0x00, p + 0x93); /* MPIOSEL[7:0] */
+			break;
+		case PCI_DEVICE_ID_COMMTECH_2324PCI335:
+		case PCI_DEVICE_ID_COMMTECH_2328PCI335:
+			writeb(0x00, p + 0x90); /* MPIOLVL[7:0] */
+			writeb(0xc0, p + 0x92); /* MPIOINV[7:0] */
+			writeb(0xc0, p + 0x93); /* MPIOSEL[7:0] */
+			break;
+		}
+		writeb(0x00, p + 0x8f); /* MPIOINT[7:0] */
+		writeb(0x00, p + 0x91); /* MPIO3T[7:0] */
+		writeb(0x00, p + 0x94); /* MPIOOD[7:0] */
+	}
+	writeb(0x00, p + UART_EXAR_8XMODE);
+	writeb(UART_FCTR_EXAR_TRGD, p + UART_EXAR_FCTR);
+	writeb(32, p + UART_EXAR_TXTRG);
+	writeb(32, p + UART_EXAR_RXTRG);
+	iounmap(p);
+
+	return pci_default_setup(priv, board, port, idx);
+}
+
 static int
 pci_wch_ch353_setup(struct serial_private *priv,
                     const struct pciserial_board *board,
@@ -1250,6 +1299,10 @@ pci_wch_ch353_setup(struct serial_private *priv,
 #define PCI_VENDOR_ID_AGESTAR		0x5372
 #define PCI_DEVICE_ID_AGESTAR_9375	0x6872
 #define PCI_VENDOR_ID_ASIX		0x9710
+#define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0019
+#define PCI_DEVICE_ID_COMMTECH_4224PCIE	0x0020
+#define PCI_DEVICE_ID_COMMTECH_4228PCIE	0x0021
+
 
 /* Unknown vendors/cards - this should not be in linux/pci_ids.h */
 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584	0x1584
@@ -1845,6 +1898,59 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.subdevice	= PCI_ANY_ID,
 		.setup		= pci_asix_setup,
 	},
+	/*
+	 * Commtech, Inc. Fastcom adapters
+	 *
+	 */
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_4222PCI335,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_fastcom335_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_4224PCI335,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_fastcom335_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_2324PCI335,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_fastcom335_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_2328PCI335,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_fastcom335_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_4222PCIE,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_xr17v35x_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_4224PCIE,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_xr17v35x_setup,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_COMMTECH,
+		.device = PCI_DEVICE_ID_COMMTECH_4228PCIE,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= pci_xr17v35x_setup,
+	},
 	/*
 	 * Default "match everything" terminator entry
 	 */
@@ -1921,6 +2027,10 @@ enum pci_board_num_t {
 
 	pbn_b0_4_1152000,
 
+	pbn_b0_2_1152000_200,
+	pbn_b0_4_1152000_200,
+	pbn_b0_8_1152000_200,
+
 	pbn_b0_2_1843200,
 	pbn_b0_4_1843200,
 
@@ -2118,6 +2228,27 @@ static struct pciserial_board pci_boards[] = {
 		.uart_offset	= 8,
 	},
 
+	[pbn_b0_2_1152000_200] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 2,
+		.base_baud	= 1152000,
+		.uart_offset	= 0x200,
+	},
+
+	[pbn_b0_4_1152000_200] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 4,
+		.base_baud	= 1152000,
+		.uart_offset	= 0x200,
+	},
+
+	[pbn_b0_8_1152000_200] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 2,
+		.base_baud	= 1152000,
+		.uart_offset	= 0x200,
+	},
+
 	[pbn_b0_2_1843200] = {
 		.flags		= FL_BASE0,
 		.num_ports	= 2,
@@ -4356,6 +4487,38 @@ static struct pci_device_id serial_pci_tbl[] = {
 		PCI_ANY_ID, PCI_ANY_ID,
 		0, 0, pbn_b0_bt_2_115200 },
 
+	/*
+	 * Commtech, Inc. Fastcom adapters
+	 */
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_4222PCI335,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_b0_2_1152000_200 },
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_4224PCI335,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_b0_4_1152000_200 },
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_2324PCI335,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_b0_4_1152000_200 },
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_2328PCI335,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_b0_8_1152000_200 },
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_4222PCIE,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_exar_XR17V352 },
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_4224PCIE,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_exar_XR17V354 },
+	{	PCI_VENDOR_ID_COMMTECH, PCI_DEVICE_ID_COMMTECH_4228PCIE,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0,
+		0, pbn_exar_XR17V358 },
+
 	/*
 	 * These entries match devices with class COMMUNICATION_SERIAL,
 	 * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0199a7a76fcb..0f8447376ddb 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2326,6 +2326,8 @@
 
 #define PCI_VENDOR_ID_TOPSPIN		0x1867
 
+#define PCI_VENDOR_ID_COMMTECH		0x18f7
+
 #define PCI_VENDOR_ID_SILAN		0x1904
 
 #define PCI_VENDOR_ID_RENESAS		0x1912
-- 
cgit v1.2.3-55-g7522


From df072eb97dcfb819390227649f6b7c07a90aa2df Mon Sep 17 00:00:00 2001
From: MyungJoo Ham
Date: Tue, 20 Nov 2012 19:22:33 +0900
Subject: extcon: kernel_doc style fix

Corrected kernel_doc entries.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/extcon.h | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 54c00ce9ce7a..fcb51c88319f 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -76,12 +76,12 @@ struct extcon_cable;
 
 /**
  * struct extcon_dev - An extcon device represents one external connector.
- * @name	The name of this extcon device. Parent device name is used
+ * @name:	The name of this extcon device. Parent device name is used
  *		if NULL.
- * @supported_cable	Array of supported cable names ending with NULL.
+ * @supported_cable:	Array of supported cable names ending with NULL.
  *			If supported_cable is NULL, cable name related APIs
  *			are disabled.
- * @mutually_exclusive	Array of mutually exclusive set of cables that cannot
+ * @mutually_exclusive:	Array of mutually exclusive set of cables that cannot
  *			be attached simultaneously. The array should be
  *			ending with NULL or be NULL (no mutually exclusive
  *			cables). For example, if it is { 0x7, 0x30, 0}, then,
@@ -89,21 +89,21 @@ struct extcon_cable;
  *			be attached simulataneously. {0x7, 0} is equivalent to
  *			{0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there
  *			can be no simultaneous connections.
- * @print_name	An optional callback to override the method to print the
+ * @print_name:	An optional callback to override the method to print the
  *		name of the extcon device.
- * @print_state	An optional callback to override the method to print the
+ * @print_state:	An optional callback to override the method to print the
  *		status of the extcon device.
- * @dev		Device of this extcon. Do not provide at register-time.
- * @state	Attach/detach state of this extcon. Do not provide at
+ * @dev:	Device of this extcon. Do not provide at register-time.
+ * @state:	Attach/detach state of this extcon. Do not provide at
  *		register-time
- * @nh	Notifier for the state change events from this extcon
- * @entry	To support list of extcon devices so that users can search
+ * @nh:	Notifier for the state change events from this extcon
+ * @entry:	To support list of extcon devices so that users can search
  *		for extcon devices based on the extcon name.
- * @lock
- * @max_supported	Internal value to store the number of cables.
- * @extcon_dev_type	Device_type struct to provide attribute_groups
+ * @lock:
+ * @max_supported:	Internal value to store the number of cables.
+ * @extcon_dev_type:	Device_type struct to provide attribute_groups
  *			customized for each extcon device.
- * @cables	Sysfs subdirectories. Each represents one cable.
+ * @cables:	Sysfs subdirectories. Each represents one cable.
  *
  * In most cases, users only need to provide "User initializing data" of
  * this struct when registering an extcon. In some exceptional cases,
@@ -139,12 +139,12 @@ struct extcon_dev {
 
 /**
  * struct extcon_cable	- An internal data for each cable of extcon device.
- * @edev	The extcon device
- * @cable_index	Index of this cable in the edev
- * @attr_g	Attribute group for the cable
- * @attr_name	"name" sysfs entry
- * @attr_state	"state" sysfs entry
- * @attrs	Array pointing to attr_name and attr_state for attr_g
+ * @edev:	The extcon device
+ * @cable_index:	Index of this cable in the edev
+ * @attr_g:	Attribute group for the cable
+ * @attr_name:	"name" sysfs entry
+ * @attr_state:	"state" sysfs entry
+ * @attrs:	Array pointing to attr_name and attr_state for attr_g
  */
 struct extcon_cable {
 	struct extcon_dev *edev;
@@ -160,11 +160,11 @@ struct extcon_cable {
 /**
  * struct extcon_specific_cable_nb - An internal data for
  *				extcon_register_interest().
- * @internal_nb	a notifier block bridging extcon notifier and cable notifier.
- * @user_nb	user provided notifier block for events from a specific cable.
- * @cable_index	the target cable.
- * @edev	the target extcon device.
- * @previous_value	the saved previous event value.
+ * @internal_nb:	a notifier block bridging extcon notifier and cable notifier.
+ * @user_nb:	user provided notifier block for events from a specific cable.
+ * @cable_index:	the target cable.
+ * @edev:	the target extcon device.
+ * @previous_value:	the saved previous event value.
  */
 struct extcon_specific_cable_nb {
 	struct notifier_block internal_nb;
-- 
cgit v1.2.3-55-g7522


From 8d4b9e3182634d8b5afb5a144a8c6c24b187bcc1 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki
Date: Mon, 12 Nov 2012 13:03:20 +0100
Subject: bcma: export PLL reading function

This is required by NAND flash driver for initializing wait counters.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/bcma/driver_chipcommon_pmu.c | 3 ++-
 include/linux/bcma/bcma.h            | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index 201faf106b3f..657f23517481 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -13,12 +13,13 @@
 #include <linux/export.h>
 #include <linux/bcma/bcma.h>
 
-static u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset)
+u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset)
 {
 	bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset);
 	bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR);
 	return bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA);
 }
+EXPORT_SYMBOL_GPL(bcma_chipco_pll_read);
 
 void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value)
 {
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4180eb78d575..4fb6bd7941d7 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -345,6 +345,7 @@ extern void bcma_core_set_clockmode(struct bcma_device *core,
 				    enum bcma_clkmode clkmode);
 extern void bcma_core_pll_ctl(struct bcma_device *core, u32 req, u32 status,
 			      bool on);
+extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset);
 #define BCMA_DMA_TRANSLATION_MASK	0xC0000000
 #define  BCMA_DMA_TRANSLATION_NONE	0x00000000
 #define  BCMA_DMA_TRANSLATION_DMA32_CMT	0x40000000 /* Client Mode Translation for 32-bit DMA */
-- 
cgit v1.2.3-55-g7522


From 3778ff5c709899a29cefe4b3077a926d0e1e9785 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin
Date: Mon, 12 Nov 2012 15:58:50 +0000
Subject: thermal: cpu cooling: use const parameter while registering

There are predefined cpu_masks that are const data structures.
This patch changes the cpu cooling register function so that
those const cpu_masks can be used, without compilation warnings.

include/linux/cpumask.h

 * The following particular system cpumasks and operations manage
 * possible, present, active and online cpus.
 *
 *     cpu_possible_mask- has bit 'cpu' set iff cpu is populatable
 *     cpu_present_mask - has bit 'cpu' set iff cpu is populated
 *     cpu_online_mask  - has bit 'cpu' set iff cpu available to scheduler
 *     cpu_active_mask  - has bit 'cpu' set iff cpu available to migration
 *

Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/cpu_cooling.c | 2 +-
 include/linux/cpu_cooling.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 6f94c2cac1f0..836828e29a87 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -313,7 +313,7 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
  * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  */
 struct thermal_cooling_device *cpufreq_cooling_register(
-	struct cpumask *clip_cpus)
+	const struct cpumask *clip_cpus)
 {
 	struct thermal_cooling_device *cool_dev;
 	struct cpufreq_cooling_device *cpufreq_dev = NULL;
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index 851530128e65..b30cc79c7a82 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -35,7 +35,7 @@
  * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  */
 struct thermal_cooling_device *cpufreq_cooling_register(
-		struct cpumask *clip_cpus);
+		const struct cpumask *clip_cpus);
 
 /**
  * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
@@ -44,7 +44,7 @@ struct thermal_cooling_device *cpufreq_cooling_register(
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
 #else /* !CONFIG_CPU_THERMAL */
 static inline struct thermal_cooling_device *cpufreq_cooling_register(
-	struct cpumask *clip_cpus)
+	const struct cpumask *clip_cpus)
 {
 	return NULL;
 }
-- 
cgit v1.2.3-55-g7522


From 4ba115b1e1ab6b717e08e0f4e766a1d16853d217 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin
Date: Wed, 14 Nov 2012 15:23:30 +0000
Subject: thermal: cpu cooling: allow module builds

As thermal drivers can be built as modules and also
the thermal framework itself, building cpu cooling
only as built-in can cause linking errors. For instance:
* Generic Thermal sysfs driver
*
Generic Thermal sysfs driver (THERMAL) [M/n/y/?] m
  generic cpu cooling support (CPU_THERMAL) [N/y/?] (NEW) y

with the following drive:
CONFIG_OMAP_BANDGAP=m

generates:
ERROR: "cpufreq_cooling_unregister" [drivers/staging/omap-thermal/omap-thermal.ko] undefined!
ERROR: "cpufreq_cooling_register" [drivers/staging/omap-thermal/omap-thermal.ko] undefined!

This patch changes cpu cooling driver to allow it
to be built as module.

Reported-by: Enric Balletbo i Serra <eballetbo@gmail.com>
Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
Reviewed-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/Kconfig     | 2 +-
 include/linux/cpu_cooling.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index d96da075c9f6..8636fae1f7ec 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -67,7 +67,7 @@ config USER_SPACE
 	  Enable this to let the user space manage the platform thermals.
 
 config CPU_THERMAL
-	bool "generic cpu cooling support"
+	tristate "generic cpu cooling support"
 	depends on CPU_FREQ
 	select CPU_FREQ_TABLE
 	help
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index b30cc79c7a82..40b4ef54cc7d 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -29,7 +29,7 @@
 #define CPUFREQ_COOLING_START		0
 #define CPUFREQ_COOLING_STOP		1
 
-#ifdef CONFIG_CPU_THERMAL
+#if defined(CONFIG_CPU_THERMAL) || defined(CONFIG_CPU_THERMAL_MODULE)
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
  * @clip_cpus: cpumask of cpus where the frequency constraints will happen
-- 
cgit v1.2.3-55-g7522


From 83af24027b3df1af5c5a9aa9adcdcfeb3429d3be Mon Sep 17 00:00:00 2001
From: Philip, Avinash
Date: Wed, 21 Nov 2012 13:10:44 +0530
Subject: pwm: Device tree support for PWM polarity

Add support for encoding PWM properties in bit encoded form with
of_pwm_xlate_with_flags() function support. Platforms require platform
specific PWM properties has to populate in 3rd cell of the pwm-specifier
and PWM driver should also set .of_xlate support with this function.
Currently PWM property polarity encoded in bit position 0 of the third
cell in pwm-specifier.

Signed-off-by: Philip, Avinash <avinashphilip@ti.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 Documentation/devicetree/bindings/pwm/pwm.txt | 17 +++++++++++++---
 drivers/pwm/core.c                            | 28 +++++++++++++++++++++++++++
 include/linux/pwm.h                           |  3 +++
 3 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/pwm/pwm.txt b/Documentation/devicetree/bindings/pwm/pwm.txt
index 73ec962bfe8c..06e67247859a 100644
--- a/Documentation/devicetree/bindings/pwm/pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm.txt
@@ -37,10 +37,21 @@ device:
 		pwm-names = "backlight";
 	};
 
+Note that in the example above, specifying the "pwm-names" is redundant
+because the name "backlight" would be used as fallback anyway.
+
 pwm-specifier typically encodes the chip-relative PWM number and the PWM
-period in nanoseconds. Note that in the example above, specifying the
-"pwm-names" is redundant because the name "backlight" would be used as
-fallback anyway.
+period in nanoseconds.
+
+Optionally, the pwm-specifier can encode a number of flags in a third cell:
+- bit 0: PWM signal polarity (0: normal polarity, 1: inverse polarity)
+
+Example with optional PWM specifier for inverse polarity
+
+	bl: backlight {
+		pwms = <&pwm 0 5000000 1>;
+		pwm-names = "backlight";
+	};
 
 2) PWM controller nodes
 -----------------------
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index f5acdaa52707..780cb6b8a8f0 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -32,6 +32,9 @@
 
 #define MAX_PWMS 1024
 
+/* flags in the third cell of the DT PWM specifier */
+#define PWM_SPEC_POLARITY	(1 << 0)
+
 static DEFINE_MUTEX(pwm_lookup_lock);
 static LIST_HEAD(pwm_lookup_list);
 static DEFINE_MUTEX(pwm_lock);
@@ -129,6 +132,31 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 	return 0;
 }
 
+struct pwm_device *
+of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
+{
+	struct pwm_device *pwm;
+
+	if (pc->of_pwm_n_cells < 3)
+		return ERR_PTR(-EINVAL);
+
+	if (args->args[0] >= pc->npwm)
+		return ERR_PTR(-EINVAL);
+
+	pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+	if (IS_ERR(pwm))
+		return pwm;
+
+	pwm_set_period(pwm, args->args[1]);
+
+	if (args->args[2] & PWM_SPEC_POLARITY)
+		pwm_set_polarity(pwm, PWM_POLARITY_INVERSED);
+	else
+		pwm_set_polarity(pwm, PWM_POLARITY_NORMAL);
+
+	return pwm;
+}
+
 static struct pwm_device *
 of_pwm_simple_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
 {
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 112b31436848..6d661f32e0e4 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -171,6 +171,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 					 unsigned int index,
 					 const char *label);
 
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+		const struct of_phandle_args *args);
+
 struct pwm_device *pwm_get(struct device *dev, const char *consumer);
 void pwm_put(struct pwm_device *pwm);
 
-- 
cgit v1.2.3-55-g7522


From acad189b08456722ca4a8984218d6f38f4563cbc Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Thu, 22 Nov 2012 11:12:04 +0100
Subject: mfd: Add an AS3711 PMIC MFD driver

AS3711 is a PMIC with multiple DCDC and LDO power supplies, GPIOs, an RTC,
a battery charger and a general purpose ADC. This patch adds support for
the MFD with support for a regulator driver and a backlight driver.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig        |   9 ++
 drivers/mfd/Makefile       |   1 +
 drivers/mfd/as3711.c       | 217 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/as3711.h | 126 ++++++++++++++++++++++++++
 4 files changed, 353 insertions(+)
 create mode 100644 drivers/mfd/as3711.c
 create mode 100644 include/linux/mfd/as3711.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f5b839b718aa..475c26696b95 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1103,6 +1103,15 @@ config MFD_RETU
 	  Retu is a multi-function device found on Nokia Internet Tablets
 	  (770, N800 and N810).
 
+config MFD_AS3711
+	bool "Support for AS3711"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C=y
+	help
+	  Support for the AS3711 PMIC from AMS
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2689c8a0d781..f2216dfd963c 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -146,3 +146,4 @@ obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
+obj-$(CONFIG_MFD_AS3711)	+= as3711.o
diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c
new file mode 100644
index 000000000000..e994c9691124
--- /dev/null
+++ b/drivers/mfd/as3711.c
@@ -0,0 +1,217 @@
+/*
+ * AS3711 PMIC MFC driver
+ *
+ * Copyright (C) 2012 Renesas Electronics Corporation
+ * Author: Guennadi Liakhovetski, <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/as3711.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+enum {
+	AS3711_REGULATOR,
+	AS3711_BACKLIGHT,
+};
+
+/*
+ * Ok to have it static: it is only used during probing and multiple I2C devices
+ * cannot be probed simultaneously. Just make sure to avoid stale data.
+ */
+static struct mfd_cell as3711_subdevs[] = {
+	[AS3711_REGULATOR] = {.name = "as3711-regulator",},
+	[AS3711_BACKLIGHT] = {.name = "as3711-backlight",},
+};
+
+static bool as3711_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_GPIO_SIGNAL_IN:
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+	case AS3711_CHARGER_STATUS_1:
+	case AS3711_CHARGER_STATUS_2:
+	case AS3711_REG_STATUS:
+		return true;
+	}
+	return false;
+}
+
+static bool as3711_precious_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+		return true;
+	}
+	return false;
+}
+
+static bool as3711_readable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_SD_1_VOLTAGE:
+	case AS3711_SD_2_VOLTAGE:
+	case AS3711_SD_3_VOLTAGE:
+	case AS3711_SD_4_VOLTAGE:
+	case AS3711_LDO_1_VOLTAGE:
+	case AS3711_LDO_2_VOLTAGE:
+	case AS3711_LDO_3_VOLTAGE:
+	case AS3711_LDO_4_VOLTAGE:
+	case AS3711_LDO_5_VOLTAGE:
+	case AS3711_LDO_6_VOLTAGE:
+	case AS3711_LDO_7_VOLTAGE:
+	case AS3711_LDO_8_VOLTAGE:
+	case AS3711_SD_CONTROL:
+	case AS3711_GPIO_SIGNAL_OUT:
+	case AS3711_GPIO_SIGNAL_IN:
+	case AS3711_SD_CONTROL_1:
+	case AS3711_SD_CONTROL_2:
+	case AS3711_CURR_CONTROL:
+	case AS3711_CURR1_VALUE:
+	case AS3711_CURR2_VALUE:
+	case AS3711_CURR3_VALUE:
+	case AS3711_STEPUP_CONTROL_1:
+	case AS3711_STEPUP_CONTROL_2:
+	case AS3711_STEPUP_CONTROL_4:
+	case AS3711_STEPUP_CONTROL_5:
+	case AS3711_REG_STATUS:
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+	case AS3711_CHARGER_STATUS_1:
+	case AS3711_CHARGER_STATUS_2:
+	case AS3711_ASIC_ID_1:
+	case AS3711_ASIC_ID_2:
+		return true;
+	}
+	return false;
+}
+
+static const struct regmap_config as3711_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.volatile_reg = as3711_volatile_reg,
+	.readable_reg = as3711_readable_reg,
+	.precious_reg = as3711_precious_reg,
+	.max_register = AS3711_MAX_REGS,
+	.num_reg_defaults_raw = AS3711_MAX_REGS,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static int as3711_i2c_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	struct as3711 *as3711;
+	struct as3711_platform_data *pdata = client->dev.platform_data;
+	unsigned int id1, id2;
+	int ret;
+
+	if (!pdata)
+		dev_dbg(&client->dev, "Platform data not found\n");
+
+	as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL);
+	if (!as3711) {
+		dev_err(&client->dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	as3711->dev = &client->dev;
+	i2c_set_clientdata(client, as3711);
+
+	if (client->irq)
+		dev_notice(&client->dev, "IRQ not supported yet\n");
+
+	as3711->regmap = devm_regmap_init_i2c(client, &as3711_regmap_config);
+	if (IS_ERR(as3711->regmap)) {
+		ret = PTR_ERR(as3711->regmap);
+		dev_err(&client->dev, "regmap initialization failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_1, &id1);
+	if (!ret)
+		ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_2, &id2);
+	if (ret < 0) {
+		dev_err(&client->dev, "regmap_read() failed: %d\n", ret);
+		return ret;
+	}
+	if (id1 != 0x8b)
+		return -ENODEV;
+	dev_info(as3711->dev, "AS3711 detected: %x:%x\n", id1, id2);
+
+	/* We can reuse as3711_subdevs[], it will be copied in mfd_add_devices() */
+	if (pdata) {
+		as3711_subdevs[AS3711_REGULATOR].platform_data = &pdata->regulator;
+		as3711_subdevs[AS3711_REGULATOR].pdata_size = sizeof(pdata->regulator);
+		as3711_subdevs[AS3711_BACKLIGHT].platform_data = &pdata->backlight;
+		as3711_subdevs[AS3711_BACKLIGHT].pdata_size = sizeof(pdata->backlight);
+	} else {
+		as3711_subdevs[AS3711_REGULATOR].platform_data = NULL;
+		as3711_subdevs[AS3711_REGULATOR].pdata_size = 0;
+		as3711_subdevs[AS3711_BACKLIGHT].platform_data = NULL;
+		as3711_subdevs[AS3711_BACKLIGHT].pdata_size = 0;
+	}
+
+	ret = mfd_add_devices(as3711->dev, -1, as3711_subdevs,
+			      ARRAY_SIZE(as3711_subdevs), NULL, 0, NULL);
+	if (ret < 0)
+		dev_err(&client->dev, "add mfd devices failed: %d\n", ret);
+
+	return ret;
+}
+
+static int as3711_i2c_remove(struct i2c_client *client)
+{
+	struct as3711 *as3711 = i2c_get_clientdata(client);
+
+	mfd_remove_devices(as3711->dev);
+	return 0;
+}
+
+static const struct i2c_device_id as3711_i2c_id[] = {
+	{.name = "as3711", .driver_data = 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, as3711_i2c_id);
+
+static struct i2c_driver as3711_i2c_driver = {
+	.driver = {
+		   .name = "as3711",
+		   .owner = THIS_MODULE,
+		   },
+	.probe = as3711_i2c_probe,
+	.remove = as3711_i2c_remove,
+	.id_table = as3711_i2c_id,
+};
+
+static int __init as3711_i2c_init(void)
+{
+	return i2c_add_driver(&as3711_i2c_driver);
+}
+/* Initialise early */
+subsys_initcall(as3711_i2c_init);
+
+static void __exit as3711_i2c_exit(void)
+{
+	i2c_del_driver(&as3711_i2c_driver);
+}
+module_exit(as3711_i2c_exit);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_DESCRIPTION("AS3711 PMIC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/as3711.h b/include/linux/mfd/as3711.h
new file mode 100644
index 000000000000..38452ce1e892
--- /dev/null
+++ b/include/linux/mfd/as3711.h
@@ -0,0 +1,126 @@
+/*
+ * AS3711 PMIC MFC driver header
+ *
+ * Copyright (C) 2012 Renesas Electronics Corporation
+ * Author: Guennadi Liakhovetski, <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#ifndef MFD_AS3711_H
+#define MFD_AS3711_H
+
+/*
+ * Client data
+ */
+
+/* Register addresses */
+#define AS3711_SD_1_VOLTAGE		0	/* Digital Step-Down */
+#define AS3711_SD_2_VOLTAGE		1
+#define AS3711_SD_3_VOLTAGE		2
+#define AS3711_SD_4_VOLTAGE		3
+#define AS3711_LDO_1_VOLTAGE		4	/* Analog LDO */
+#define AS3711_LDO_2_VOLTAGE		5
+#define AS3711_LDO_3_VOLTAGE		6	/* Digital LDO */
+#define AS3711_LDO_4_VOLTAGE		7
+#define AS3711_LDO_5_VOLTAGE		8
+#define AS3711_LDO_6_VOLTAGE		9
+#define AS3711_LDO_7_VOLTAGE		0xa
+#define AS3711_LDO_8_VOLTAGE		0xb
+#define AS3711_SD_CONTROL		0x10
+#define AS3711_GPIO_SIGNAL_OUT		0x20
+#define AS3711_GPIO_SIGNAL_IN		0x21
+#define AS3711_SD_CONTROL_1		0x30
+#define AS3711_SD_CONTROL_2		0x31
+#define AS3711_CURR_CONTROL		0x40
+#define AS3711_CURR1_VALUE		0x43
+#define AS3711_CURR2_VALUE		0x44
+#define AS3711_CURR3_VALUE		0x45
+#define AS3711_STEPUP_CONTROL_1		0x50
+#define AS3711_STEPUP_CONTROL_2		0x51
+#define AS3711_STEPUP_CONTROL_4		0x53
+#define AS3711_STEPUP_CONTROL_5		0x54
+#define AS3711_REG_STATUS		0x73
+#define AS3711_INTERRUPT_STATUS_1	0x77
+#define AS3711_INTERRUPT_STATUS_2	0x78
+#define AS3711_INTERRUPT_STATUS_3	0x79
+#define AS3711_CHARGER_STATUS_1		0x86
+#define AS3711_CHARGER_STATUS_2		0x87
+#define AS3711_ASIC_ID_1		0x90
+#define AS3711_ASIC_ID_2		0x91
+
+#define AS3711_MAX_REGS			0x92
+
+/* Regulators */
+enum {
+	AS3711_REGULATOR_SD_1,
+	AS3711_REGULATOR_SD_2,
+	AS3711_REGULATOR_SD_3,
+	AS3711_REGULATOR_SD_4,
+	AS3711_REGULATOR_LDO_1,
+	AS3711_REGULATOR_LDO_2,
+	AS3711_REGULATOR_LDO_3,
+	AS3711_REGULATOR_LDO_4,
+	AS3711_REGULATOR_LDO_5,
+	AS3711_REGULATOR_LDO_6,
+	AS3711_REGULATOR_LDO_7,
+	AS3711_REGULATOR_LDO_8,
+
+	AS3711_REGULATOR_MAX,
+};
+
+struct device;
+struct regmap;
+
+struct as3711 {
+	struct device *dev;
+	struct regmap *regmap;
+};
+
+#define AS3711_MAX_STEPDOWN 4
+#define AS3711_MAX_STEPUP 2
+#define AS3711_MAX_LDO 8
+
+enum as3711_su2_feedback {
+	AS3711_SU2_VOLTAGE,
+	AS3711_SU2_CURR1,
+	AS3711_SU2_CURR2,
+	AS3711_SU2_CURR3,
+	AS3711_SU2_CURR_AUTO,
+};
+
+enum as3711_su2_fbprot {
+	AS3711_SU2_LX_SD4,
+	AS3711_SU2_GPIO2,
+	AS3711_SU2_GPIO3,
+	AS3711_SU2_GPIO4,
+};
+
+/*
+ * Platform data
+ */
+
+struct as3711_regulator_pdata {
+	struct regulator_init_data *init_data[AS3711_REGULATOR_MAX];
+};
+
+struct as3711_bl_pdata {
+	const char *su1_fb;
+	int su1_max_uA;
+	const char *su2_fb;
+	int su2_max_uA;
+	enum as3711_su2_feedback su2_feedback;
+	enum as3711_su2_fbprot su2_fbprot;
+	bool su2_auto_curr1;
+	bool su2_auto_curr2;
+	bool su2_auto_curr3;
+};
+
+struct as3711_platform_data {
+	struct as3711_regulator_pdata regulator;
+	struct as3711_bl_pdata backlight;
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 907ddf89d0bb7f57e1e21485900e6564a1ab512a Mon Sep 17 00:00:00 2001
From: Mika Westerberg
Date: Fri, 23 Nov 2012 12:23:40 +0100
Subject: i2c / ACPI: add ACPI enumeration support

ACPI 5 introduced I2cSerialBus resource that makes it possible to enumerate
and configure the I2C slave devices behind the I2C controller. This patch
adds helper functions to support I2C slave enumeration.

An ACPI enabled I2C controller driver only needs to call acpi_i2c_register_devices()
in order to get its slave devices enumerated, created and bound to the
corresponding ACPI handle.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Kconfig    |   6 +++
 drivers/acpi/Makefile   |   1 +
 drivers/acpi/acpi_i2c.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core.c  |   6 +++
 include/linux/i2c.h     |   9 +++++
 5 files changed, 125 insertions(+)
 create mode 100644 drivers/acpi/acpi_i2c.c

(limited to 'include/linux')

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 119d58db8342..0300bf612946 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -181,6 +181,12 @@ config ACPI_DOCK
 	  This driver supports ACPI-controlled docking stations and removable
 	  drive bays such as the IBM Ultrabay and the Dell Module Bay.
 
+config ACPI_I2C
+	def_tristate I2C
+	depends on I2C
+	help
+	  ACPI I2C enumeration support.
+
 config ACPI_PROCESSOR
 	tristate "Processor"
 	select THERMAL
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 3223edfb23b6..7198b6d6b763 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_ACPI_HED)		+= hed.o
 obj-$(CONFIG_ACPI_EC_DEBUGFS)	+= ec_sys.o
 obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o
 obj-$(CONFIG_ACPI_BGRT)		+= bgrt.o
+obj-$(CONFIG_ACPI_I2C)		+= acpi_i2c.o
 
 # processor has its own "processor." module_param namespace
 processor-y			:= processor_driver.o processor_throttling.o
diff --git a/drivers/acpi/acpi_i2c.c b/drivers/acpi/acpi_i2c.c
new file mode 100644
index 000000000000..82045e3f5cac
--- /dev/null
+++ b/drivers/acpi/acpi_i2c.c
@@ -0,0 +1,103 @@
+/*
+ * ACPI I2C enumeration support
+ *
+ * Copyright (C) 2012, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/i2c.h>
+#include <linux/ioport.h>
+
+ACPI_MODULE_NAME("i2c");
+
+static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data)
+{
+	struct i2c_board_info *info = data;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		struct acpi_resource_i2c_serialbus *sb;
+
+		sb = &ares->data.i2c_serial_bus;
+		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+			info->addr = sb->slave_address;
+			if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+				info->flags |= I2C_CLIENT_TEN;
+		}
+	} else if (info->irq < 0) {
+		struct resource r;
+
+		if (acpi_dev_resource_interrupt(ares, 0, &r))
+			info->irq = r.start;
+	}
+
+	/* Tell the ACPI core to skip this resource */
+	return 1;
+}
+
+static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
+				       void *data, void **return_value)
+{
+	struct i2c_adapter *adapter = data;
+	struct list_head resource_list;
+	struct i2c_board_info info;
+	struct acpi_device *adev;
+	int ret;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+	if (acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	memset(&info, 0, sizeof(info));
+	info.acpi_node.handle = handle;
+	info.irq = -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     acpi_i2c_add_resource, &info);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info.addr)
+		return AE_OK;
+
+	strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type));
+	if (!i2c_new_device(adapter, &info)) {
+		dev_err(&adapter->dev,
+			"failed to add I2C device %s from ACPI\n",
+			dev_name(&adev->dev));
+	}
+
+	return AE_OK;
+}
+
+/**
+ * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter
+ * @adapter: pointer to adapter
+ *
+ * Enumerate all I2C slave devices behind this adapter by walking the ACPI
+ * namespace. When a device is found it will be added to the Linux device
+ * model and bound to the corresponding ACPI handle.
+ */
+void acpi_i2c_register_devices(struct i2c_adapter *adapter)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	handle = ACPI_HANDLE(&adapter->dev);
+	if (!handle)
+		return;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpi_i2c_add_device, NULL,
+				     adapter, NULL);
+	if (ACPI_FAILURE(status))
+		dev_warn(&adapter->dev, "failed to enumerate I2C slaves\n");
+}
+EXPORT_SYMBOL_GPL(acpi_i2c_register_devices);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index a7edf987a339..e388590b44ab 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -39,6 +39,7 @@
 #include <linux/irqflags.h>
 #include <linux/rwsem.h>
 #include <linux/pm_runtime.h>
+#include <linux/acpi.h>
 #include <asm/uaccess.h>
 
 #include "i2c-core.h"
@@ -78,6 +79,10 @@ static int i2c_device_match(struct device *dev, struct device_driver *drv)
 	if (of_driver_match_device(dev, drv))
 		return 1;
 
+	/* Then ACPI style match */
+	if (acpi_driver_match_device(dev, drv))
+		return 1;
+
 	driver = to_i2c_driver(drv);
 	/* match on an id table if there is one */
 	if (driver->id_table)
@@ -539,6 +544,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 	client->dev.bus = &i2c_bus_type;
 	client->dev.type = &i2c_client_type;
 	client->dev.of_node = info->of_node;
+	ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
 
 	/* For 10-bit clients, add an arbitrary offset to avoid collisions */
 	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 800de224336b..d0c4db7b4872 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -259,6 +259,7 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data)
  * @platform_data: stored in i2c_client.dev.platform_data
  * @archdata: copied into i2c_client.dev.archdata
  * @of_node: pointer to OpenFirmware device node
+ * @acpi_node: ACPI device node
  * @irq: stored in i2c_client.irq
  *
  * I2C doesn't actually support hardware probing, although controllers and
@@ -279,6 +280,7 @@ struct i2c_board_info {
 	void		*platform_data;
 	struct dev_archdata	*archdata;
 	struct device_node *of_node;
+	struct acpi_dev_node acpi_node;
 	int		irq;
 };
 
@@ -501,4 +503,11 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap)
 			i2c_del_driver)
 
 #endif /* I2C */
+
+#if IS_ENABLED(CONFIG_ACPI_I2C)
+extern void acpi_i2c_register_devices(struct i2c_adapter *adap);
+#else
+static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) {}
+#endif
+
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3-55-g7522


From a8a8fc287dd7497b3adb2f2952513849ab6506eb Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Mon, 8 Oct 2012 11:21:09 +0900
Subject: mfd: wm8994: Store platform data in device

This is better style as platform data is supposed to be discardable after
init (though hotplug usually prevents this) and will ease implementation
of device tree property bindings.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c       | 35 +++++++++++++++++++----------------
 include/linux/mfd/wm8994/core.h |  4 ++++
 2 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 8fefc961ec06..45a20c573aa3 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -401,13 +401,19 @@ static const __devinitconst struct reg_default wm1811_reva_patch[] = {
  */
 static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
 {
-	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
+	struct wm8994_pdata *pdata;
 	struct regmap_config *regmap_config;
 	const struct reg_default *regmap_patch = NULL;
 	const char *devname;
 	int ret, i, patch_regs;
 	int pulls = 0;
 
+	if (dev_get_platdata(wm8994->dev)) {
+		pdata = dev_get_platdata(wm8994->dev);
+		wm8994->pdata = *pdata;
+	}
+	pdata = &wm8994->pdata;
+
 	dev_set_drvdata(wm8994->dev, wm8994);
 
 	/* Add the on-chip regulators first for bootstrapping */
@@ -604,24 +610,21 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		}
 	}
 
-	if (pdata) {
-		wm8994->irq_base = pdata->irq_base;
-		wm8994->gpio_base = pdata->gpio_base;
-
-		/* GPIO configuration is only applied if it's non-zero */
-		for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) {
-			if (pdata->gpio_defaults[i]) {
-				wm8994_set_bits(wm8994, WM8994_GPIO_1 + i,
-						0xffff,
-						pdata->gpio_defaults[i]);
-			}
+	wm8994->irq_base = pdata->irq_base;
+	wm8994->gpio_base = pdata->gpio_base;
+
+	/* GPIO configuration is only applied if it's non-zero */
+	for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) {
+		if (pdata->gpio_defaults[i]) {
+			wm8994_set_bits(wm8994, WM8994_GPIO_1 + i,
+					0xffff, pdata->gpio_defaults[i]);
 		}
+	}
 
-		wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
+	wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
 
-		if (pdata->spkmode_pu)
-			pulls |= WM8994_SPKMODE_PU;
-	}
+	if (pdata->spkmode_pu)
+		pulls |= WM8994_SPKMODE_PU;
 
 	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 1f173306bf05..ae5c249530b4 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -19,6 +19,8 @@
 #include <linux/interrupt.h>
 #include <linux/regmap.h>
 
+#include <linux/mfd/wm8994/pdata.h>
+
 enum wm8994_type {
 	WM8994 = 0,
 	WM8958 = 1,
@@ -55,6 +57,8 @@ struct regulator_bulk_data;
 struct wm8994 {
 	struct mutex irq_lock;
 
+	struct wm8994_pdata pdata;
+
 	enum wm8994_type type;
 	int revision;
 	int cust_id;
-- 
cgit v1.2.3-55-g7522


From 1ad7e89940d5ac411928189e1a4a01901dbf590f Mon Sep 17 00:00:00 2001
From: Stephen Warren
Date: Thu, 8 Nov 2012 16:12:25 -0800
Subject: block: store partition_meta_info.uuid as a string

This will allow other types of UUID to be stored here, aside from true
UUIDs.  This also simplifies code that uses this field, since it's usually
constructed from a, used as a, or compared to other, strings.

Note: A simplistic approach here would be to set uuid_str[36]=0 whenever a
/PARTNROFF option was found to be present.  However, this modifies the
input string, and causes subsequent calls to devt_from_partuuid() not to
see the /PARTNROFF option, which causes different results.  In order to
avoid misleading future maintainers, this parameter is marked const.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Will Drewry <wad@chromium.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c          |  8 +-------
 block/partitions/efi.c |  7 +------
 include/linux/genhd.h  |  8 ++++++--
 init/do_mounts.c       | 28 +++++++++++++++++-----------
 4 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 6cace663a80e..b281f3a2d26a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -743,7 +743,6 @@ void __init printk_all_partitions(void)
 		struct hd_struct *part;
 		char name_buf[BDEVNAME_SIZE];
 		char devt_buf[BDEVT_SIZE];
-		char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
 
 		/*
 		 * Don't show empty devices or things that have been
@@ -762,16 +761,11 @@ void __init printk_all_partitions(void)
 		while ((part = disk_part_iter_next(&piter))) {
 			bool is_part0 = part == &disk->part0;
 
-			uuid_buf[0] = '\0';
-			if (part->info)
-				snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
-					 part->info->uuid);
-
 			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
 			       bdevt_str(part_devt(part), devt_buf),
 			       (unsigned long long)part_nr_sects_read(part) >> 1
 			       , disk_name(disk, part->partno, name_buf),
-			       uuid_buf);
+			       part->info ? part->info->uuid : "");
 			if (is_part0) {
 				if (disk->driverfs_dev != NULL &&
 				    disk->driverfs_dev->driver != NULL)
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 6296b403c67a..b62fb88b8711 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -620,7 +620,6 @@ int efi_partition(struct parsed_partitions *state)
 	gpt_entry *ptes = NULL;
 	u32 i;
 	unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
-	u8 unparsed_guid[37];
 
 	if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
 		kfree(gpt);
@@ -649,11 +648,7 @@ int efi_partition(struct parsed_partitions *state)
 			state->parts[i + 1].flags = ADDPART_FLAG_RAID;
 
 		info = &state->parts[i + 1].info;
-		/* Instead of doing a manual swap to big endian, reuse the
-		 * common ASCII hex format as the interim.
-		 */
-		efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
-		part_pack_uuid(unparsed_guid, info->uuid);
+		efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
 
 		/* Naively convert UTF16-LE to 7 bits. */
 		label_max = min(sizeof(info->volname) - 1,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4f440b3e89fe..79b8bba19363 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -88,10 +88,14 @@ struct disk_stats {
 };
 
 #define PARTITION_META_INFO_VOLNAMELTH	64
-#define PARTITION_META_INFO_UUIDLTH	16
+/*
+ * Enough for the string representation of any kind of UUID plus NULL.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH	37
 
 struct partition_meta_info {
-	u8 uuid[PARTITION_META_INFO_UUIDLTH];	/* always big endian */
+	char uuid[PARTITION_META_INFO_UUIDLTH];
 	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
 };
 
diff --git a/init/do_mounts.c b/init/do_mounts.c
index f8a66424360d..b28ec5819325 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -69,23 +69,28 @@ __setup("ro", readonly);
 __setup("rw", readwrite);
 
 #ifdef CONFIG_BLOCK
+struct uuidcmp {
+	const char *uuid;
+	int len;
+};
+
 /**
  * match_dev_by_uuid - callback for finding a partition using its uuid
  * @dev:	device passed in by the caller
- * @data:	opaque pointer to a 36 byte char array with a UUID
+ * @data:	opaque pointer to the desired struct uuidcmp to match
  *
  * Returns 1 if the device matches, and 0 otherwise.
  */
 static int match_dev_by_uuid(struct device *dev, void *data)
 {
-	u8 *uuid = data;
+	struct uuidcmp *cmp = data;
 	struct hd_struct *part = dev_to_part(dev);
 
 	if (!part->info)
 		goto no_match;
 
-	if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
-			goto no_match;
+	if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len))
+		goto no_match;
 
 	return 1;
 no_match:
@@ -95,7 +100,7 @@ no_match:
 
 /**
  * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid:	min 36 byte char array containing a hex ascii UUID
+ * @uuid:	char array containing ascii UUID
  *
  * The function will return the first partition which contains a matching
  * UUID value in its partition_meta_info struct.  This does not search
@@ -106,11 +111,11 @@ no_match:
  *
  * Returns the matching dev_t on success or 0 on failure.
  */
-static dev_t devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(const char *uuid_str)
 {
 	dev_t res = 0;
+	struct uuidcmp cmp;
 	struct device *dev = NULL;
-	u8 uuid[16];
 	struct gendisk *disk;
 	struct hd_struct *part;
 	int offset = 0;
@@ -118,6 +123,9 @@ static dev_t devt_from_partuuid(char *uuid_str)
 	if (strlen(uuid_str) < 36)
 		goto done;
 
+	cmp.uuid = uuid_str;
+	cmp.len = 36;
+
 	/* Check for optional partition number offset attributes. */
 	if (uuid_str[36]) {
 		char c = 0;
@@ -134,10 +142,8 @@ static dev_t devt_from_partuuid(char *uuid_str)
 		}
 	}
 
-	/* Pack the requested UUID in the expected format. */
-	part_pack_uuid(uuid_str, uuid);
-
-	dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+	dev = class_find_device(&block_class, NULL, &cmp,
+				&match_dev_by_uuid);
 	if (!dev)
 		goto done;
 
-- 
cgit v1.2.3-55-g7522


From 93272e07d8539cddce9a2a17b7b0de1326957599 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD
Date: Wed, 21 Nov 2012 05:38:07 +0000
Subject: net: add micrel KSZ8873MLL switch support

this will allow to detect the link between the switch and the soc

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/micrel_phy.h |  1 +
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2165d5fdb8c0..b983596abcbb 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -127,6 +127,39 @@ static int ks8051_config_init(struct phy_device *phydev)
 	return 0;
 }
 
+#define KSZ8873MLL_GLOBAL_CONTROL_4	0x06
+#define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX	(1 << 6)
+#define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED	(1 << 4)
+int ksz8873mll_read_status(struct phy_device *phydev)
+{
+	int regval;
+
+	/* dummy read */
+	regval = phy_read(phydev, KSZ8873MLL_GLOBAL_CONTROL_4);
+
+	regval = phy_read(phydev, KSZ8873MLL_GLOBAL_CONTROL_4);
+
+	if (regval & KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX)
+		phydev->duplex = DUPLEX_HALF;
+	else
+		phydev->duplex = DUPLEX_FULL;
+
+	if (regval & KSZ8873MLL_GLOBAL_CONTROL_4_SPEED)
+		phydev->speed = SPEED_10;
+	else
+		phydev->speed = SPEED_100;
+
+	phydev->link = 1;
+	phydev->pause = phydev->asym_pause = 0;
+
+	return 0;
+}
+
+static int ksz8873mll_config_aneg(struct phy_device *phydev)
+{
+	return 0;
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	.phy_id		= PHY_ID_KS8737,
@@ -204,6 +237,16 @@ static struct phy_driver ksphy_driver[] = {
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= ksz9021_config_intr,
 	.driver		= { .owner = THIS_MODULE, },
+}, {
+	.phy_id		= PHY_ID_KSZ8873MLL,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ8873MLL Switch",
+	.features	= (SUPPORTED_Pause | SUPPORTED_Asym_Pause),
+	.flags		= PHY_HAS_MAGICANEG,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= ksz8873mll_config_aneg,
+	.read_status	= ksz8873mll_read_status,
+	.driver		= { .owner = THIS_MODULE, },
 } };
 
 static int __init ksphy_init(void)
@@ -232,6 +275,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = {
 	{ PHY_ID_KSZ8021, 0x00ffffff },
 	{ PHY_ID_KSZ8041, 0x00fffff0 },
 	{ PHY_ID_KSZ8051, 0x00fffff0 },
+	{ PHY_ID_KSZ8873MLL, 0x00fffff0 },
 	{ }
 };
 
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index de201203bc7c..adfe8c058f29 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -15,6 +15,7 @@
 
 #define MICREL_PHY_ID_MASK	0x00fffff0
 
+#define PHY_ID_KSZ8873MLL	0x000e7237
 #define PHY_ID_KSZ9021		0x00221610
 #define PHY_ID_KS8737		0x00221720
 #define PHY_ID_KSZ8021		0x00221555
-- 
cgit v1.2.3-55-g7522


From e5f5762177be52fde50ccba88938d66b5103c8e0 Mon Sep 17 00:00:00 2001
From: Li Haifeng
Date: Fri, 23 Nov 2012 21:55:19 +0100
Subject: PM / Freezer: Fixup compile error of try_to_freeze_nowarn()

If FREEZER is not defined, the error as following will be throw
when compiled.
arch/arm/kernel/signal.c:645: error: implicit declaration of function
'try_to_freeze_nowarn'

Signed-off-by: Haifeng Li <omycle@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/freezer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index d09af4b67cf1..b90091af5798 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -177,6 +177,7 @@ static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
 static inline void thaw_kernel_threads(void) {}
 
+static inline bool try_to_freeze_nowarn(void) { return false; }
 static inline bool try_to_freeze(void) { return false; }
 
 static inline void freezer_do_not_count(void) {}
-- 
cgit v1.2.3-55-g7522


From b88ce2a41562d1a9554f209e0f31a32d9f473794 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Mon, 26 Nov 2012 10:03:06 +0100
Subject: ACPI / PM: Allow attach/detach routines to change device power states

Make it possible to ask the routines used for adding/removing devices
to/from the general ACPI PM domain, acpi_dev_pm_attach() and
acpi_dev_pm_detach(), respectively, to change the power states of
devices so that they are put into the full-power state automatically
by acpi_dev_pm_attach() and into the lowest-power state available
automatically by acpi_dev_pm_detach().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/acpi/device_pm.c | 28 ++++++++++++++++++++++++----
 include/linux/acpi.h     | 11 +++++++----
 2 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index a8e059f69d50..f09dc987cf17 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -599,10 +599,12 @@ static struct dev_pm_domain acpi_general_pm_domain = {
 /**
  * acpi_dev_pm_attach - Prepare device for ACPI power management.
  * @dev: Device to prepare.
+ * @power_on: Whether or not to power on the device.
  *
  * If @dev has a valid ACPI handle that has a valid struct acpi_device object
  * attached to it, install a wakeup notification handler for the device and
- * add it to the general ACPI PM domain.
+ * add it to the general ACPI PM domain.  If @power_on is set, the device will
+ * be put into the ACPI D0 state before the function returns.
  *
  * This assumes that the @dev's bus type uses generic power management callbacks
  * (or doesn't use any power management callbacks at all).
@@ -610,7 +612,7 @@ static struct dev_pm_domain acpi_general_pm_domain = {
  * Callers must ensure proper synchronization of this function with power
  * management callbacks.
  */
-int acpi_dev_pm_attach(struct device *dev)
+int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
 	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
 
@@ -622,6 +624,10 @@ int acpi_dev_pm_attach(struct device *dev)
 
 	acpi_add_pm_notifier(adev, acpi_wakeup_device, dev);
 	dev->pm_domain = &acpi_general_pm_domain;
+	if (power_on) {
+		acpi_dev_pm_full_power(adev);
+		__acpi_device_run_wake(adev, false);
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
@@ -629,20 +635,34 @@ EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
 /**
  * acpi_dev_pm_detach - Remove ACPI power management from the device.
  * @dev: Device to take care of.
+ * @power_off: Whether or not to try to remove power from the device.
  *
  * Remove the device from the general ACPI PM domain and remove its wakeup
- * notifier.
+ * notifier.  If @power_off is set, additionally remove power from the device if
+ * possible.
  *
  * Callers must ensure proper synchronization of this function with power
  * management callbacks.
  */
-void acpi_dev_pm_detach(struct device *dev)
+void acpi_dev_pm_detach(struct device *dev, bool power_off)
 {
 	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
 
 	if (adev && dev->pm_domain == &acpi_general_pm_domain) {
 		dev->pm_domain = NULL;
 		acpi_remove_pm_notifier(adev, acpi_wakeup_device);
+		if (power_off) {
+			/*
+			 * If the device's PM QoS resume latency limit or flags
+			 * have been exposed to user space, they have to be
+			 * hidden at this point, so that they don't affect the
+			 * choice of the low-power state to put the device into.
+			 */
+			dev_pm_qos_hide_latency_limit(dev);
+			dev_pm_qos_hide_flags(dev);
+			__acpi_device_run_wake(adev, false);
+			acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_detach);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 5fdd87271518..28ba643c92c1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -458,11 +458,14 @@ static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
-int acpi_dev_pm_attach(struct device *dev);
-int acpi_dev_pm_detach(struct device *dev);
+int acpi_dev_pm_attach(struct device *dev, bool power_on);
+int acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
-static inline int acpi_dev_pm_attach(struct device *dev) { return -ENODEV; }
-static inline void acpi_dev_pm_detach(struct device *dev) {}
+static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
+{
+	return -ENODEV;
+}
+static inline void acpi_dev_pm_detach(struct device *dev, bool power_off) {}
 #endif
 
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3-55-g7522


From 883d588e556347c4b3221ac492a8acd8a75e730a Mon Sep 17 00:00:00 2001
From: MyungJoo Ham
Date: Wed, 21 Nov 2012 17:17:11 +0900
Subject: PM / devfreq: remove compiler error when a governor is module

With the intruction of patch, eff607fdb1f787da1fedf46ab6e64adc2afd1c5a,
it became possible to include a governor as a module.
Thus the #ifdef statement for a governor should become #if IS_ENABLED.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/devfreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 235248cb2c93..e83ef39b3bea 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -192,7 +192,7 @@ extern int devfreq_register_opp_notifier(struct device *dev,
 extern int devfreq_unregister_opp_notifier(struct device *dev,
 					   struct devfreq *devfreq);
 
-#ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
 /**
  * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
  *	and devfreq_add_device
-- 
cgit v1.2.3-55-g7522


From 65821635d26d3173a3b22781e2c60d5e6fcaeb22 Mon Sep 17 00:00:00 2001
From: Marco Porsch
Date: Wed, 21 Nov 2012 18:40:30 -0800
Subject: mac80211: move Mesh Capability field definition to ieee80211.h

Signed-off-by: Marco Porsch <marco.porsch@etit.tu-chemnitz.de>
[prefix with IEEE80211_]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 15 +++++++++++++++
 net/mac80211/mesh.c       |  8 ++++----
 net/mac80211/mesh.h       | 14 --------------
 net/mac80211/mesh_sync.c  |  2 +-
 4 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d68790903b9e..65c6b0f3ef60 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -667,6 +667,21 @@ struct ieee80211_meshconf_ie {
 	u8 meshconf_cap;
 } __attribute__ ((packed));
 
+/**
+ * enum mesh_config_capab_flags - Mesh Configuration IE capability field flags
+ *
+ * @IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS: STA is willing to establish
+ *	additional mesh peerings with other mesh STAs
+ * @IEEE80211_MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs
+ * @IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure
+ *	is ongoing
+ */
+enum mesh_config_capab_flags {
+	IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS		= 0x01,
+	IEEE80211_MESHCONF_CAPAB_FORWARDING		= 0x08,
+	IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING		= 0x20,
+};
+
 /**
  * struct ieee80211_rann_ie
  *
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a350cab4b339..943694a52624 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -129,7 +129,7 @@ mismatch:
 bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
 {
 	return (ie->mesh_config->meshconf_cap &
-	    MESHCONF_CAPAB_ACCEPT_PLINKS) != 0;
+	    IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0;
 }
 
 /**
@@ -269,11 +269,11 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	neighbors = (neighbors > 15) ? 15 : neighbors;
 	*pos++ = neighbors << 1;
 	/* Mesh capability */
-	*pos = MESHCONF_CAPAB_FORWARDING;
+	*pos = IEEE80211_MESHCONF_CAPAB_FORWARDING;
 	*pos |= ifmsh->accepting_plinks ?
-	    MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
+	    IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	*pos++ |= ifmsh->adjusting_tbtt ?
-	    MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00;
+	    IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00;
 	*pos++ = 0x00;
 
 	return 0;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 9285f3f67e66..7c9215fb2ac8 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -18,20 +18,6 @@
 
 /* Data structures */
 
-/**
- * enum mesh_config_capab_flags - mesh config IE capability flags
- *
- * @MESHCONF_CAPAB_ACCEPT_PLINKS: STA is willing to establish
- * additional mesh peerings with other mesh STAs
- * @MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs
- * @MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure is ongoing
- */
-enum mesh_config_capab_flags {
-	MESHCONF_CAPAB_ACCEPT_PLINKS = BIT(0),
-	MESHCONF_CAPAB_FORWARDING = BIT(3),
-	MESHCONF_CAPAB_TBTT_ADJUSTING = BIT(5),
-};
-
 /**
  * enum mesh_path_flags - mac80211 mesh path flags
  *
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 9c6ea9cfe1b3..0f40086cce18 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -43,7 +43,7 @@ struct sync_method {
 static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie)
 {
 	return (ie->mesh_config->meshconf_cap &
-	    MESHCONF_CAPAB_TBTT_ADJUSTING) != 0;
+	    IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0;
 }
 
 void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata)
-- 
cgit v1.2.3-55-g7522


From 3f9be35bd9090eaa2f68ed9b24efdbf3abcf4b28 Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Tue, 20 Nov 2012 10:34:56 +0800
Subject: mfd: rc5t583: Fix array subscript is above array bounds

I got below build warning while compiling this driver.
It's obviously RC5T583_MAX_INTERRUPT_MASK_REGS is 9 but irq_en_add
array only has 8 elements.

  CC      drivers/mfd/rc5t583-irq.o
drivers/mfd/rc5t583-irq.c: In function 'rc5t583_irq_sync_unlock':
drivers/mfd/rc5t583-irq.c:227: warning: array subscript is above array bounds
drivers/mfd/rc5t583-irq.c: In function 'rc5t583_irq_init':
drivers/mfd/rc5t583-irq.c:349: warning: array subscript is above array bounds

Since the number of interrupt enable registers is 8, this patch adds
define for RC5T583_MAX_INTERRUPT_EN_REGS to fix this bug.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/rc5t583-irq.c   | 2 +-
 include/linux/mfd/rc5t583.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rc5t583-irq.c b/drivers/mfd/rc5t583-irq.c
index fe00cdd6f83d..b41db5968706 100644
--- a/drivers/mfd/rc5t583-irq.c
+++ b/drivers/mfd/rc5t583-irq.c
@@ -345,7 +345,7 @@ int rc5t583_irq_init(struct rc5t583 *rc5t583, int irq, int irq_base)
 	mutex_init(&rc5t583->irq_lock);
 
 	/* Initailize all int register to 0 */
-	for (i = 0; i < RC5T583_MAX_INTERRUPT_MASK_REGS; i++)  {
+	for (i = 0; i < RC5T583_MAX_INTERRUPT_EN_REGS; i++)  {
 		ret = rc5t583_write(rc5t583->dev, irq_en_add[i],
 				rc5t583->irq_en_reg[i]);
 		if (ret < 0)
diff --git a/include/linux/mfd/rc5t583.h b/include/linux/mfd/rc5t583.h
index 36c242e52ef1..fd413ccab915 100644
--- a/include/linux/mfd/rc5t583.h
+++ b/include/linux/mfd/rc5t583.h
@@ -33,6 +33,7 @@
 /* Maximum number of main interrupts */
 #define MAX_MAIN_INTERRUPT		5
 #define RC5T583_MAX_GPEDGE_REG		2
+#define RC5T583_MAX_INTERRUPT_EN_REGS	8
 #define RC5T583_MAX_INTERRUPT_MASK_REGS	9
 
 /* Interrupt enable register */
@@ -304,7 +305,7 @@ struct rc5t583 {
 	uint8_t		intc_inten_reg;
 
 	/* For group interrupt bits and address */
-	uint8_t		irq_en_reg[RC5T583_MAX_INTERRUPT_MASK_REGS];
+	uint8_t		irq_en_reg[RC5T583_MAX_INTERRUPT_EN_REGS];
 
 	/* For gpio edge */
 	uint8_t		gpedge_reg[RC5T583_MAX_GPEDGE_REG];
-- 
cgit v1.2.3-55-g7522


From df1590d9ae5e37e07e7cf91107e4c2c946ce8bf4 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Mon, 12 Nov 2012 22:56:03 +0530
Subject: ARM: SPEAr3xx: Shirq: Move shirq controller out of plat/

This patch moves shirq interrupt controllers driver and header file out of
plat-spear directory. It is moved to drivers/irqchip/ directory.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/mach-spear3xx/spear3xx.c        |   2 +-
 arch/arm/plat-spear/Makefile             |   2 +-
 arch/arm/plat-spear/include/plat/shirq.h |  66 -------
 arch/arm/plat-spear/shirq.c              | 315 ------------------------------
 drivers/irqchip/Makefile                 |   3 +-
 drivers/irqchip/spear-shirq.c            | 316 +++++++++++++++++++++++++++++++
 include/linux/irqchip/spear-shirq.h      |  64 +++++++
 7 files changed, 384 insertions(+), 384 deletions(-)
 delete mode 100644 arch/arm/plat-spear/include/plat/shirq.h
 delete mode 100644 arch/arm/plat-spear/shirq.c
 create mode 100644 drivers/irqchip/spear-shirq.c
 create mode 100644 include/linux/irqchip/spear-shirq.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index f1aaf5b168b2..38fe95db31a7 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -15,12 +15,12 @@
 
 #include <linux/amba/pl022.h>
 #include <linux/amba/pl08x.h>
+#include <linux/irqchip/spear-shirq.h>
 #include <linux/of_irq.h>
 #include <linux/io.h>
 #include <asm/hardware/pl080.h>
 #include <asm/hardware/vic.h>
 #include <plat/pl080.h>
-#include <plat/shirq.h>
 #include <mach/generic.h>
 #include <mach/spear.h>
 
diff --git a/arch/arm/plat-spear/Makefile b/arch/arm/plat-spear/Makefile
index 2607bd05c525..01e88532a5db 100644
--- a/arch/arm/plat-spear/Makefile
+++ b/arch/arm/plat-spear/Makefile
@@ -5,5 +5,5 @@
 # Common support
 obj-y	:= restart.o time.o
 
-obj-$(CONFIG_ARCH_SPEAR3XX)	+= pl080.o shirq.o
+obj-$(CONFIG_ARCH_SPEAR3XX)	+= pl080.o
 obj-$(CONFIG_ARCH_SPEAR6XX)	+= pl080.o
diff --git a/arch/arm/plat-spear/include/plat/shirq.h b/arch/arm/plat-spear/include/plat/shirq.h
deleted file mode 100644
index c51b355f00de..000000000000
--- a/arch/arm/plat-spear/include/plat/shirq.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * arch/arm/plat-spear/include/plat/shirq.h
- *
- * SPEAr platform shared irq layer header file
- *
- * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __PLAT_SHIRQ_H
-#define __PLAT_SHIRQ_H
-
-#include <linux/irq.h>
-#include <linux/types.h>
-
-/*
- * struct shirq_regs: shared irq register configuration
- *
- * enb_reg: enable register offset
- * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt
- * status_reg: status register offset
- * status_reg_mask: status register valid mask
- * clear_reg: clear register offset
- * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt
- */
-struct shirq_regs {
-	u32 enb_reg;
-	u32 reset_to_enb;
-	u32 status_reg;
-	u32 clear_reg;
-	u32 reset_to_clear;
-};
-
-/*
- * struct spear_shirq: shared irq structure
- *
- * irq: hardware irq number
- * irq_base: base irq in linux domain
- * irq_nr: no. of shared interrupts in a particular block
- * irq_bit_off: starting bit offset in the status register
- * invalid_irq: irq group is currently disabled
- * base: base address of shared irq register
- * regs: register configuration for shared irq block
- */
-struct spear_shirq {
-	u32 irq;
-	u32 irq_base;
-	u32 irq_nr;
-	u32 irq_bit_off;
-	int invalid_irq;
-	void __iomem *base;
-	struct shirq_regs regs;
-};
-
-int __init spear300_shirq_of_init(struct device_node *np,
-		struct device_node *parent);
-int __init spear310_shirq_of_init(struct device_node *np,
-		struct device_node *parent);
-int __init spear320_shirq_of_init(struct device_node *np,
-		struct device_node *parent);
-
-#endif /* __PLAT_SHIRQ_H */
diff --git a/arch/arm/plat-spear/shirq.c b/arch/arm/plat-spear/shirq.c
deleted file mode 100644
index 955c7249a5c1..000000000000
--- a/arch/arm/plat-spear/shirq.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * arch/arm/plat-spear/shirq.c
- *
- * SPEAr platform shared irq layer source file
- *
- * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/spinlock.h>
-#include <plat/shirq.h>
-
-static DEFINE_SPINLOCK(lock);
-
-/* spear300 shared irq registers offsets and masks */
-#define SPEAR300_INT_ENB_MASK_REG	0x54
-#define SPEAR300_INT_STS_MASK_REG	0x58
-
-static struct spear_shirq spear300_shirq_ras1 = {
-	.irq_nr = 9,
-	.irq_bit_off = 0,
-	.regs = {
-		.enb_reg = SPEAR300_INT_ENB_MASK_REG,
-		.status_reg = SPEAR300_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq *spear300_shirq_blocks[] = {
-	&spear300_shirq_ras1,
-};
-
-/* spear310 shared irq registers offsets and masks */
-#define SPEAR310_INT_STS_MASK_REG	0x04
-
-static struct spear_shirq spear310_shirq_ras1 = {
-	.irq_nr = 8,
-	.irq_bit_off = 0,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq spear310_shirq_ras2 = {
-	.irq_nr = 5,
-	.irq_bit_off = 8,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq spear310_shirq_ras3 = {
-	.irq_nr = 1,
-	.irq_bit_off = 13,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq spear310_shirq_intrcomm_ras = {
-	.irq_nr = 3,
-	.irq_bit_off = 14,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq *spear310_shirq_blocks[] = {
-	&spear310_shirq_ras1,
-	&spear310_shirq_ras2,
-	&spear310_shirq_ras3,
-	&spear310_shirq_intrcomm_ras,
-};
-
-/* spear320 shared irq registers offsets and masks */
-#define SPEAR320_INT_STS_MASK_REG		0x04
-#define SPEAR320_INT_CLR_MASK_REG		0x04
-#define SPEAR320_INT_ENB_MASK_REG		0x08
-
-static struct spear_shirq spear320_shirq_ras1 = {
-	.irq_nr = 3,
-	.irq_bit_off = 7,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq spear320_shirq_ras2 = {
-	.irq_nr = 1,
-	.irq_bit_off = 10,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq spear320_shirq_ras3 = {
-	.irq_nr = 3,
-	.irq_bit_off = 0,
-	.invalid_irq = 1,
-	.regs = {
-		.enb_reg = SPEAR320_INT_ENB_MASK_REG,
-		.reset_to_enb = 1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq spear320_shirq_intrcomm_ras = {
-	.irq_nr = 11,
-	.irq_bit_off = 11,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq *spear320_shirq_blocks[] = {
-	&spear320_shirq_ras3,
-	&spear320_shirq_ras1,
-	&spear320_shirq_ras2,
-	&spear320_shirq_intrcomm_ras,
-};
-
-static void shirq_irq_mask_unmask(struct irq_data *d, bool mask)
-{
-	struct spear_shirq *shirq = irq_data_get_irq_chip_data(d);
-	u32 val, offset = d->irq - shirq->irq_base;
-	unsigned long flags;
-
-	if (shirq->regs.enb_reg == -1)
-		return;
-
-	spin_lock_irqsave(&lock, flags);
-	val = readl(shirq->base + shirq->regs.enb_reg);
-
-	if (mask ^ shirq->regs.reset_to_enb)
-		val &= ~(0x1 << shirq->irq_bit_off << offset);
-	else
-		val |= 0x1 << shirq->irq_bit_off << offset;
-
-	writel(val, shirq->base + shirq->regs.enb_reg);
-	spin_unlock_irqrestore(&lock, flags);
-
-}
-
-static void shirq_irq_mask(struct irq_data *d)
-{
-	shirq_irq_mask_unmask(d, 1);
-}
-
-static void shirq_irq_unmask(struct irq_data *d)
-{
-	shirq_irq_mask_unmask(d, 0);
-}
-
-static struct irq_chip shirq_chip = {
-	.name		= "spear-shirq",
-	.irq_ack	= shirq_irq_mask,
-	.irq_mask	= shirq_irq_mask,
-	.irq_unmask	= shirq_irq_unmask,
-};
-
-static void shirq_handler(unsigned irq, struct irq_desc *desc)
-{
-	u32 i, j, val, mask, tmp;
-	struct irq_chip *chip;
-	struct spear_shirq *shirq = irq_get_handler_data(irq);
-
-	chip = irq_get_chip(irq);
-	chip->irq_ack(&desc->irq_data);
-
-	mask = ((0x1 << shirq->irq_nr) - 1) << shirq->irq_bit_off;
-	while ((val = readl(shirq->base + shirq->regs.status_reg) &
-				mask)) {
-
-		val >>= shirq->irq_bit_off;
-		for (i = 0, j = 1; i < shirq->irq_nr; i++, j <<= 1) {
-
-			if (!(j & val))
-				continue;
-
-			generic_handle_irq(shirq->irq_base + i);
-
-			/* clear interrupt */
-			if (shirq->regs.clear_reg == -1)
-				continue;
-
-			tmp = readl(shirq->base + shirq->regs.clear_reg);
-			if (shirq->regs.reset_to_clear)
-				tmp &= ~(j << shirq->irq_bit_off);
-			else
-				tmp |= (j << shirq->irq_bit_off);
-			writel(tmp, shirq->base + shirq->regs.clear_reg);
-		}
-	}
-	chip->irq_unmask(&desc->irq_data);
-}
-
-static void __init spear_shirq_register(struct spear_shirq *shirq)
-{
-	int i;
-
-	if (shirq->invalid_irq)
-		return;
-
-	irq_set_chained_handler(shirq->irq, shirq_handler);
-	for (i = 0; i < shirq->irq_nr; i++) {
-		irq_set_chip_and_handler(shirq->irq_base + i,
-					 &shirq_chip, handle_simple_irq);
-		set_irq_flags(shirq->irq_base + i, IRQF_VALID);
-		irq_set_chip_data(shirq->irq_base + i, shirq);
-	}
-
-	irq_set_handler_data(shirq->irq, shirq);
-}
-
-static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr,
-		struct device_node *np)
-{
-	int i, irq_base, hwirq = 0, irq_nr = 0;
-	static struct irq_domain *shirq_domain;
-	void __iomem *base;
-
-	base = of_iomap(np, 0);
-	if (!base) {
-		pr_err("%s: failed to map shirq registers\n", __func__);
-		return -ENXIO;
-	}
-
-	for (i = 0; i < block_nr; i++)
-		irq_nr += shirq_blocks[i]->irq_nr;
-
-	irq_base = irq_alloc_descs(-1, 0, irq_nr, 0);
-	if (IS_ERR_VALUE(irq_base)) {
-		pr_err("%s: irq desc alloc failed\n", __func__);
-		goto err_unmap;
-	}
-
-	shirq_domain = irq_domain_add_legacy(np, irq_nr, irq_base, 0,
-			&irq_domain_simple_ops, NULL);
-	if (WARN_ON(!shirq_domain)) {
-		pr_warn("%s: irq domain init failed\n", __func__);
-		goto err_free_desc;
-	}
-
-	for (i = 0; i < block_nr; i++) {
-		shirq_blocks[i]->base = base;
-		shirq_blocks[i]->irq_base = irq_find_mapping(shirq_domain,
-				hwirq);
-		shirq_blocks[i]->irq = irq_of_parse_and_map(np, i);
-
-		spear_shirq_register(shirq_blocks[i]);
-		hwirq += shirq_blocks[i]->irq_nr;
-	}
-
-	return 0;
-
-err_free_desc:
-	irq_free_descs(irq_base, irq_nr);
-err_unmap:
-	iounmap(base);
-	return -ENXIO;
-}
-
-int __init spear300_shirq_of_init(struct device_node *np,
-		struct device_node *parent)
-{
-	return shirq_init(spear300_shirq_blocks,
-			ARRAY_SIZE(spear300_shirq_blocks), np);
-}
-
-int __init spear310_shirq_of_init(struct device_node *np,
-		struct device_node *parent)
-{
-	return shirq_init(spear310_shirq_blocks,
-			ARRAY_SIZE(spear310_shirq_blocks), np);
-}
-
-int __init spear320_shirq_of_init(struct device_node *np,
-		struct device_node *parent)
-{
-	return shirq_init(spear320_shirq_blocks,
-			ARRAY_SIZE(spear320_shirq_blocks), np);
-}
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 054321db4350..bf7b43696cdb 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1 +1,2 @@
-obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o
+obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
+obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c
new file mode 100644
index 000000000000..80e1d2fd9d4c
--- /dev/null
+++ b/drivers/irqchip/spear-shirq.c
@@ -0,0 +1,316 @@
+/*
+ * SPEAr platform shared irq layer source file
+ *
+ * Copyright (C) 2009-2012 ST Microelectronics
+ * Viresh Kumar <viresh.linux@gmail.com>
+ *
+ * Copyright (C) 2012 ST Microelectronics
+ * Shiraz Hashim <shiraz.hashim@st.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/spear-shirq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(lock);
+
+/* spear300 shared irq registers offsets and masks */
+#define SPEAR300_INT_ENB_MASK_REG	0x54
+#define SPEAR300_INT_STS_MASK_REG	0x58
+
+static struct spear_shirq spear300_shirq_ras1 = {
+	.irq_nr = 9,
+	.irq_bit_off = 0,
+	.regs = {
+		.enb_reg = SPEAR300_INT_ENB_MASK_REG,
+		.status_reg = SPEAR300_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq *spear300_shirq_blocks[] = {
+	&spear300_shirq_ras1,
+};
+
+/* spear310 shared irq registers offsets and masks */
+#define SPEAR310_INT_STS_MASK_REG	0x04
+
+static struct spear_shirq spear310_shirq_ras1 = {
+	.irq_nr = 8,
+	.irq_bit_off = 0,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq spear310_shirq_ras2 = {
+	.irq_nr = 5,
+	.irq_bit_off = 8,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq spear310_shirq_ras3 = {
+	.irq_nr = 1,
+	.irq_bit_off = 13,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq spear310_shirq_intrcomm_ras = {
+	.irq_nr = 3,
+	.irq_bit_off = 14,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq *spear310_shirq_blocks[] = {
+	&spear310_shirq_ras1,
+	&spear310_shirq_ras2,
+	&spear310_shirq_ras3,
+	&spear310_shirq_intrcomm_ras,
+};
+
+/* spear320 shared irq registers offsets and masks */
+#define SPEAR320_INT_STS_MASK_REG		0x04
+#define SPEAR320_INT_CLR_MASK_REG		0x04
+#define SPEAR320_INT_ENB_MASK_REG		0x08
+
+static struct spear_shirq spear320_shirq_ras1 = {
+	.irq_nr = 3,
+	.irq_bit_off = 7,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq spear320_shirq_ras2 = {
+	.irq_nr = 1,
+	.irq_bit_off = 10,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq spear320_shirq_ras3 = {
+	.irq_nr = 3,
+	.irq_bit_off = 0,
+	.invalid_irq = 1,
+	.regs = {
+		.enb_reg = SPEAR320_INT_ENB_MASK_REG,
+		.reset_to_enb = 1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq spear320_shirq_intrcomm_ras = {
+	.irq_nr = 11,
+	.irq_bit_off = 11,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq *spear320_shirq_blocks[] = {
+	&spear320_shirq_ras3,
+	&spear320_shirq_ras1,
+	&spear320_shirq_ras2,
+	&spear320_shirq_intrcomm_ras,
+};
+
+static void shirq_irq_mask_unmask(struct irq_data *d, bool mask)
+{
+	struct spear_shirq *shirq = irq_data_get_irq_chip_data(d);
+	u32 val, offset = d->irq - shirq->irq_base;
+	unsigned long flags;
+
+	if (shirq->regs.enb_reg == -1)
+		return;
+
+	spin_lock_irqsave(&lock, flags);
+	val = readl(shirq->base + shirq->regs.enb_reg);
+
+	if (mask ^ shirq->regs.reset_to_enb)
+		val &= ~(0x1 << shirq->irq_bit_off << offset);
+	else
+		val |= 0x1 << shirq->irq_bit_off << offset;
+
+	writel(val, shirq->base + shirq->regs.enb_reg);
+	spin_unlock_irqrestore(&lock, flags);
+
+}
+
+static void shirq_irq_mask(struct irq_data *d)
+{
+	shirq_irq_mask_unmask(d, 1);
+}
+
+static void shirq_irq_unmask(struct irq_data *d)
+{
+	shirq_irq_mask_unmask(d, 0);
+}
+
+static struct irq_chip shirq_chip = {
+	.name		= "spear-shirq",
+	.irq_ack	= shirq_irq_mask,
+	.irq_mask	= shirq_irq_mask,
+	.irq_unmask	= shirq_irq_unmask,
+};
+
+static void shirq_handler(unsigned irq, struct irq_desc *desc)
+{
+	u32 i, j, val, mask, tmp;
+	struct irq_chip *chip;
+	struct spear_shirq *shirq = irq_get_handler_data(irq);
+
+	chip = irq_get_chip(irq);
+	chip->irq_ack(&desc->irq_data);
+
+	mask = ((0x1 << shirq->irq_nr) - 1) << shirq->irq_bit_off;
+	while ((val = readl(shirq->base + shirq->regs.status_reg) &
+				mask)) {
+
+		val >>= shirq->irq_bit_off;
+		for (i = 0, j = 1; i < shirq->irq_nr; i++, j <<= 1) {
+
+			if (!(j & val))
+				continue;
+
+			generic_handle_irq(shirq->irq_base + i);
+
+			/* clear interrupt */
+			if (shirq->regs.clear_reg == -1)
+				continue;
+
+			tmp = readl(shirq->base + shirq->regs.clear_reg);
+			if (shirq->regs.reset_to_clear)
+				tmp &= ~(j << shirq->irq_bit_off);
+			else
+				tmp |= (j << shirq->irq_bit_off);
+			writel(tmp, shirq->base + shirq->regs.clear_reg);
+		}
+	}
+	chip->irq_unmask(&desc->irq_data);
+}
+
+static void __init spear_shirq_register(struct spear_shirq *shirq)
+{
+	int i;
+
+	if (shirq->invalid_irq)
+		return;
+
+	irq_set_chained_handler(shirq->irq, shirq_handler);
+	for (i = 0; i < shirq->irq_nr; i++) {
+		irq_set_chip_and_handler(shirq->irq_base + i,
+					 &shirq_chip, handle_simple_irq);
+		set_irq_flags(shirq->irq_base + i, IRQF_VALID);
+		irq_set_chip_data(shirq->irq_base + i, shirq);
+	}
+
+	irq_set_handler_data(shirq->irq, shirq);
+}
+
+static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr,
+		struct device_node *np)
+{
+	int i, irq_base, hwirq = 0, irq_nr = 0;
+	static struct irq_domain *shirq_domain;
+	void __iomem *base;
+
+	base = of_iomap(np, 0);
+	if (!base) {
+		pr_err("%s: failed to map shirq registers\n", __func__);
+		return -ENXIO;
+	}
+
+	for (i = 0; i < block_nr; i++)
+		irq_nr += shirq_blocks[i]->irq_nr;
+
+	irq_base = irq_alloc_descs(-1, 0, irq_nr, 0);
+	if (IS_ERR_VALUE(irq_base)) {
+		pr_err("%s: irq desc alloc failed\n", __func__);
+		goto err_unmap;
+	}
+
+	shirq_domain = irq_domain_add_legacy(np, irq_nr, irq_base, 0,
+			&irq_domain_simple_ops, NULL);
+	if (WARN_ON(!shirq_domain)) {
+		pr_warn("%s: irq domain init failed\n", __func__);
+		goto err_free_desc;
+	}
+
+	for (i = 0; i < block_nr; i++) {
+		shirq_blocks[i]->base = base;
+		shirq_blocks[i]->irq_base = irq_find_mapping(shirq_domain,
+				hwirq);
+		shirq_blocks[i]->irq = irq_of_parse_and_map(np, i);
+
+		spear_shirq_register(shirq_blocks[i]);
+		hwirq += shirq_blocks[i]->irq_nr;
+	}
+
+	return 0;
+
+err_free_desc:
+	irq_free_descs(irq_base, irq_nr);
+err_unmap:
+	iounmap(base);
+	return -ENXIO;
+}
+
+int __init spear300_shirq_of_init(struct device_node *np,
+		struct device_node *parent)
+{
+	return shirq_init(spear300_shirq_blocks,
+			ARRAY_SIZE(spear300_shirq_blocks), np);
+}
+
+int __init spear310_shirq_of_init(struct device_node *np,
+		struct device_node *parent)
+{
+	return shirq_init(spear310_shirq_blocks,
+			ARRAY_SIZE(spear310_shirq_blocks), np);
+}
+
+int __init spear320_shirq_of_init(struct device_node *np,
+		struct device_node *parent)
+{
+	return shirq_init(spear320_shirq_blocks,
+			ARRAY_SIZE(spear320_shirq_blocks), np);
+}
diff --git a/include/linux/irqchip/spear-shirq.h b/include/linux/irqchip/spear-shirq.h
new file mode 100644
index 000000000000..c8be16d213a3
--- /dev/null
+++ b/include/linux/irqchip/spear-shirq.h
@@ -0,0 +1,64 @@
+/*
+ * SPEAr platform shared irq layer header file
+ *
+ * Copyright (C) 2009-2012 ST Microelectronics
+ * Viresh Kumar <viresh.linux@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __SPEAR_SHIRQ_H
+#define __SPEAR_SHIRQ_H
+
+#include <linux/irq.h>
+#include <linux/types.h>
+
+/*
+ * struct shirq_regs: shared irq register configuration
+ *
+ * enb_reg: enable register offset
+ * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt
+ * status_reg: status register offset
+ * status_reg_mask: status register valid mask
+ * clear_reg: clear register offset
+ * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt
+ */
+struct shirq_regs {
+	u32 enb_reg;
+	u32 reset_to_enb;
+	u32 status_reg;
+	u32 clear_reg;
+	u32 reset_to_clear;
+};
+
+/*
+ * struct spear_shirq: shared irq structure
+ *
+ * irq: hardware irq number
+ * irq_base: base irq in linux domain
+ * irq_nr: no. of shared interrupts in a particular block
+ * irq_bit_off: starting bit offset in the status register
+ * invalid_irq: irq group is currently disabled
+ * base: base address of shared irq register
+ * regs: register configuration for shared irq block
+ */
+struct spear_shirq {
+	u32 irq;
+	u32 irq_base;
+	u32 irq_nr;
+	u32 irq_bit_off;
+	int invalid_irq;
+	void __iomem *base;
+	struct shirq_regs regs;
+};
+
+int __init spear300_shirq_of_init(struct device_node *np,
+		struct device_node *parent);
+int __init spear310_shirq_of_init(struct device_node *np,
+		struct device_node *parent);
+int __init spear320_shirq_of_init(struct device_node *np,
+		struct device_node *parent);
+
+#endif /* __SPEAR_SHIRQ_H */
-- 
cgit v1.2.3-55-g7522


From 7173a1fa53dff94f771a66b93623500424376015 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Mon, 12 Nov 2012 11:44:18 +0100
Subject: wireless: add definitions for VHT MCS support

Add definitions for the VHT MCS support values that
are used to indicate, for each number of streams
(1 through 8) which MCSes are supported.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 65c6b0f3ef60..f9c5a787d350 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1163,11 +1163,13 @@ struct ieee80211_ht_operation {
  *	STA can receive. Rate expressed in units of 1 Mbps.
  *	If this field is 0 this value should not be used to
  *	consider the highest RX data rate supported.
+ *	The top 3 bits of this field are reserved.
  * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams
  * @tx_highest: Indicates highest long GI VHT PPDU data rate
  *	STA can transmit. Rate expressed in units of 1 Mbps.
  *	If this field is 0 this value should not be used to
  *	consider the highest TX data rate supported.
+ *	The top 3 bits of this field are reserved.
  */
 struct ieee80211_vht_mcs_info {
 	__le16 rx_mcs_map;
@@ -1176,6 +1178,27 @@ struct ieee80211_vht_mcs_info {
 	__le16 tx_highest;
 } __packed;
 
+/**
+ * enum ieee80211_vht_mcs_support - VHT MCS support definitions
+ * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
+ *	number of streams
+ * @IEEE80211_VHT_MCS_SUPPORT_0_8: MCSes 0-8 are supported
+ * @IEEE80211_VHT_MCS_SUPPORT_0_9: MCSes 0-9 are supported
+ * @IEEE80211_VHT_MCS_NOT_SUPPORTED: This number of streams isn't supported
+ *
+ * These definitions are used in each 2-bit subfield of the @rx_mcs_map
+ * and @tx_mcs_map fields of &struct ieee80211_vht_mcs_info, which are
+ * both split into 8 subfields by number of streams. These values indicate
+ * which MCSes are supported for the number of streams the value appears
+ * for.
+ */
+enum ieee80211_vht_mcs_support {
+	IEEE80211_VHT_MCS_SUPPORT_0_7	= 0,
+	IEEE80211_VHT_MCS_SUPPORT_0_8	= 1,
+	IEEE80211_VHT_MCS_SUPPORT_0_9	= 2,
+	IEEE80211_VHT_MCS_NOT_SUPPORTED	= 3,
+};
+
 /**
  * struct ieee80211_vht_cap - VHT capabilities
  *
-- 
cgit v1.2.3-55-g7522


From 08ff32352d6ff7083533dc1c25618d42f92ec28e Mon Sep 17 00:00:00 2001
From: Or Gerlitz
Date: Sun, 21 Oct 2012 14:59:24 +0000
Subject: mlx4: 64-byte CQE/EQE support

ConnectX-3 devices can use either 64- or 32-byte completion queue
entries (CQEs) and event queue entries (EQEs).  Using 64-byte
EQEs/CQEs performs better because each entry is aligned to a complete
cacheline.  This patch queries the HCA's capabilities, and if it
supports 64-byte CQEs and EQES the driver will configure the HW to
work in 64-byte mode.

The 32-byte vs 64-byte mode is global per HCA and not per CQ or EQ.

Since this mode is global, userspace (libmlx4) must be updated to work
with the configured CQE size, and guests using SR-IOV virtual
functions need to know both EQE and CQE size.

In case one of the 64-byte CQE/EQE capabilities is activated, the
patch makes sure that older guest drivers that use the QUERY_DEV_FUNC
command (e.g as done in mlx4_core of Linux 3.3..3.6) will notice that
they need an update to be able to work with the PPF. This is done by
changing the returned pf_context_behaviour not to be zero any more. In
case none of these capabilities is activated that value remains zero
and older guest drivers can run OK.

The SRIOV related flow is as follows

1. the PPF does the detection of the new capabilities using
   QUERY_DEV_CAP command.

2. the PPF activates the new capabilities using INIT_HCA.

3. the VF detects if the PPF activated the capabilities using
   QUERY_HCA, and if this is the case activates them for itself too.

Note that the VF detects that it must be aware to the new PF behaviour
using QUERY_FUNC_CAP.  Steps 1 and 2 apply also for native mode.

User space notification is done through a new field introduced in
struct mlx4_ib_ucontext which holds device capabilities for which user
space must take action. This changes the binary interface so the ABI
towards libmlx4 exposed through uverbs is bumped from 3 to 4 but only
when **needed** i.e. only when the driver does use 64-byte CQEs or
future device capabilities which must be in sync by user space. This
practice allows to work with unmodified libmlx4 on older devices (e.g
A0, B0) which don't support 64-byte CQEs.

In order to keep existing systems functional when they update to a
newer kernel that contains these changes in VF and userspace ABI, a
module parameter enable_64b_cqe_eqe must be set to enable 64-byte
mode; the default is currently false.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/cq.c                | 34 +++++++++++++++++------
 drivers/infiniband/hw/mlx4/main.c              | 27 ++++++++++++++----
 drivers/infiniband/hw/mlx4/mlx4_ib.h           |  1 +
 drivers/infiniband/hw/mlx4/user.h              | 12 +++++++-
 drivers/net/ethernet/mellanox/mlx4/cmd.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     |  5 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     |  5 ++--
 drivers/net/ethernet/mellanox/mlx4/eq.c        | 26 ++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 30 +++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h        |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c      | 38 +++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 +
 include/linux/mlx4/device.h                    | 21 ++++++++++++++
 15 files changed, 175 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index c9eb6a6815ce..ae67df35dd4d 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+	return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
 {
 	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
 
-	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
 		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
 }
 
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 {
 	int err;
 
-	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
 			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
 
+	buf->entry_size = dev->dev->caps.cqe_size;
 	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
 				    &buf->mtt);
 	if (err)
@@ -120,8 +122,7 @@ err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
 
 err_buf:
-	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
-			      &buf->buf);
+	mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
 
 out:
 	return err;
@@ -129,7 +130,7 @@ out:
 
 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
 {
-	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
 static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
 			       u64 buf_addr, int cqe)
 {
 	int err;
+	int cqe_size = dev->dev->caps.cqe_size;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
 	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
+	int cqe_size = cq->buf.entry_size;
+	int cqe_inc = cqe_size == 64 ? 1 : 0;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	cqe += cqe_inc;
+
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
 		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
 					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+		new_cqe += cqe_inc;
+
 		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
 			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+		cqe += cqe_inc;
 	}
 	++cq->mcq.cons_index;
 }
@@ -438,6 +447,7 @@ err_buf:
 
 out:
 	mutex_unlock(&cq->resize_mutex);
+
 	return err;
 }
 
@@ -586,6 +596,9 @@ repoll:
 	if (!cqe)
 		return -EAGAIN;
 
+	if (cq->buf.entry_size == 64)
+		cqe++;
+
 	++cq->mcq.cons_index;
 
 	/*
@@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	int nfreed = 0;
 	struct mlx4_cqe *cqe, *dest;
 	u8 owner_bit;
+	int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe += cqe_inc;
+
 		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
 			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest += cqe_inc;
+
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 718ec6b2bad2..e7d81c0d1ac5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
-	resp.qp_tab_size      = dev->dev->caps.num_qps;
-	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
+		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.dev_caps	      = dev->dev->caps.userspace_caps;
+		resp.qp_tab_size      = dev->dev->caps.num_qps;
+		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+		resp.cqe_size	      = dev->dev->caps.cqe_size;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	err = ib_copy_to_udata(udata, &resp, sizeof resp);
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+	else
+		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 		kfree(context);
@@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
-	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
+	if (dev->caps.userspace_caps)
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	else
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
 	ibdev->ib_dev.uverbs_cmd_mask	=
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e04cbc9a54a5..dcd845bc30f0 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
+	int			entry_size;
 };
 
 struct mlx4_ib_cq_resize {
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 13beedeeef9f..07e6769ef43b 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -40,7 +40,9 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
  * instead.
  */
 
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
 struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
 	__u32	qp_tab_size;
 	__u16	bf_reg_size;
 	__u16	bf_regs_per_page;
+	__u32	cqe_size;
 };
 
 struct mlx4_ib_alloc_pd_resp {
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 3d1899ff1076..e791e705f7b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 			spin_lock_init(&s_state->lock);
 		}
 
-		memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+		memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
 		priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
 		INIT_WORK(&priv->mfunc.master.comm_work,
 			  mlx4_master_comm_channel);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index aa9c2f6cf3c0..b8d0854a7ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+	cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
 	cq->ring = ring;
 	cq->is_tx = mode;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index edd9cb8d3e1d..93a325669582 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		goto out;
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
+	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5aba5ecdf1e2..6fa106f6c0ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct ethhdr *ethh;
 	dma_addr_t dma;
 	u64 s_mac;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return 0;
@@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[index];
+	cqe = &cq->buf[(index << factor) + factor];
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -709,7 +710,7 @@ next:
 
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[index];
+		cqe = &cq->buf[(index << factor) + factor];
 		if (++polled == budget) {
 			/* We are here because we reached the NAPI budget -
 			 * flush only pending LRO sessions */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index b35094c590ba..25c157abdd92 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	struct mlx4_cqe *buf = cq->buf;
 	u32 packets = 0;
 	u32 bytes = 0;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return;
 
 	index = cons_index & size_mask;
-	cqe = &buf[index];
+	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
 
 	/* Process all completed CQEs */
@@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[index];
+		cqe = &buf[(index << factor) + factor];
 	}
 
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index b84a88bc44dc..c509a86db610 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }
 
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
 {
-	unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
-	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+	/* (entry & (eq->nent - 1)) gives us a cyclic array */
+	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
+	 * When this feature is enabled, the first (in the lower addresses)
+	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+	 * contain the legacy EQE information.
+	 */
+	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }
 
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
 
@@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
 		return;
 	}
 
-	memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
 	s_eqe->slave_id = slave;
 	/* ensure all information is written before setting the ownersip bit */
 	wmb();
@@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	int i;
 	enum slave_port_gen_event gen_event;
 
-	while ((eqe = next_eqe_sw(eq))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 
 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
-	int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
 	int i;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 4f30b99324cf..9a9de51ecc91 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 		[42] = "Multicast VEP steering support",
 		[48] = "Counters support",
 		[59] = "Port management change event support",
+		[61] = "64 byte EQE support",
+		[62] = "64 byte CQE support",
 	};
 	int i;
 
@@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		field = dev->caps.num_ports;
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
 
-		size = 0; /* no PF behaviour is set for now */
+		size = dev->caps.function_caps; /* set PF behaviours */
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
 
 		field = 0; /* protected FMR support not available as yet */
@@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
@@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	__be32 *outbox;
 	int err;
+	u8 byte_field;
 
 #define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
 
@@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
 	}
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+	if (byte_field & 0x20) /* 64-bytes eqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+	if (byte_field & 0x40) /* 64-bytes cqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 85abe9c11a22..2c2e7ade2a34 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
 	u8  log_uar_sz;
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  fs_hash_enable_bits;
+	u64 dev_cap_enabled;
 };
 
 struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2aa80afd98d2..4337f685175d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " Not in use with device managed"
 					 " flow steering");
 
+static bool enable_64b_cqe_eqe;
+module_param(enable_64b_cqe_eqe, bool, 0444);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+		 "Enable 64 byte CQEs/EQEs when the the FW supports this");
+
 #define HCA_GLOBAL_CAP_MASK            0
-#define PF_CONTEXT_BEHAVIOUR_MASK      0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
 
 static char mlx4_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX core driver v"
@@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
 
 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+	if (!enable_64b_cqe_eqe) {
+		if (dev_cap->flags &
+		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+		}
+	}
+
+	if ((dev_cap->flags &
+	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+	    mlx4_is_master(dev))
+		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
 	return 0;
 }
 /*The function checks if there are live vf, return the num of them*/
@@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		goto err_mem;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	return 0;
 
 err_mem:
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 9d27e42264e2..73b5c2ac5bd5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -487,6 +487,7 @@ struct mlx4_en_priv {
 	int mac_index;
 	unsigned max_mtu;
 	int base_qpn;
+	int cqe_factor;
 
 	struct mlx4_en_rss_map rss_map;
 	__be32 ctrl_flags;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6d1acb04cd17..21821da2abfd 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -142,6 +142,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
 	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+	MLX4_DEV_CAP_FLAG_64B_EQE	= 1LL << 61,
+	MLX4_DEV_CAP_FLAG_64B_CQE	= 1LL << 62
 };
 
 enum {
@@ -151,6 +153,20 @@ enum {
 	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3
 };
 
+enum {
+	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+};
+
+enum {
+	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+};
+
+enum {
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+};
+
+
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
@@ -419,6 +435,11 @@ struct mlx4_caps {
 	u32			max_counters;
 	u8			port_ib_mtu[MLX4_MAX_PORTS + 1];
 	u16			sqp_demux;
+	u32			eqe_size;
+	u32			cqe_size;
+	u8			eqe_factor;
+	u32			userspace_caps; /* userspace must be aware of these */
+	u32			function_caps;  /* VFs must be aware of these */
 };
 
 struct mlx4_buf_list {
-- 
cgit v1.2.3-55-g7522


From 39c7caebc94e851f58b84b54659156dd30522e8e Mon Sep 17 00:00:00 2001
From: Ansis Atteka
Date: Mon, 26 Nov 2012 11:24:11 -0800
Subject: openvswitch: add skb mark matching and set action

This patch adds support for skb mark matching and set action.

Signed-off-by: Ansis Atteka <aatteka@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/openvswitch.h |  1 +
 net/openvswitch/actions.c   |  4 ++++
 net/openvswitch/datapath.c  |  3 +++
 net/openvswitch/flow.c      | 19 ++++++++++++++++++-
 net/openvswitch/flow.h      |  8 +++++---
 5 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index eb1efa54fe84..d42e174bd0c8 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -243,6 +243,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
 	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
 	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
+	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
 	__OVS_KEY_ATTR_MAX
 };
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a58ed276f70d..ac2defeeba83 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -428,6 +428,10 @@ static int execute_set_action(struct sk_buff *skb,
 		skb->priority = nla_get_u32(nested_attr);
 		break;
 
+	case OVS_KEY_ATTR_SKB_MARK:
+		skb->mark = nla_get_u32(nested_attr);
+		break;
+
 	case OVS_KEY_ATTR_ETHERNET:
 		err = set_eth_addr(skb, nla_data(nested_attr));
 		break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7b1d6d2b0c1a..f996db343247 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -482,6 +482,7 @@ static int validate_set(const struct nlattr *a,
 	const struct ovs_key_ipv6 *ipv6_key;
 
 	case OVS_KEY_ATTR_PRIORITY:
+	case OVS_KEY_ATTR_SKB_MARK:
 	case OVS_KEY_ATTR_ETHERNET:
 		break;
 
@@ -695,6 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		goto err_flow_free;
 
 	err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
+					     &flow->key.phy.skb_mark,
 					     &flow->key.phy.in_port,
 					     a[OVS_PACKET_ATTR_KEY]);
 	if (err)
@@ -714,6 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	OVS_CB(packet)->flow = flow;
 	packet->priority = flow->key.phy.priority;
+	packet->mark = flow->key.phy.skb_mark;
 
 	rcu_read_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e6ce902e92e5..c3294cebc4f2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -604,6 +604,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 
 	key->phy.priority = skb->priority;
 	key->phy.in_port = in_port;
+	key->phy.skb_mark = skb->mark;
 
 	skb_reset_mac_header(skb);
 
@@ -803,6 +804,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ENCAP] = -1,
 	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
 	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+	[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
 	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
 	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
 	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
@@ -988,6 +990,10 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 	} else {
 		swkey->phy.in_port = DP_MAX_PORTS;
 	}
+	if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+		swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+		attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+	}
 
 	/* Data attributes. */
 	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
@@ -1115,6 +1121,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 
 /**
  * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
+ * @priority: receives the skb priority
+ * @mark: receives the skb mark
  * @in_port: receives the extracted input port.
  * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
  * sequence.
@@ -1124,7 +1132,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
  * get the metadata, that is, the parts of the flow key that cannot be
  * extracted from the packet itself.
  */
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
 			       const struct nlattr *attr)
 {
 	const struct nlattr *nla;
@@ -1132,6 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
 
 	*in_port = DP_MAX_PORTS;
 	*priority = 0;
+	*mark = 0;
 
 	nla_for_each_nested(nla, attr, rem) {
 		int type = nla_type(nla);
@@ -1150,6 +1159,10 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
 					return -EINVAL;
 				*in_port = nla_get_u32(nla);
 				break;
+
+			case OVS_KEY_ATTR_SKB_MARK:
+				*mark = nla_get_u32(nla);
+				break;
 			}
 		}
 	}
@@ -1171,6 +1184,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 	    nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
 		goto nla_put_failure;
 
+	if (swkey->phy.skb_mark &&
+	    nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
+		goto nla_put_failure;
+
 	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 	if (!nla)
 		goto nla_put_failure;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 14a324eb017b..a7bb60ff3b5b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -43,6 +43,7 @@ struct sw_flow_actions {
 struct sw_flow_key {
 	struct {
 		u32	priority;	/* Packet QoS priority. */
+		u32	skb_mark;	/* SKB mark. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
 	} phy;
 	struct {
@@ -144,6 +145,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
  *                         ------  ---  ------  -----
  *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
  *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
+ *  OVS_KEY_ATTR_SKB_MARK      4    --     4      8
  *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
  *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (outer VLAN ethertype)
  *  OVS_KEY_ATTR_8021Q         4    --     4      8
@@ -153,14 +155,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
  *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
  *  OVS_KEY_ATTR_ND           28    --     4     32
  *  -------------------------------------------------
- *  total                                       144
+ *  total                                       152
  */
-#define FLOW_BUFSIZE 144
+#define FLOW_BUFSIZE 152
 
 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
 int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		      const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
 			       const struct nlattr *);
 
 #define MAX_ACTIONS_BUFSIZE    (16 * 1024)
-- 
cgit v1.2.3-55-g7522


From e95c17e9caff4b106cc95b761f3e07964a5a0d9f Mon Sep 17 00:00:00 2001
From: Timur Tabi
Date: Tue, 16 Oct 2012 17:33:44 -0500
Subject: drivers/video: fsl-diu-fb: add support for set_gamma ioctls

The MPC5121 BSP comes with a gamma_set utility that initializes the gamma
table via an ioctl.  Unfortunately, the ioctl number that utility uses
is defined improperly, but we can still support it.

Signed-off-by: Timur Tabi <timur@freescale.com>
---
 drivers/video/fsl-diu-fb.c | 17 +++++++++++++++++
 include/linux/fsl-diu-fb.h |  9 +++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index 5b12e1783fac..9c4054760627 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -1181,6 +1181,23 @@ static int fsl_diu_ioctl(struct fb_info *info, unsigned int cmd,
 			ad->ckmin_b = ck.blue_min;
 		}
 		break;
+#ifdef CONFIG_PPC_MPC512x
+	case MFB_SET_GAMMA: {
+		struct fsl_diu_data *data = mfbi->parent;
+
+		if (copy_from_user(data->gamma, buf, sizeof(data->gamma)))
+			return -EFAULT;
+		setbits32(&data->diu_reg->gamma, 0); /* Force table reload */
+		break;
+	}
+	case MFB_GET_GAMMA: {
+		struct fsl_diu_data *data = mfbi->parent;
+
+		if (copy_to_user(buf, data->gamma, sizeof(data->gamma)))
+			return -EFAULT;
+		break;
+	}
+#endif
 	default:
 		dev_err(info->dev, "unknown ioctl command (0x%08X)\n", cmd);
 		return -ENOIOCTLCMD;
diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h
index 11c16a1fb9e3..a1e8277120c7 100644
--- a/include/linux/fsl-diu-fb.h
+++ b/include/linux/fsl-diu-fb.h
@@ -46,6 +46,15 @@ struct aoi_display_offset {
 #define MFB_SET_PIXFMT		_IOW('M', 8, __u32)
 #define MFB_GET_PIXFMT		_IOR('M', 8, __u32)
 
+/*
+ * The MPC5121 BSP comes with a gamma_set utility that initializes the
+ * gamma table.  Unfortunately, it uses bad values for the IOCTL commands,
+ * but there's nothing we can do about it now.  These ioctls are only
+ * supported on the MPC5121.
+ */
+#define MFB_SET_GAMMA		_IOW('M', 1, __u8)
+#define MFB_GET_GAMMA		_IOR('M', 1, __u8)
+
 /*
  * The original definitions of MFB_SET_PIXFMT and MFB_GET_PIXFMT used the
  * wrong value for 'size' field of the ioctl.  The current macros above use the
-- 
cgit v1.2.3-55-g7522


From 0751f8654602e4255f0b9c17784d8100d5896010 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Sat, 24 Nov 2012 19:34:17 +0100
Subject: bcma: add more package IDs

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index fd15d9829705..93b1e091b1e9 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -157,6 +157,7 @@ struct bcma_host_ops {
 
 /* Chip IDs of SoCs */
 #define BCMA_CHIP_ID_BCM4706	0x5300
+#define  BCMA_PKG_ID_BCM4706L	1
 #define BCMA_CHIP_ID_BCM4716	0x4716
 #define  BCMA_PKG_ID_BCM4716	8
 #define  BCMA_PKG_ID_BCM4717	9
@@ -166,7 +167,11 @@ struct bcma_host_ops {
 #define BCMA_CHIP_ID_BCM4749	0x4749
 #define BCMA_CHIP_ID_BCM5356	0x5356
 #define BCMA_CHIP_ID_BCM5357	0x5357
+#define  BCMA_PKG_ID_BCM5358	9
+#define  BCMA_PKG_ID_BCM47186	10
+#define  BCMA_PKG_ID_BCM5357	11
 #define BCMA_CHIP_ID_BCM53572	53572
+#define  BCMA_PKG_ID_BCM47188	9
 
 struct bcma_device {
 	struct bcma_bus *bus;
-- 
cgit v1.2.3-55-g7522


From 918b4053184c0ca22236e70e299c5343eea35304 Mon Sep 17 00:00:00 2001
From: Vijay Mohan Pandarathil
Date: Sat, 17 Nov 2012 11:47:18 +0000
Subject: PCI/AER: Report success only when every device has AER-aware driver

When an error is detected on a PCIe device which does not have an
AER-aware driver, prevent AER infrastructure from reporting
successful error recovery.

This is because the report_error_detected() function that gets
called in the first phase of recovery process allows forward
progress even when the driver for the device does not have AER
capabilities. It seems that all callbacks (in pci_error_handlers
structure) registered by drivers that gets called during error
recovery are not mandatory. So the intention of the infrastructure
design seems to be to allow forward progress even when a specific
callback has not been registered by a driver. However, if error
handler structure itself has not been registered, it doesn't make
sense to allow forward progress.

As a result of the current design, in the case of a single device
having an AER-unaware driver or in the case of any function in a
multi-function card having an AER-unaware driver, a successful
recovery is reported.

Typical scenario this happens is when a PCI device is detached
from a KVM host and the pci-stub driver on the host claims the
device. The pci-stub driver does not have error handling capabilities
but the AER infrastructure still reports that the device recovered
successfully.

The changes proposed here leaves the device(s)in an unrecovered state
if the driver for the device or for any device in the subtree
does not have error handler structure registered. This reflects
the true state of the device and prevents any partial recovery (or no
recovery at all) reported as successful.

[bhelgaas: changelog]
Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Linas Vepstas <linasvepstas@gmail.com>
Reviewed-by: Myron Stowe <myron.stowe@redhat.com>---
 drivers/pci/pcie/aer/aerdrv.h      |  5 ++++-
 drivers/pci/pcie/aer/aerdrv_core.c | 21 ++++++++++++++++++---
 include/linux/pci.h                |  3 +++
 3 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 94a7598eb262..22f840f4dda1 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -87,6 +87,9 @@ struct aer_broadcast_data {
 static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
 		enum pci_ers_result new)
 {
+	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
+		return PCI_ERS_RESULT_NO_AER_DRIVER;
+
 	if (new == PCI_ERS_RESULT_NONE)
 		return orig;
 
@@ -97,7 +100,7 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
 		break;
 	case PCI_ERS_RESULT_DISCONNECT:
 		if (new == PCI_ERS_RESULT_NEED_RESET)
-			orig = new;
+			orig = PCI_ERS_RESULT_NEED_RESET;
 		break;
 	default:
 		break;
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 06bad96af415..eb2f19a9c3cd 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -231,11 +231,26 @@ static int report_error_detected(struct pci_dev *dev, void *data)
 				   dev->driver ?
 				   "no AER-aware driver" : "no driver");
 		}
-		return 0;
+
+		/*
+		 * If there's any device in the subtree that does not
+		 * have an error_detected callback, returning
+		 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
+		 * the subsequent mmio_enabled/slot_reset/resume
+		 * callbacks of "any" device in the subtree. All the
+		 * devices in the subtree are left in the error state
+		 * without recovery.
+		 */
+
+		if (!(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
+		else
+			vote = PCI_ERS_RESULT_NONE;
+	} else {
+		err_handler = dev->driver->err_handler;
+		vote = err_handler->error_detected(dev, result_data->state);
 	}
 
-	err_handler = dev->driver->err_handler;
-	vote = err_handler->error_detected(dev, result_data->state);
 	result_data->result = merge_result(result_data->result, vote);
 	return 0;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee2179546c63..fb7e8699673d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -538,6 +538,9 @@ enum pci_ers_result {
 
 	/* Device driver is fully recovered and operational */
 	PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
+
+	/* No AER capabilities registered for the driver */
+	PCI_ERS_RESULT_NO_AER_DRIVER = (__force pci_ers_result_t) 6,
 };
 
 /* PCI bus error event callbacks */
-- 
cgit v1.2.3-55-g7522


From 62a2ab935c8d0f8643d02d3696abc401b5da6206 Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO
Date: Sun, 25 Nov 2012 23:10:43 +0000
Subject: stmmac: add Rx watchdog support to mitigate the DMA irqs

GMAC devices newer than databook 3.40 has an embedded timer
that can be used for mitigating the number of interrupts.
So this patch adds this optimizations.

At any rate, the Rx watchdog can be disable (on bugged HW) by
passing from the platform the riwt_off field.

In this implementation the rx timer stored in the Reg9 is fixed
to the max value. This will be tuned by using ethtool.

V2: added a platform parameter to force to disable the rx-watchdog
for example on new core where it is bugged.

V3: do not disable NAPI when Rx watchdog is used.

V4: a new extra statistic field has been added to show the early
receive status in the interrupt handler.
This patch also adds an extra check to avoid to call
napi_schedule when the DMA_INTR_ENA_RIE bit is disabled in the
Interrupt Mask register.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       | 17 +++++++++---
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h    |  3 ---
 .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c    |  6 +++++
 drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h    |  5 +++-
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c    | 19 +++++++++++---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |  2 ++
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |  9 ++++---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 30 +++++++++++++++++-----
 include/linux/stmmac.h                             |  1 +
 9 files changed, 72 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 723e9035f275..186d14806122 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -48,6 +48,10 @@
 #define CHIP_DBG(fmt, args...)  do { } while (0)
 #endif
 
+/* Synopsys Core versions */
+#define	DWMAC_CORE_3_40	0x34
+#define	DWMAC_CORE_3_50	0x35
+
 #undef FRAME_FILTER_DEBUG
 /* #define FRAME_FILTER_DEBUG */
 
@@ -81,7 +85,7 @@ struct stmmac_extra_stats {
 	unsigned long rx_missed_cntr;
 	unsigned long rx_overflow_cntr;
 	unsigned long rx_vlan;
-	/* Tx/Rx IRQ errors */
+	/* Tx/Rx IRQ error info */
 	unsigned long tx_undeflow_irq;
 	unsigned long tx_process_stopped_irq;
 	unsigned long tx_jabber_irq;
@@ -91,7 +95,8 @@ struct stmmac_extra_stats {
 	unsigned long rx_watchdog_irq;
 	unsigned long tx_early_irq;
 	unsigned long fatal_bus_error_irq;
-	/* Extra info */
+	/* Tx/Rx IRQ Events */
+	unsigned long rx_early_irq;
 	unsigned long threshold;
 	unsigned long tx_pkt_n;
 	unsigned long rx_pkt_n;
@@ -101,11 +106,12 @@ struct stmmac_extra_stats {
 	unsigned long tx_normal_irq_n;
 	unsigned long tx_clean;
 	unsigned long tx_reset_ic_bit;
+	unsigned long irq_receive_pmt_irq_n;
+	/* MMC info */
 	unsigned long mmc_tx_irq_n;
 	unsigned long mmc_rx_irq_n;
 	unsigned long mmc_rx_csum_offload_irq_n;
 	/* EEE */
-	unsigned long irq_receive_pmt_irq_n;
 	unsigned long irq_tx_path_in_lpi_mode_n;
 	unsigned long irq_tx_path_exit_lpi_mode_n;
 	unsigned long irq_rx_path_in_lpi_mode_n;
@@ -165,6 +171,9 @@ struct stmmac_extra_stats {
 #define DMA_HW_FEAT_ACTPHYIF	0x70000000 /* Active/selected PHY interface */
 #define DEFAULT_DMA_PBL		8
 
+/* Max/Min RI Watchdog Timer count value */
+#define MAX_DMA_RIWT		0xff
+#define MIN_DMA_RIWT		0x20
 /* Tx coalesce parameters */
 #define STMMAC_COAL_TX_TIMER	40000
 #define STMMAC_MAX_COAL_TX_TICK	100000
@@ -306,6 +315,8 @@ struct stmmac_dma_ops {
 			      struct stmmac_extra_stats *x);
 	/* If supported then get the optional core features */
 	unsigned int (*get_hw_feature) (void __iomem *ioaddr);
+	/* Program the HW RX Watchdog */
+	void (*rx_watchdog) (void __iomem *ioaddr, u32 riwt);
 };
 
 struct stmmac_ops {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 0e4cacedc1f0..7ad56afd6324 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -230,8 +230,5 @@ enum rtc_control {
 #define GMAC_MMC_TX_INTR   0x108
 #define GMAC_MMC_RX_CSUM_OFFLOAD   0x208
 
-/* Synopsys Core versions */
-#define	DWMAC_CORE_3_40	0x34
-
 extern const struct stmmac_dma_ops dwmac1000_dma_ops;
 #endif /* __DWMAC1000_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 033500090f55..bf83c03bfd06 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -174,6 +174,11 @@ static unsigned int dwmac1000_get_hw_feature(void __iomem *ioaddr)
 	return readl(ioaddr + DMA_HW_FEATURE);
 }
 
+static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt)
+{
+	writel(riwt, ioaddr + DMA_RX_WATCHDOG);
+}
+
 const struct stmmac_dma_ops dwmac1000_dma_ops = {
 	.init = dwmac1000_dma_init,
 	.dump_regs = dwmac1000_dump_dma_regs,
@@ -187,4 +192,5 @@ const struct stmmac_dma_ops dwmac1000_dma_ops = {
 	.stop_rx = dwmac_dma_stop_rx,
 	.dma_interrupt = dwmac_dma_interrupt,
 	.get_hw_feature = dwmac1000_get_hw_feature,
+	.rx_watchdog = dwmac1000_rx_watchdog,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index e49c9a0fd6ff..807f30371e49 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -35,7 +35,10 @@
 #define DMA_CONTROL		0x00001018	/* Ctrl (Operational Mode) */
 #define DMA_INTR_ENA		0x0000101c	/* Interrupt Enable */
 #define DMA_MISSED_FRAME_CTR	0x00001020	/* Missed Frame Counter */
-#define DMA_AXI_BUS_MODE       0x00001028      /* AXI Bus Mode */
+/* Rx watchdog register */
+#define DMA_RX_WATCHDOG		0x00001024
+/* AXI Bus Mode */
+#define DMA_AXI_BUS_MODE	0x00001028
 #define DMA_CUR_TX_BUF_ADDR	0x00001050	/* Current Host Tx Buffer */
 #define DMA_CUR_RX_BUF_ADDR	0x00001054	/* Current Host Rx Buffer */
 #define DMA_HW_FEATURE		0x00001058	/* HW Feature Register */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 73766e655011..491d7e930603 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -204,17 +204,28 @@ int dwmac_dma_interrupt(void __iomem *ioaddr,
 		}
 	}
 	/* TX/RX NORMAL interrupts */
-	if (intr_status & DMA_STATUS_NIS) {
+	if (likely(intr_status & DMA_STATUS_NIS)) {
 		x->normal_irq_n++;
-		if (likely(intr_status & DMA_STATUS_RI))
-			ret |= handle_rx;
-		if (intr_status & (DMA_STATUS_TI))
+		if (likely(intr_status & DMA_STATUS_RI)) {
+			u32 value = readl(ioaddr + DMA_INTR_ENA);
+			/* to schedule NAPI on real RIE event. */
+			if (likely(value & DMA_INTR_ENA_RIE)) {
+				x->rx_normal_irq_n++;
+				ret |= handle_rx;
+			}
+		}
+		if (likely(intr_status & DMA_STATUS_TI)) {
+			x->tx_normal_irq_n++;
 			ret |= handle_tx;
+		}
+		if (unlikely(intr_status & DMA_STATUS_ERI))
+			x->rx_early_irq++;
 	}
 	/* Optional hardware blocks, interrupts should be disabled */
 	if (unlikely(intr_status &
 		     (DMA_STATUS_GPI | DMA_STATUS_GMI | DMA_STATUS_GLI)))
 		pr_info("%s: unexpected status %08x\n", __func__, intr_status);
+
 	/* Clear the interrupt by writing a logic 1 to the CSR5[15-0] */
 	writel((intr_status & 0x1ffff), ioaddr + DMA_STATUS);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 01b34517ca8e..fe0535cd86cd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -91,6 +91,8 @@ struct stmmac_priv {
 	u32 tx_count_frames;
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
+	int use_riwt;
+	u32 rx_riwt;
 };
 
 extern int phyaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 15cb6ee6ee41..7f7ccd217e4d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -76,7 +76,7 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(rx_missed_cntr),
 	STMMAC_STAT(rx_overflow_cntr),
 	STMMAC_STAT(rx_vlan),
-	/* Tx/Rx IRQ errors */
+	/* Tx/Rx IRQ error info */
 	STMMAC_STAT(tx_undeflow_irq),
 	STMMAC_STAT(tx_process_stopped_irq),
 	STMMAC_STAT(tx_jabber_irq),
@@ -86,7 +86,8 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(rx_watchdog_irq),
 	STMMAC_STAT(tx_early_irq),
 	STMMAC_STAT(fatal_bus_error_irq),
-	/* Extra info */
+	/* Tx/Rx IRQ Events */
+	STMMAC_STAT(rx_early_irq),
 	STMMAC_STAT(threshold),
 	STMMAC_STAT(tx_pkt_n),
 	STMMAC_STAT(rx_pkt_n),
@@ -96,10 +97,12 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(tx_normal_irq_n),
 	STMMAC_STAT(tx_clean),
 	STMMAC_STAT(tx_reset_ic_bit),
+	STMMAC_STAT(irq_receive_pmt_irq_n),
+	/* MMC info */
 	STMMAC_STAT(mmc_tx_irq_n),
 	STMMAC_STAT(mmc_rx_irq_n),
 	STMMAC_STAT(mmc_rx_csum_offload_irq_n),
-	STMMAC_STAT(irq_receive_pmt_irq_n),
+	/* EEE */
 	STMMAC_STAT(irq_tx_path_in_lpi_mode_n),
 	STMMAC_STAT(irq_tx_path_exit_lpi_mode_n),
 	STMMAC_STAT(irq_rx_path_in_lpi_mode_n),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d9d68649bdaa..542edbcd92c7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -603,6 +603,8 @@ static void init_dma_desc_rings(struct net_device *dev)
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
 
+	if (priv->use_riwt)
+		dis_ic = 1;
 	/* Clear the Rx/Tx descriptors */
 	priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic);
 	priv->hw->desc->init_tx_desc(priv->dma_tx, txsize);
@@ -1106,6 +1108,11 @@ static int stmmac_open(struct net_device *dev)
 
 	stmmac_init_tx_coalesce(priv);
 
+	if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) {
+		priv->rx_riwt = MAX_DMA_RIWT;
+		priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT);
+	}
+
 	napi_enable(&priv->napi);
 	netif_start_queue(dev);
 
@@ -1423,14 +1430,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 #endif
 			skb->protocol = eth_type_trans(skb, priv->dev);
 
-			if (unlikely(!priv->plat->rx_coe)) {
-				/* No RX COE for old mac10/100 devices */
+			if (unlikely(!priv->plat->rx_coe))
 				skb_checksum_none_assert(skb);
-				netif_receive_skb(skb);
-			} else {
+			else
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				napi_gro_receive(&priv->napi, skb);
-			}
+
+			napi_gro_receive(&priv->napi, skb);
 
 			priv->dev->stats.rx_packets++;
 			priv->dev->stats.rx_bytes += frame_len;
@@ -2001,6 +2006,16 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 	if (flow_ctrl)
 		priv->flow_ctrl = FLOW_AUTO;	/* RX/TX pause on */
 
+	/* Rx Watchdog is available in the COREs newer than the 3.40.
+	 * In some case, for example on bugged HW this feature
+	 * has to be disable and this can be done by passing the
+	 * riwt_off field from the platform.
+	 */
+	if ((priv->synopsys_id >= DWMAC_CORE_3_50) && (!priv->plat->riwt_off)) {
+		priv->use_riwt = 1;
+		pr_info(" Enable RX Mitigation via HW Watchdog Timer\n");
+	}
+
 	netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
 
 	spin_lock_init(&priv->lock);
@@ -2092,6 +2107,9 @@ int stmmac_suspend(struct net_device *ndev)
 	netif_device_detach(ndev);
 	netif_stop_queue(ndev);
 
+	if (priv->use_riwt)
+		dis_ic = 1;
+
 	napi_disable(&priv->napi);
 
 	/* Stop TX/RX DMA */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a1547ea3920d..de5b2f8176ce 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -104,6 +104,7 @@ struct plat_stmmacenet_data {
 	int bugged_jumbo;
 	int pmt;
 	int force_sf_dma_mode;
+	int riwt_off;
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	void (*bus_setup)(void __iomem *ioaddr);
 	int (*init)(struct platform_device *pdev);
-- 
cgit v1.2.3-55-g7522


From c91f6df2db4972d3cc983e6988b9abf1ad02f5f9 Mon Sep 17 00:00:00 2001
From: Brian Haley
Date: Mon, 26 Nov 2012 05:21:08 +0000
Subject: sockopt: Change getsockopt() of SO_BINDTODEVICE to return an
 interface name

Instead of having the getsockopt() of SO_BINDTODEVICE return an index, which
will then require another call like if_indextoname() to get the actual interface
name, have it return the name directly.

This also matches the existing man page description on socket(7) which mentions
the argument being an interface name.

If the value has not been set, zero is returned and optlen will be set to zero
to indicate there is no interface name present.

Added a seqlock to protect this code path, and dev_ifname(), from someone
changing the device name via dev_change_name().

v2: Added seqlock protection while copying device name.

v3: Fixed word wrap in patch.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 21 ++++++++++++++--
 net/core/sock.c           | 64 ++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 81 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e46c830c88d8..e9929abeb932 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1567,6 +1567,8 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
+extern seqlock_t	devnet_rename_seq;	/* Device rename lock */
+
 
 #define for_each_netdev(net, d)		\
 		list_for_each_entry(d, &(net)->dev_base_head, dev_list)
diff --git a/net/core/dev.c b/net/core/dev.c
index 7304ea8a1f13..2a5f55866429 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -203,6 +203,8 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+DEFINE_SEQLOCK(devnet_rename_seq);
+
 static inline void dev_base_seq_inc(struct net *net)
 {
 	while (++net->dev_base_seq == 0);
@@ -1091,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+	write_seqlock(&devnet_rename_seq);
+
+	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+		write_sequnlock(&devnet_rename_seq);
 		return 0;
+	}
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
 	err = dev_get_valid_name(net, dev, newname);
-	if (err < 0)
+	if (err < 0) {
+		write_sequnlock(&devnet_rename_seq);
 		return err;
+	}
 
 rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
+		write_sequnlock(&devnet_rename_seq);
 		return ret;
 	}
 
+	write_sequnlock(&devnet_rename_seq);
+
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
 	write_unlock_bh(&dev_base_lock);
@@ -1124,6 +1135,7 @@ rollback:
 		/* err >= 0 after dev_alloc_name() or stores the first errno */
 		if (err >= 0) {
 			err = ret;
+			write_seqlock(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
 			goto rollback;
 		} else {
@@ -4148,6 +4160,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 {
 	struct net_device *dev;
 	struct ifreq ifr;
+	unsigned seq;
 
 	/*
 	 *	Fetch the caller's info block.
@@ -4156,6 +4169,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
+retry:
+	seq = read_seqbegin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
@@ -4165,6 +4180,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 
 	strcpy(ifr.ifr_name, dev->name);
 	rcu_read_unlock();
+	if (read_seqretry(&devnet_rename_seq, seq))
+		goto retry;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
diff --git a/net/core/sock.c b/net/core/sock.c
index d4f7b58b3866..a692ef49c9bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
+static int sock_setbindtodevice(struct sock *sk, char __user *optval,
+				int optlen)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
@@ -562,6 +563,59 @@ out:
 	return ret;
 }
 
+static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+				int __user *optlen, int len)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+	char devname[IFNAMSIZ];
+	unsigned seq;
+
+	if (sk->sk_bound_dev_if == 0) {
+		len = 0;
+		goto zero;
+	}
+
+	ret = -EINVAL;
+	if (len < IFNAMSIZ)
+		goto out;
+
+retry:
+	seq = read_seqbegin(&devnet_rename_seq);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+	ret = -ENODEV;
+	if (!dev) {
+		rcu_read_unlock();
+		goto out;
+	}
+
+	strcpy(devname, dev->name);
+	rcu_read_unlock();
+	if (read_seqretry(&devnet_rename_seq, seq))
+		goto retry;
+
+	len = strlen(devname) + 1;
+
+	ret = -EFAULT;
+	if (copy_to_user(optval, devname, len))
+		goto out;
+
+zero:
+	ret = -EFAULT;
+	if (put_user(len, optlen))
+		goto out;
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
 {
 	if (valbool)
@@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	 */
 
 	if (optname == SO_BINDTODEVICE)
-		return sock_bindtodevice(sk, optval, optlen);
+		return sock_setbindtodevice(sk, optval, optlen);
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
@@ -1075,15 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	case SO_NOFCS:
 		v.val = sock_flag(sk, SOCK_NOFCS);
 		break;
+
 	case SO_BINDTODEVICE:
-		v.val = sk->sk_bound_dev_if;
-		break;
+		return sock_getbindtodevice(sk, optval, optlen, len);
+
 	case SO_GET_FILTER:
 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
 		if (len < 0)
 			return len;
 
 		goto lenout;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3-55-g7522


From a8df7b1ab70bfd6f261fa5e96985fca638299acc Mon Sep 17 00:00:00 2001
From: Fabio Baltieri
Date: Sun, 4 Nov 2012 01:54:34 -0800
Subject: leds: add led_trigger_rename function

Implements a "led_trigger_rename" function to rename a trigger with
proper locking.

This assumes that led name was originally allocated in non-constant
storage.

Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Cc: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-triggers.c | 13 +++++++++++++
 include/linux/leds.h        | 18 ++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 262eb4193710..a4fa4bf02d53 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -166,6 +166,19 @@ void led_trigger_set_default(struct led_classdev *led_cdev)
 }
 EXPORT_SYMBOL_GPL(led_trigger_set_default);
 
+void led_trigger_rename_static(const char *name, struct led_trigger *trig)
+{
+	/* new name must be on a temporary string to prevent races */
+	BUG_ON(name == trig->name);
+
+	down_write(&triggers_list_lock);
+	/* this assumes that trig->name was originaly allocated to
+	 * non constant storage */
+	strcpy((char *)trig->name, name);
+	up_write(&triggers_list_lock);
+}
+EXPORT_SYMBOL_GPL(led_trigger_rename_static);
+
 /* LED Trigger Interface */
 
 int led_trigger_register(struct led_trigger *trig)
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6e53bb31c220..8107592cfc41 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -139,6 +139,24 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev,
 extern void led_set_brightness(struct led_classdev *led_cdev,
 			       enum led_brightness brightness);
 
+/**
+ * led_trigger_rename_static - rename a trigger
+ * @name: the new trigger name
+ * @trig: the LED trigger to rename
+ *
+ * Change a LED trigger name by copying the string passed in
+ * name into current trigger name, which MUST be large
+ * enough for the new string.
+ *
+ * Note that name must NOT point to the same string used
+ * during LED registration, as that could lead to races.
+ *
+ * This is meant to be used on triggers with statically
+ * allocated name.
+ */
+extern void led_trigger_rename_static(const char *name,
+				      struct led_trigger *trig);
+
 /*
  * LED Triggers
  */
-- 
cgit v1.2.3-55-g7522


From 057407c732b7761cf417a1e6633a02d9d473340d Mon Sep 17 00:00:00 2001
From: Jingoo Han
Date: Sun, 18 Nov 2012 21:35:55 -0800
Subject: led: Add dependency on CONFIG_LEDS_TRIGGERS to
 led_trigger_rename_static()

This patch fixes build warnings when CONFIG_LEDS_TRIGGERS is
disabled as below:
include/linux/leds.h:158:18: warning: 'struct led_trigger' declared inside parameter list [enabled by default]
include/linux/leds.h:158:18: warning: its scope is only this definition or declaration, which is probably not what you want [enabled
by default]

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Acked-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/leds.h | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 8107592cfc41..0d9b5eed714e 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -139,24 +139,6 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev,
 extern void led_set_brightness(struct led_classdev *led_cdev,
 			       enum led_brightness brightness);
 
-/**
- * led_trigger_rename_static - rename a trigger
- * @name: the new trigger name
- * @trig: the LED trigger to rename
- *
- * Change a LED trigger name by copying the string passed in
- * name into current trigger name, which MUST be large
- * enough for the new string.
- *
- * Note that name must NOT point to the same string used
- * during LED registration, as that could lead to races.
- *
- * This is meant to be used on triggers with statically
- * allocated name.
- */
-extern void led_trigger_rename_static(const char *name,
-				      struct led_trigger *trig);
-
 /*
  * LED Triggers
  */
@@ -197,6 +179,23 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger,
 				      unsigned long *delay_on,
 				      unsigned long *delay_off,
 				      int invert);
+/**
+ * led_trigger_rename_static - rename a trigger
+ * @name: the new trigger name
+ * @trig: the LED trigger to rename
+ *
+ * Change a LED trigger name by copying the string passed in
+ * name into current trigger name, which MUST be large
+ * enough for the new string.
+ *
+ * Note that name must NOT point to the same string used
+ * during LED registration, as that could lead to races.
+ *
+ * This is meant to be used on triggers with statically
+ * allocated name.
+ */
+extern void led_trigger_rename_static(const char *name,
+				      struct led_trigger *trig);
 
 #else
 
-- 
cgit v1.2.3-55-g7522


From e3725ec015dfbbeb896295cf2b3a995f28b0630e Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 16 Nov 2012 12:25:01 -0500
Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving the session
 pointer

Move the session pointer into the slot table, then have struct nfs4_slot
point to that slot table.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h          |  3 ++-
 fs/nfs/nfs4proc.c         | 33 +++++++++++++++++++++++----------
 fs/nfs/nfs4state.c        |  2 +-
 fs/nfs/nfs4xdr.c          |  8 +++++---
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  2 +-
 6 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 36880b9aa91e..42c58691fb41 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
 				  bool sync);
 
-extern struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags);
+extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
+		u32 max_slots, gfp_t gfp_flags);
 
 static inline bool
 is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 14b39742b6e4..5b61c4a83191 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -467,25 +467,28 @@ void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
 
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 {
+	struct nfs4_session *session;
 	struct nfs4_slot_table *tbl;
 
-	tbl = &res->sr_session->fc_slot_table;
 	if (!res->sr_slot) {
 		/* just wake up the next guy waiting since
 		 * we may have not consumed a slot after all */
 		dprintk("%s: No slot\n", __func__);
 		return;
 	}
+	tbl = res->sr_slot->table;
+	session = tbl->session;
 
 	spin_lock(&tbl->slot_tbl_lock);
 	nfs4_free_slot(tbl, res->sr_slot - tbl->slots);
-	nfs4_check_drain_fc_complete(res->sr_session);
+	nfs4_check_drain_fc_complete(session);
 	spin_unlock(&tbl->slot_tbl_lock);
 	res->sr_slot = NULL;
 }
 
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
+	struct nfs4_session *session;
 	struct nfs4_slot *slot;
 	unsigned long timestamp;
 	struct nfs_client *clp;
@@ -504,6 +507,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		goto out;
 
 	slot = res->sr_slot;
+	session = slot->table->session;
 
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
@@ -511,7 +515,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		/* Update the slot's sequence and clientid lease timer */
 		++slot->seq_nr;
 		timestamp = slot->renewal_time;
-		clp = res->sr_session->clp;
+		clp = session->clp;
 		do_renew_lease(clp, timestamp);
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
@@ -524,7 +528,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 */
 		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
 			__func__,
-			slot - res->sr_session->fc_slot_table.slots,
+			slot - session->fc_slot_table.slots,
 			slot->seq_nr);
 		goto out_retry;
 	default:
@@ -546,7 +550,7 @@ out_retry:
 static int nfs4_sequence_done(struct rpc_task *task,
 			       struct nfs4_sequence_res *res)
 {
-	if (res->sr_session == NULL)
+	if (res->sr_slot == NULL)
 		return 1;
 	return nfs41_sequence_done(task, res);
 }
@@ -591,7 +595,6 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 	args->sa_cache_this = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
-	res->sr_session = NULL;
 	res->sr_slot = NULL;
 }
 
@@ -646,7 +649,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
-	res->sr_session = session;
 	res->sr_slot = slot;
 	res->sr_status_flags = 0;
 	/*
@@ -5659,9 +5661,18 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	return status;
 }
 
-struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
+struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
+		u32 max_slots, gfp_t gfp_flags)
 {
-	return kmalloc_array(max_slots, sizeof(struct nfs4_slot), gfp_flags);
+	struct nfs4_slot *tbl;
+	u32 i;
+
+	tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags);
+	if (tbl != NULL) {
+		for (i = 0; i < max_slots; i++)
+			tbl[i].table = table;
+	}
+	return tbl;
 }
 
 static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
@@ -5699,7 +5710,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
 
 	/* Does the newly negotiated max_reqs match the existing slot table? */
 	if (max_reqs != tbl->max_slots) {
-		new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
+		new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS);
 		if (!new)
 			goto out;
 	}
@@ -5738,11 +5749,13 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
 	dprintk("--> %s\n", __func__);
 	/* Fore channel */
 	tbl = &ses->fc_slot_table;
+	tbl->session = ses;
 	status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
 	if (status) /* -ENOMEM */
 		return status;
 	/* Back channel */
 	tbl = &ses->bc_slot_table;
+	tbl->session = ses;
 	status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
 	if (status && tbl->slots == NULL)
 		/* Fore and back channel share a connection so get
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 96fcbb97fd6a..9495789c425b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2033,7 +2033,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
 		return 0;
 	nfs4_begin_drain_session(clp);
 	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = nfs4_alloc_slots(fc_tbl->target_max_slots, GFP_NOFS);
+	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS);
         if (!new)
 		return -ENOMEM;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 672d9b0ef2c5..4126f054610a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5507,12 +5507,13 @@ static int decode_sequence(struct xdr_stream *xdr,
 			   struct rpc_rqst *rqstp)
 {
 #if defined(CONFIG_NFS_V4_1)
+	struct nfs4_session *session;
 	struct nfs4_sessionid id;
 	u32 dummy;
 	int status;
 	__be32 *p;
 
-	if (!res->sr_session)
+	if (res->sr_slot == NULL)
 		return 0;
 
 	status = decode_op_hdr(xdr, OP_SEQUENCE);
@@ -5526,8 +5527,9 @@ static int decode_sequence(struct xdr_stream *xdr,
 	 * sequence number, the server is looney tunes.
 	 */
 	status = -EREMOTEIO;
+	session = res->sr_slot->table->session;
 
-	if (memcmp(id.data, res->sr_session->sess_id.data,
+	if (memcmp(id.data, session->sess_id.data,
 		   NFS4_MAX_SESSIONID_LEN)) {
 		dprintk("%s Invalid session id\n", __func__);
 		goto out_err;
@@ -5545,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr,
 	}
 	/* slot id */
 	dummy = be32_to_cpup(p++);
-	if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
+	if (dummy != res->sr_slot - session->fc_slot_table.slots) {
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 97c8f9191880..b0412873d29c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -209,6 +209,7 @@ struct nfs_server {
 /* Sessions */
 #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
 struct nfs4_slot_table {
+	struct nfs4_session *session;		/* Parent session */
 	struct nfs4_slot *slots;		/* seqid per slot */
 	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
 	spinlock_t	slot_tbl_lock;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9cb1c63a70c2..0fd88ab0e814 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,7 @@ struct nfs4_channel_attrs {
 
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
+	struct nfs4_slot_table	*table;
 	unsigned long		renewal_time;
 	u32		 	seq_nr;
 };
@@ -198,7 +199,6 @@ struct nfs4_sequence_args {
 };
 
 struct nfs4_sequence_res {
-	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
-- 
cgit v1.2.3-55-g7522


From df2fabffbace8988f3265585ec793ff9deccdea7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 16 Nov 2012 12:45:06 -0500
Subject: NFSv4.1: Label each entry in the session slot tables with its slot
 number

Instead of doing slot table pointer gymnastics every time we want to
know which slot we're using.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 12 +++++++-----
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5b61c4a83191..4311dba49c58 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -526,9 +526,9 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
 		 * of RFC5661.
 		 */
-		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
+		dprintk("%s: slot=%u seq=%u: Operation in progress\n",
 			__func__,
-			slot - session->fc_slot_table.slots,
+			slot->slot_nr,
 			slot->seq_nr);
 		goto out_retry;
 	default:
@@ -671,9 +671,9 @@ int nfs4_setup_sequence(const struct nfs_server *server,
 	if (session == NULL)
 		goto out;
 
-	dprintk("--> %s clp %p session %p sr_slot %td\n",
+	dprintk("--> %s clp %p session %p sr_slot %d\n",
 		__func__, session->clp, session, res->sr_slot ?
-			res->sr_slot - session->fc_slot_table.slots : -1);
+			res->sr_slot->slot_nr : -1);
 
 	ret = nfs41_setup_sequence(session, args, res, task);
 out:
@@ -5669,8 +5669,10 @@ struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
 
 	tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags);
 	if (tbl != NULL) {
-		for (i = 0; i < max_slots; i++)
+		for (i = 0; i < max_slots; i++) {
 			tbl[i].table = table;
+			tbl[i].slot_nr = i;
+		}
 	}
 	return tbl;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4126f054610a..50bac7066160 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5547,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr,
 	}
 	/* slot id */
 	dummy = be32_to_cpup(p++);
-	if (dummy != res->sr_slot - session->fc_slot_table.slots) {
+	if (dummy != res->sr_slot->slot_nr) {
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0fd88ab0e814..9c9b76c94b46 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -189,6 +189,7 @@ struct nfs4_channel_attrs {
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
 	unsigned long		renewal_time;
+	u32			slot_nr;
 	u32		 	seq_nr;
 };
 
-- 
cgit v1.2.3-55-g7522


From 2b2fa71723f955d5b4a0f4edd99cf3cd69ceafd1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 16 Nov 2012 12:58:36 -0500
Subject: NFSv4.1: Simplify struct nfs4_sequence_args too

Replace the session pointer + slotid with a pointer to the
allocated slot.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  6 +++---
 fs/nfs/nfs4xdr.c        | 21 ++++++++++-----------
 include/linux/nfs_xdr.h |  3 +--
 3 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4311dba49c58..6c41a34e34b4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -591,7 +591,7 @@ out:
 static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 		struct nfs4_sequence_res *res, int cache_reply)
 {
-	args->sa_session = NULL;
+	args->sa_slot = NULL;
 	args->sa_cache_this = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
@@ -644,8 +644,8 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 	rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
 	slot = tbl->slots + slotid;
 	slot->renewal_time = jiffies;
-	args->sa_session = session;
-	args->sa_slotid = slotid;
+
+	args->sa_slot = slot;
 
 	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 50bac7066160..27b0fec1a6b0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1833,18 +1833,16 @@ static void encode_sequence(struct xdr_stream *xdr,
 			    struct compound_hdr *hdr)
 {
 #if defined(CONFIG_NFS_V4_1)
-	struct nfs4_session *session = args->sa_session;
+	struct nfs4_session *session;
 	struct nfs4_slot_table *tp;
-	struct nfs4_slot *slot;
+	struct nfs4_slot *slot = args->sa_slot;
 	__be32 *p;
 
-	if (!session)
+	if (slot == NULL)
 		return;
 
-	tp = &session->fc_slot_table;
-
-	WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
-	slot = tp->slots + args->sa_slotid;
+	tp = slot->table;
+	session = tp->session;
 
 	encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
 
@@ -1858,12 +1856,12 @@ static void encode_sequence(struct xdr_stream *xdr,
 		((u32 *)session->sess_id.data)[1],
 		((u32 *)session->sess_id.data)[2],
 		((u32 *)session->sess_id.data)[3],
-		slot->seq_nr, args->sa_slotid,
+		slot->seq_nr, slot->slot_nr,
 		tp->highest_used_slotid, args->sa_cache_this);
 	p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
 	p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
 	*p++ = cpu_to_be32(slot->seq_nr);
-	*p++ = cpu_to_be32(args->sa_slotid);
+	*p++ = cpu_to_be32(slot->slot_nr);
 	*p++ = cpu_to_be32(tp->highest_used_slotid);
 	*p = cpu_to_be32(args->sa_cache_this);
 #endif /* CONFIG_NFS_V4_1 */
@@ -2025,8 +2023,9 @@ static void encode_free_stateid(struct xdr_stream *xdr,
 static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 {
 #if defined(CONFIG_NFS_V4_1)
-	if (args->sa_session)
-		return args->sa_session->clp->cl_mvops->minor_version;
+
+	if (args->sa_slot)
+		return args->sa_slot->table->session->clp->cl_mvops->minor_version;
 #endif /* CONFIG_NFS_V4_1 */
 	return 0;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9c9b76c94b46..deb31bbbb857 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -194,8 +194,7 @@ struct nfs4_slot {
 };
 
 struct nfs4_sequence_args {
-	struct nfs4_session	*sa_session;
-	u32			sa_slotid;
+	struct nfs4_slot	*sa_slot;
 	u8			sa_cache_this;
 };
 
-- 
cgit v1.2.3-55-g7522


From d80a361d779a9f19498943d1ca84243209cd5647 Mon Sep 17 00:00:00 2001
From: Seiji Aguchi
Date: Wed, 14 Nov 2012 20:25:37 +0000
Subject: efi_pstore: Check remaining space with QueryVariableInfo() before
 writing data

[Issue]

As discussed in a thread below, Running out of space in EFI isn't a well-tested scenario.
And we wouldn't expect all firmware to handle it gracefully.
http://marc.info/?l=linux-kernel&m=134305325801789&w=2

On the other hand, current efi_pstore doesn't check a remaining space of storage at writing time.
Therefore, efi_pstore may not work if it tries to write a large amount of data.

[Patch Description]

To avoid handling the situation above, this patch checks if there is a space enough to log with
QueryVariableInfo() before writing data.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Mike Waychison <mikew@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/firmware/efivars.c | 18 ++++++++++++++++++
 include/linux/efi.h        |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index d10c9873dd9a..37ac21a08751 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -707,12 +707,29 @@ static int efi_pstore_write(enum pstore_type_id type,
 	struct efivars *efivars = psi->data;
 	struct efivar_entry *entry, *found = NULL;
 	int i, ret = 0;
+	u64 storage_space, remaining_space, max_variable_size;
+	efi_status_t status = EFI_NOT_FOUND;
 
 	sprintf(stub_name, "dump-type%u-%u-", type, part);
 	sprintf(name, "%s%lu", stub_name, get_seconds());
 
 	spin_lock(&efivars->lock);
 
+	/*
+	 * Check if there is a space enough to log.
+	 * size: a size of logging data
+	 * DUMP_NAME_LEN * 2: a maximum size of variable name
+	 */
+	status = efivars->ops->query_variable_info(PSTORE_EFI_ATTRIBUTES,
+						   &storage_space,
+						   &remaining_space,
+						   &max_variable_size);
+	if (status || remaining_space < size + DUMP_NAME_LEN * 2) {
+		spin_unlock(&efivars->lock);
+		*id = part;
+		return -ENOSPC;
+	}
+
 	for (i = 0; i < DUMP_NAME_LEN; i++)
 		efi_name[i] = stub_name[i];
 
@@ -1237,6 +1254,7 @@ efivars_init(void)
 	ops.get_variable = efi.get_variable;
 	ops.set_variable = efi.set_variable;
 	ops.get_next_variable = efi.get_next_variable;
+	ops.query_variable_info = efi.query_variable_info;
 	error = register_efivars(&__efivars, &ops, efi_kobj);
 	if (error)
 		goto err_put;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8670eb1eb8cd..c47ec36f3f39 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -643,6 +643,7 @@ struct efivar_operations {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_query_variable_info_t *query_variable_info;
 };
 
 struct efivars {
-- 
cgit v1.2.3-55-g7522


From a9efd39cd547223597cfe7c53acec44c099b9264 Mon Sep 17 00:00:00 2001
From: Seiji Aguchi
Date: Wed, 14 Nov 2012 20:27:28 +0000
Subject: efi_pstore: Add ctime to argument of erase callback

[Issue]

Currently, a variable name, which is used to identify each log entry, consists of type,
id and ctime. But an erase callback does not use ctime.

If efi_pstore supported just one log, type and id were enough.
However, in case of supporting multiple logs, it doesn't work because
it can't distinguish each entry without ctime at erasing time.

 <Example>

 As you can see below, efi_pstore can't differentiate first event from second one without ctime.

 a variable name of first event: dump-type0-1-12345678
 a variable name of second event: dump-type0-1-23456789

  type:0
  id:1
  ctime:12345678, 23456789

[Solution]

This patch adds ctime to an argument of an erase callback.

It works across reboots because ctime of pstore means the date that the record was originally stored.
To do this, efi_pstore saves the ctime to variable name at writing time and passes it to pstore
at reading time.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Mike Waychison <mikew@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   |  4 ++--
 drivers/firmware/efivars.c | 17 ++++++++---------
 fs/pstore/inode.c          |  3 ++-
 fs/pstore/ram.c            |  2 +-
 include/linux/pstore.h     |  2 +-
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index e4d9d24eb73d..0bd6ae4a899f 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -938,7 +938,7 @@ static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
 		       u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi);
 static int erst_clearer(enum pstore_type_id type, u64 id,
-			struct pstore_info *psi);
+			struct timespec time, struct pstore_info *psi);
 
 static struct pstore_info erst_info = {
 	.owner		= THIS_MODULE,
@@ -1102,7 +1102,7 @@ static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
 }
 
 static int erst_clearer(enum pstore_type_id type, u64 id,
-			struct pstore_info *psi)
+			struct timespec time, struct pstore_info *psi)
 {
 	return erst_clear(id);
 }
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index fbe9202c2678..3803621c0d45 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -747,24 +747,25 @@ static int efi_pstore_write(enum pstore_type_id type,
 };
 
 static int efi_pstore_erase(enum pstore_type_id type, u64 id,
-			    struct pstore_info *psi)
+			    struct timespec time, struct pstore_info *psi)
 {
-	char stub_name[DUMP_NAME_LEN];
+	char name[DUMP_NAME_LEN];
 	efi_char16_t efi_name[DUMP_NAME_LEN];
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	struct efivars *efivars = psi->data;
 	struct efivar_entry *entry, *found = NULL;
 	int i;
 
-	sprintf(stub_name, "dump-type%u-%u-", type, (unsigned int)id);
+	sprintf(name, "dump-type%u-%u-%lu", type, (unsigned int)id,
+		time.tv_sec);
 
 	spin_lock(&efivars->lock);
 
 	for (i = 0; i < DUMP_NAME_LEN; i++)
-		efi_name[i] = stub_name[i];
+		efi_name[i] = name[i];
 
 	/*
-	 * Clean up any entries with the same name
+	 * Clean up an entry with the same name
 	 */
 
 	list_for_each_entry(entry, &efivars->list, list) {
@@ -775,9 +776,6 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id,
 		if (utf16_strncmp(entry->var.VariableName, efi_name,
 				  utf16_strlen(efi_name)))
 			continue;
-		/* Needs to be a prefix */
-		if (entry->var.VariableName[utf16_strlen(efi_name)] == 0)
-			continue;
 
 		/* found */
 		found = entry;
@@ -785,6 +783,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id,
 					   &entry->var.VendorGuid,
 					   PSTORE_EFI_ATTRIBUTES,
 					   0, NULL);
+		break;
 	}
 
 	if (found)
@@ -823,7 +822,7 @@ static int efi_pstore_write(enum pstore_type_id type,
 }
 
 static int efi_pstore_erase(enum pstore_type_id type, u64 id,
-			    struct pstore_info *psi)
+			    struct timespec time, struct pstore_info *psi)
 {
 	return 0;
 }
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 4ab572e6d277..4300af654710 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -175,7 +175,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 	struct pstore_private *p = dentry->d_inode->i_private;
 
 	if (p->psi->erase)
-		p->psi->erase(p->type, p->id, p->psi);
+		p->psi->erase(p->type, p->id, dentry->d_inode->i_ctime,
+			      p->psi);
 
 	return simple_unlink(dir, dentry);
 }
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1a4f6da58eab..749693fcb75a 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -237,7 +237,7 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
 }
 
 static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,
-				struct pstore_info *psi)
+				struct timespec time, struct pstore_info *psi)
 {
 	struct ramoops_context *cxt = psi->data;
 	struct persistent_ram_zone *prz;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ee3034a40884..f6e93362d259 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -60,7 +60,7 @@ struct pstore_info {
 			unsigned int part, const char *buf, size_t size,
 			struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
-			struct pstore_info *psi);
+			struct timespec time, struct pstore_info *psi);
 	void		*data;
 };
 
-- 
cgit v1.2.3-55-g7522


From 755d4fe46529018ae45bc7c86df682de45ace764 Mon Sep 17 00:00:00 2001
From: Seiji Aguchi
Date: Mon, 26 Nov 2012 16:07:44 -0800
Subject: efi_pstore: Add a sequence counter to a variable name

[Issue]

Currently, a variable name, which identifies each entry, consists of type, id and ctime.
But if multiple events happens in a short time, a second/third event may fail to log because
efi_pstore can't distinguish each event with current variable name.

[Solution]

A reasonable way to identify all events precisely is introducing a sequence counter to
the variable name.

The sequence counter has already supported in a pstore layer with "oopscount".
So, this patch adds it to a variable name.
Also, it is passed to read/erase callbacks of platform drivers in accordance with
the modification of the variable name.

  <before applying this patch>
 a variable name of first event: dump-type0-1-12345678
 a variable name of second event: dump-type0-1-12345678

  type:0
  id:1
  ctime:12345678

 If multiple events happen in a short time, efi_pstore can't distinguish them because
 variable names are same among them.

  <after applying this patch>

 it can be distinguishable by adding a sequence counter as follows.

 a variable name of first event: dump-type0-1-1-12345678
 a variable name of Second event: dump-type0-1-2-12345678

  type:0
  id:1
  sequence counter: 1(first event), 2(second event)
  ctime:12345678

In case of a write callback executed in pstore_console_write(), "0" is added to
an argument of the write callback because it just logs all kernel messages and
doesn't need to care about multiple events.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Mike Waychison <mikew@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   | 12 ++++++------
 drivers/firmware/efivars.c | 23 ++++++++++++++---------
 fs/pstore/inode.c          |  8 +++++---
 fs/pstore/internal.h       |  2 +-
 fs/pstore/platform.c       | 13 +++++++------
 fs/pstore/ram.c            |  7 +++----
 include/linux/pstore.h     |  8 +++++---
 7 files changed, 41 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 0bd6ae4a899f..6d894bfd8b8f 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -931,13 +931,13 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
 
 static int erst_open_pstore(struct pstore_info *psi);
 static int erst_close_pstore(struct pstore_info *psi);
-static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
 			   struct timespec *time, char **buf,
 			   struct pstore_info *psi);
 static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
-		       u64 *id, unsigned int part,
+		       u64 *id, unsigned int part, int count,
 		       size_t size, struct pstore_info *psi);
-static int erst_clearer(enum pstore_type_id type, u64 id,
+static int erst_clearer(enum pstore_type_id type, u64 id, int count,
 			struct timespec time, struct pstore_info *psi);
 
 static struct pstore_info erst_info = {
@@ -987,7 +987,7 @@ static int erst_close_pstore(struct pstore_info *psi)
 	return 0;
 }
 
-static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
 			   struct timespec *time, char **buf,
 			   struct pstore_info *psi)
 {
@@ -1055,7 +1055,7 @@ out:
 }
 
 static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
-		       u64 *id, unsigned int part,
+		       u64 *id, unsigned int part, int count,
 		       size_t size, struct pstore_info *psi)
 {
 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
@@ -1101,7 +1101,7 @@ static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
 	return ret;
 }
 
-static int erst_clearer(enum pstore_type_id type, u64 id,
+static int erst_clearer(enum pstore_type_id type, u64 id, int count,
 			struct timespec time, struct pstore_info *psi)
 {
 	return erst_clear(id);
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 3803621c0d45..7ad3aae6e085 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -658,13 +658,14 @@ static int efi_pstore_close(struct pstore_info *psi)
 }
 
 static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
-			       struct timespec *timespec,
+			       int *count, struct timespec *timespec,
 			       char **buf, struct pstore_info *psi)
 {
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	struct efivars *efivars = psi->data;
 	char name[DUMP_NAME_LEN];
 	int i;
+	int cnt;
 	unsigned int part, size;
 	unsigned long time;
 
@@ -674,8 +675,10 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 			for (i = 0; i < DUMP_NAME_LEN; i++) {
 				name[i] = efivars->walk_entry->var.VariableName[i];
 			}
-			if (sscanf(name, "dump-type%u-%u-%lu", type, &part, &time) == 3) {
+			if (sscanf(name, "dump-type%u-%u-%d-%lu",
+				   type, &part, &cnt, &time) == 4) {
 				*id = part;
+				*count = cnt;
 				timespec->tv_sec = time;
 				timespec->tv_nsec = 0;
 				get_var_data_locked(efivars, &efivars->walk_entry->var);
@@ -698,7 +701,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 
 static int efi_pstore_write(enum pstore_type_id type,
 		enum kmsg_dump_reason reason, u64 *id,
-		unsigned int part, size_t size, struct pstore_info *psi)
+		unsigned int part, int count, size_t size,
+		struct pstore_info *psi)
 {
 	char name[DUMP_NAME_LEN];
 	efi_char16_t efi_name[DUMP_NAME_LEN];
@@ -725,7 +729,7 @@ static int efi_pstore_write(enum pstore_type_id type,
 		return -ENOSPC;
 	}
 
-	sprintf(name, "dump-type%u-%u-%lu", type, part,
+	sprintf(name, "dump-type%u-%u-%d-%lu", type, part, count,
 		get_seconds());
 
 	for (i = 0; i < DUMP_NAME_LEN; i++)
@@ -746,7 +750,7 @@ static int efi_pstore_write(enum pstore_type_id type,
 	return ret;
 };
 
-static int efi_pstore_erase(enum pstore_type_id type, u64 id,
+static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
 			    struct timespec time, struct pstore_info *psi)
 {
 	char name[DUMP_NAME_LEN];
@@ -756,7 +760,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id,
 	struct efivar_entry *entry, *found = NULL;
 	int i;
 
-	sprintf(name, "dump-type%u-%u-%lu", type, (unsigned int)id,
+	sprintf(name, "dump-type%u-%u-%d-%lu", type, (unsigned int)id, count,
 		time.tv_sec);
 
 	spin_lock(&efivars->lock);
@@ -807,7 +811,7 @@ static int efi_pstore_close(struct pstore_info *psi)
 	return 0;
 }
 
-static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
+static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, int *count,
 			       struct timespec *timespec,
 			       char **buf, struct pstore_info *psi)
 {
@@ -816,12 +820,13 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 
 static int efi_pstore_write(enum pstore_type_id type,
 		enum kmsg_dump_reason reason, u64 *id,
-		unsigned int part, size_t size, struct pstore_info *psi)
+		unsigned int part, int count, size_t size,
+		struct pstore_info *psi)
 {
 	return 0;
 }
 
-static int efi_pstore_erase(enum pstore_type_id type, u64 id,
+static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
 			    struct timespec time, struct pstore_info *psi)
 {
 	return 0;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 4300af654710..ed1d8c7212da 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -49,6 +49,7 @@ struct pstore_private {
 	struct pstore_info *psi;
 	enum pstore_type_id type;
 	u64	id;
+	int	count;
 	ssize_t	size;
 	char	data[];
 };
@@ -175,8 +176,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 	struct pstore_private *p = dentry->d_inode->i_private;
 
 	if (p->psi->erase)
-		p->psi->erase(p->type, p->id, dentry->d_inode->i_ctime,
-			      p->psi);
+		p->psi->erase(p->type, p->id, p->count,
+			      dentry->d_inode->i_ctime, p->psi);
 
 	return simple_unlink(dir, dentry);
 }
@@ -271,7 +272,7 @@ int pstore_is_mounted(void)
  * Load it up with "size" bytes of data from "buf".
  * Set the mtime & ctime to the date that this record was originally stored.
  */
-int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
+int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
 		  char *data, size_t size, struct timespec time,
 		  struct pstore_info *psi)
 {
@@ -307,6 +308,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
 		goto fail_alloc;
 	private->type = type;
 	private->id = id;
+	private->count = count;
 	private->psi = psi;
 
 	switch (type) {
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 4847f588b7d5..937d820f273c 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -50,7 +50,7 @@ extern struct pstore_info *psinfo;
 extern void	pstore_set_kmsg_bytes(int);
 extern void	pstore_get_records(int);
 extern int	pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
-			      char *data, size_t size,
+			      int count, char *data, size_t size,
 			      struct timespec time, struct pstore_info *psi);
 extern int	pstore_is_mounted(void);
 
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 947fbe06c3b1..5ea2e77ff023 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -136,7 +136,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 			break;
 
 		ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
-				    hsize + len, psinfo);
+				    oopscount, hsize + len, psinfo);
 		if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
 			pstore_new_entry = 1;
 
@@ -173,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
 			spin_lock_irqsave(&psinfo->buf_lock, flags);
 		}
 		memcpy(psinfo->buf, s, c);
-		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, c, psinfo);
+		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo);
 		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 		s += c;
 		c = e - s;
@@ -197,7 +197,7 @@ static void pstore_register_console(void) {}
 
 static int pstore_write_compat(enum pstore_type_id type,
 			       enum kmsg_dump_reason reason,
-			       u64 *id, unsigned int part,
+			       u64 *id, unsigned int part, int count,
 			       size_t size, struct pstore_info *psi)
 {
 	return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi);
@@ -267,6 +267,7 @@ void pstore_get_records(int quiet)
 	char			*buf = NULL;
 	ssize_t			size;
 	u64			id;
+	int			count;
 	enum pstore_type_id	type;
 	struct timespec		time;
 	int			failed = 0, rc;
@@ -278,9 +279,9 @@ void pstore_get_records(int quiet)
 	if (psi->open && psi->open(psi))
 		goto out;
 
-	while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
-		rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size,
-				  time, psi);
+	while ((size = psi->read(&id, &type, &count, &time, &buf, psi)) > 0) {
+		rc = pstore_mkfile(type, psi->name, id, count, buf,
+				  (size_t)size, time, psi);
 		kfree(buf);
 		buf = NULL;
 		if (rc && (rc != -EEXIST || !quiet))
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 749693fcb75a..2bfa36e0ffe8 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -132,9 +132,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
 }
 
 static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
-				   struct timespec *time,
-				   char **buf,
-				   struct pstore_info *psi)
+				   int *count, struct timespec *time,
+				   char **buf, struct pstore_info *psi)
 {
 	ssize_t size;
 	struct ramoops_context *cxt = psi->data;
@@ -236,7 +235,7 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
 	return 0;
 }
 
-static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,
+static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
 				struct timespec time, struct pstore_info *psi)
 {
 	struct ramoops_context *cxt = psi->data;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index f6e93362d259..1788909d9a99 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -50,17 +50,19 @@ struct pstore_info {
 	int		(*open)(struct pstore_info *psi);
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
-			struct timespec *time, char **buf,
+			int *count, struct timespec *time, char **buf,
 			struct pstore_info *psi);
 	int		(*write)(enum pstore_type_id type,
 			enum kmsg_dump_reason reason, u64 *id,
-			unsigned int part, size_t size, struct pstore_info *psi);
+			unsigned int part, int count, size_t size,
+			struct pstore_info *psi);
 	int		(*write_buf)(enum pstore_type_id type,
 			enum kmsg_dump_reason reason, u64 *id,
 			unsigned int part, const char *buf, size_t size,
 			struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
-			struct timespec time, struct pstore_info *psi);
+			int count, struct timespec time,
+			struct pstore_info *psi);
 	void		*data;
 };
 
-- 
cgit v1.2.3-55-g7522


From f2d9d270c15ae0139b54a7e7466d738327e97e03 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Thu, 22 Nov 2012 14:11:39 +0100
Subject: mac80211: support VHT association

Determine the VHT channel from the AP's VHT operation IE
(if present) and configure the hardware to that channel
if it is supported. If channel contexts cause a channel
to not be usable, try a smaller bandwidth.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  15 ++
 net/mac80211/ieee80211_i.h |   2 +
 net/mac80211/mlme.c        | 368 ++++++++++++++++++++++++++++++++++++---------
 3 files changed, 313 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f9c5a787d350..8f690e53dd89 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1212,6 +1212,21 @@ struct ieee80211_vht_cap {
 	struct ieee80211_vht_mcs_info supp_mcs;
 } __packed;
 
+/**
+ * enum ieee80211_vht_chanwidth - VHT channel width
+ * @IEEE80211_VHT_CHANWIDTH_USE_HT: use the HT operation IE to
+ *	determine the channel width (20 or 40 MHz)
+ * @IEEE80211_VHT_CHANWIDTH_80MHZ: 80 MHz bandwidth
+ * @IEEE80211_VHT_CHANWIDTH_160MHZ: 160 MHz bandwidth
+ * @IEEE80211_VHT_CHANWIDTH_80P80MHZ: 80+80 MHz bandwidth
+ */
+enum ieee80211_vht_chanwidth {
+	IEEE80211_VHT_CHANWIDTH_USE_HT		= 0,
+	IEEE80211_VHT_CHANWIDTH_80MHZ		= 1,
+	IEEE80211_VHT_CHANWIDTH_160MHZ		= 2,
+	IEEE80211_VHT_CHANWIDTH_80P80MHZ	= 3,
+};
+
 /**
  * struct ieee80211_vht_operation - VHT operation IE
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0a8f83d142f9..8d8bf7136386 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -371,6 +371,8 @@ enum ieee80211_sta_flags {
 	IEEE80211_STA_RESET_SIGNAL_AVE	= BIT(9),
 	IEEE80211_STA_DISABLE_40MHZ	= BIT(10),
 	IEEE80211_STA_DISABLE_VHT	= BIT(11),
+	IEEE80211_STA_DISABLE_80P80MHZ	= BIT(12),
+	IEEE80211_STA_DISABLE_160MHZ	= BIT(13),
 };
 
 struct ieee80211_mgd_auth_data {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d2a4f78b4b0f..35d8cffc973a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -354,6 +354,16 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
 	/* determine capability flags */
 	cap = vht_cap.cap;
 
+	if (sdata->u.mgd.flags & IEEE80211_STA_DISABLE_80P80MHZ) {
+		cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ;
+		cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
+	}
+
+	if (sdata->u.mgd.flags & IEEE80211_STA_DISABLE_160MHZ) {
+		cap &= ~IEEE80211_VHT_CAP_SHORT_GI_160;
+		cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
+	}
+
 	/* reserve and fill IE */
 	pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
 	ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
@@ -543,6 +553,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		offset = noffset;
 	}
 
+	if (WARN_ON_ONCE((ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+			 !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)))
+		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
 		ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param,
 				    sband, chan, sdata->smps_mode);
@@ -3183,23 +3197,270 @@ int ieee80211_max_network_latency(struct notifier_block *nb,
 	return 0;
 }
 
+static u32 chandef_downgrade(struct cfg80211_chan_def *c)
+{
+	u32 ret;
+	int tmp;
+
+	switch (c->width) {
+	case NL80211_CHAN_WIDTH_20:
+		c->width = NL80211_CHAN_WIDTH_20_NOHT;
+		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+		break;
+	case NL80211_CHAN_WIDTH_40:
+		c->width = NL80211_CHAN_WIDTH_20;
+		c->center_freq1 = c->chan->center_freq;
+		ret = IEEE80211_STA_DISABLE_40MHZ |
+		      IEEE80211_STA_DISABLE_VHT;
+		break;
+	case NL80211_CHAN_WIDTH_80:
+		tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
+		/* n_P40 */
+		tmp /= 2;
+		/* freq_P40 */
+		c->center_freq1 = c->center_freq1 - 20 + 40 * tmp;
+		c->width = NL80211_CHAN_WIDTH_40;
+		ret = IEEE80211_STA_DISABLE_VHT;
+		break;
+	case NL80211_CHAN_WIDTH_80P80:
+		c->center_freq2 = 0;
+		c->width = NL80211_CHAN_WIDTH_80;
+		ret = IEEE80211_STA_DISABLE_80P80MHZ |
+		      IEEE80211_STA_DISABLE_160MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_160:
+		/* n_P20 */
+		tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
+		/* n_P80 */
+		tmp /= 4;
+		c->center_freq1 = c->center_freq1 - 40 + 80 * tmp;
+		c->width = NL80211_CHAN_WIDTH_80;
+		ret = IEEE80211_STA_DISABLE_80P80MHZ |
+		      IEEE80211_STA_DISABLE_160MHZ;
+		break;
+	default:
+	case NL80211_CHAN_WIDTH_20_NOHT:
+		WARN_ON_ONCE(1);
+		c->width = NL80211_CHAN_WIDTH_20_NOHT;
+		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+		break;
+	}
+
+	WARN_ON_ONCE(!cfg80211_chandef_valid(c));
+
+	return ret;
+}
+
+static u32
+ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
+			     struct ieee80211_supported_band *sband,
+			     struct ieee80211_channel *channel,
+			     const struct ieee80211_ht_operation *ht_oper,
+			     const struct ieee80211_vht_operation *vht_oper,
+			     struct cfg80211_chan_def *chandef)
+{
+	struct cfg80211_chan_def vht_chandef;
+	u32 ht_cfreq, ret;
+
+	chandef->chan = channel;
+	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
+	chandef->center_freq1 = channel->center_freq;
+	chandef->center_freq2 = 0;
+
+	if (!ht_oper || !sband->ht_cap.ht_supported) {
+		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	chandef->width = NL80211_CHAN_WIDTH_20;
+
+	ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
+						  channel->band);
+	/* check that channel matches the right operating channel */
+	if (channel->center_freq != ht_cfreq) {
+		/*
+		 * It's possible that some APs are confused here;
+		 * Netgear WNDR3700 sometimes reports 4 higher than
+		 * the actual channel in association responses, but
+		 * since we look at probe response/beacon data here
+		 * it should be OK.
+		 */
+		sdata_info(sdata,
+			   "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
+			   channel->center_freq, ht_cfreq,
+			   ht_oper->primary_chan, channel->band);
+		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	/* check 40 MHz support, if we have it */
+	if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) {
+		switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
+		case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+			chandef->width = NL80211_CHAN_WIDTH_40;
+			chandef->center_freq1 += 10;
+			break;
+		case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+			chandef->width = NL80211_CHAN_WIDTH_40;
+			chandef->center_freq1 -= 10;
+			break;
+		}
+	} else {
+		/* 40 MHz (and 80 MHz) must be supported for VHT */
+		ret = IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	if (!vht_oper || !sband->vht_cap.vht_supported) {
+		ret = IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	vht_chandef.chan = channel;
+	vht_chandef.center_freq1 =
+		ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx,
+					       channel->band);
+	vht_chandef.center_freq2 = 0;
+
+	if (vht_oper->center_freq_seg2_idx)
+		vht_chandef.center_freq2 =
+			ieee80211_channel_to_frequency(
+				vht_oper->center_freq_seg2_idx,
+				channel->band);
+
+	switch (vht_oper->chan_width) {
+	case IEEE80211_VHT_CHANWIDTH_USE_HT:
+		vht_chandef.width = chandef->width;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_80MHZ:
+		vht_chandef.width = NL80211_CHAN_WIDTH_80;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_160MHZ:
+		vht_chandef.width = NL80211_CHAN_WIDTH_160;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+		vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
+		break;
+	default:
+		sdata_info(sdata,
+			   "AP VHT operation IE has invalid channel width (%d), disable VHT\n",
+			   vht_oper->chan_width);
+		ret = IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	if (!cfg80211_chandef_valid(&vht_chandef)) {
+		sdata_info(sdata,
+			   "AP VHT information is invalid, disable VHT\n");
+		ret = IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	if (cfg80211_chandef_identical(chandef, &vht_chandef)) {
+		ret = 0;
+		goto out;
+	}
+
+	if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
+		sdata_info(sdata,
+			   "AP VHT information doesn't match HT, disable VHT\n");
+		ret = IEEE80211_STA_DISABLE_VHT;
+		goto out;
+	}
+
+	*chandef = vht_chandef;
+
+	ret = 0;
+
+	while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
+					IEEE80211_CHAN_DISABLED)) {
+		if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
+			ret = IEEE80211_STA_DISABLE_HT |
+			      IEEE80211_STA_DISABLE_VHT;
+			goto out;
+		}
+
+		ret = chandef_downgrade(chandef);
+	}
+
+	if (chandef->width != vht_chandef.width)
+		sdata_info(sdata,
+			   "local regulatory prevented using AP HT/VHT configuration, downgraded\n");
+
+out:
+	WARN_ON_ONCE(!cfg80211_chandef_valid(chandef));
+	return ret;
+}
+
+static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
+				     struct cfg80211_bss *cbss)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	const u8 *ht_cap_ie, *vht_cap_ie;
+	const struct ieee80211_ht_cap *ht_cap;
+	const struct ieee80211_vht_cap *vht_cap;
+	u8 chains = 1;
+
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HT)
+		return chains;
+
+	ht_cap_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY,
+				     cbss->information_elements,
+				     cbss->len_information_elements);
+	if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap)) {
+		ht_cap = (void *)(ht_cap_ie + 2);
+		chains = ieee80211_mcs_to_chains(&ht_cap->mcs);
+		/*
+		 * TODO: use "Tx Maximum Number Spatial Streams Supported" and
+		 *	 "Tx Unequal Modulation Supported" fields.
+		 */
+	}
+
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
+		return chains;
+
+	vht_cap_ie = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY,
+				      cbss->information_elements,
+				      cbss->len_information_elements);
+	if (vht_cap_ie && vht_cap_ie[1] >= sizeof(*vht_cap)) {
+		u8 nss;
+		u16 tx_mcs_map;
+
+		vht_cap = (void *)(vht_cap_ie + 2);
+		tx_mcs_map = le16_to_cpu(vht_cap->supp_mcs.tx_mcs_map);
+		for (nss = 8; nss > 0; nss--) {
+			if (((tx_mcs_map >> (2 * (nss - 1))) & 3) !=
+					IEEE80211_VHT_MCS_NOT_SUPPORTED)
+				break;
+		}
+		/* TODO: use "Tx Highest Supported Long GI Data Rate" field? */
+		chains = max(chains, nss);
+	}
+
+	return chains;
+}
+
 static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 				  struct cfg80211_bss *cbss)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	int ht_cfreq;
-	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
-	const u8 *ht_oper_ie;
 	const struct ieee80211_ht_operation *ht_oper = NULL;
+	const struct ieee80211_vht_operation *vht_oper = NULL;
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
+	int ret;
 
 	sband = local->hw.wiphy->bands[cbss->channel->band];
 
-	ifmgd->flags &= ~IEEE80211_STA_DISABLE_40MHZ;
+	ifmgd->flags &= ~(IEEE80211_STA_DISABLE_40MHZ |
+			  IEEE80211_STA_DISABLE_80P80MHZ |
+			  IEEE80211_STA_DISABLE_160MHZ);
+
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+	    sband->ht_cap.ht_supported) {
+		const u8 *ht_oper_ie;
 
-	if (sband->ht_cap.ht_supported) {
 		ht_oper_ie = cfg80211_find_ie(WLAN_EID_HT_OPERATION,
 					      cbss->information_elements,
 					      cbss->len_information_elements);
@@ -3207,82 +3468,45 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 			ht_oper = (void *)(ht_oper_ie + 2);
 	}
 
-	if (ht_oper) {
-		ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
-							  cbss->channel->band);
-		/* check that channel matches the right operating channel */
-		if (cbss->channel->center_freq != ht_cfreq) {
-			/*
-			 * It's possible that some APs are confused here;
-			 * Netgear WNDR3700 sometimes reports 4 higher than
-			 * the actual channel in association responses, but
-			 * since we look at probe response/beacon data here
-			 * it should be OK.
-			 */
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+	    sband->vht_cap.vht_supported) {
+		const u8 *vht_oper_ie;
+
+		vht_oper_ie = cfg80211_find_ie(WLAN_EID_VHT_OPERATION,
+					       cbss->information_elements,
+					       cbss->len_information_elements);
+		if (vht_oper_ie && vht_oper_ie[1] >= sizeof(*vht_oper))
+			vht_oper = (void *)(vht_oper_ie + 2);
+		if (vht_oper && !ht_oper) {
+			vht_oper = NULL;
 			sdata_info(sdata,
-				   "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
-				   cbss->channel->center_freq,
-				   ht_cfreq, ht_oper->primary_chan,
-				   cbss->channel->band);
-			ht_oper = NULL;
+				   "AP advertised VHT without HT, disabling both\n");
+			sdata->flags |= IEEE80211_STA_DISABLE_HT;
+			sdata->flags |= IEEE80211_STA_DISABLE_VHT;
 		}
 	}
 
-	if (ht_oper) {
-		/*
-		 * cfg80211 already verified that the channel itself can
-		 * be used, but it didn't check that we can do the right
-		 * HT type, so do that here as well. If HT40 isn't allowed
-		 * on this channel, disable 40 MHz operation.
-		 */
-		const u8 *ht_cap_ie;
-		const struct ieee80211_ht_cap *ht_cap;
-		u8 chains = 1;
-
-		channel_type = NL80211_CHAN_HT20;
-
-		if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) {
-			switch (ht_oper->ht_param &
-					IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
-			case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
-				if (cbss->channel->flags &
-						IEEE80211_CHAN_NO_HT40PLUS)
-					ifmgd->flags |=
-						IEEE80211_STA_DISABLE_40MHZ;
-				else
-					channel_type = NL80211_CHAN_HT40PLUS;
-				break;
-			case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
-				if (cbss->channel->flags &
-						IEEE80211_CHAN_NO_HT40MINUS)
-					ifmgd->flags |=
-						IEEE80211_STA_DISABLE_40MHZ;
-				else
-					channel_type = NL80211_CHAN_HT40MINUS;
-				break;
-			}
-		}
+	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
+						     cbss->channel,
+						     ht_oper, vht_oper,
+						     &chandef);
 
-		ht_cap_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY,
-					     cbss->information_elements,
-					     cbss->len_information_elements);
-		if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap)) {
-			ht_cap = (void *)(ht_cap_ie + 2);
-			chains = ieee80211_mcs_to_chains(&ht_cap->mcs);
-		}
-		sdata->needed_rx_chains = min(chains, local->rx_chains);
-	} else {
-		sdata->needed_rx_chains = 1;
-		sdata->u.mgd.flags |= IEEE80211_STA_DISABLE_HT;
-	}
+	sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
+				      local->rx_chains);
 
 	/* will change later if needed */
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
 
-	ieee80211_vif_release_channel(sdata);
-	cfg80211_chandef_create(&chandef, cbss->channel, channel_type);
-	return ieee80211_vif_use_channel(sdata, &chandef,
-					 IEEE80211_CHANCTX_SHARED);
+	/*
+	 * If this fails (possibly due to channel context sharing
+	 * on incompatible channels, e.g. 80+80 and 160 sharing the
+	 * same control channel) try to use a smaller bandwidth.
+	 */
+	ret = ieee80211_vif_use_channel(sdata, &chandef,
+					IEEE80211_CHANCTX_SHARED);
+	while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT)
+		ifmgd->flags |= chandef_downgrade(&chandef);
+	return ret;
 }
 
 static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3-55-g7522


From 77b71b370ed06c75bdebef09be438d5275f70fc1 Mon Sep 17 00:00:00 2001
From: Thomas Abraham
Date: Tue, 27 Nov 2012 14:04:32 +0530
Subject: regulator: add device tree support for max8997

Add device tree based discovery support for max8997.

Signed-off-by: Thomas Abraham <thomas.abraham@linaro.org>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Reviewed-by: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 .../bindings/regulator/max8997-regulator.txt       | 146 ++++++++++++++++++++
 drivers/mfd/max8997.c                              |  73 +++++++++-
 drivers/regulator/max8997.c                        | 148 ++++++++++++++++++++-
 include/linux/mfd/max8997-private.h                |   1 +
 include/linux/mfd/max8997.h                        |   1 +
 5 files changed, 366 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/max8997-regulator.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/max8997-regulator.txt b/Documentation/devicetree/bindings/regulator/max8997-regulator.txt
new file mode 100644
index 000000000000..9fd69a18b0ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8997-regulator.txt
@@ -0,0 +1,146 @@
+* Maxim MAX8997 Voltage and Current Regulator
+
+The Maxim MAX8997 is a multi-function device which includes volatage and
+current regulators, rtc, charger controller and other sub-blocks. It is
+interfaced to the host controller using a i2c interface. Each sub-block is
+addressed by the host system using different i2c slave address. This document
+describes the bindings for 'pmic' sub-block of max8997.
+
+Required properties:
+- compatible: Should be "maxim,max8997-pmic".
+- reg: Specifies the i2c slave address of the pmic block. It should be 0x66.
+
+- max8997,pmic-buck1-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck1 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- max8997,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck2 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- max8997,pmic-buck5-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck5 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+[1] If none of the 'max8997,pmic-buck[1/2/5]-uses-gpio-dvs' optional
+    property is specified, the 'max8997,pmic-buck[1/2/5]-dvs-voltage'
+    property should specify atleast one voltage level (which would be a
+    safe operating voltage).
+
+    If either of the 'max8997,pmic-buck[1/2/5]-uses-gpio-dvs' optional
+    property is specified, then all the eigth voltage values for the
+    'max8997,pmic-buck[1/2/5]-dvs-voltage' should be specified.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+  the interrupts from max8997 are delivered to.
+- interrupts: Interrupt specifiers for two interrupt sources.
+  - First interrupt specifier is for 'irq1' interrupt.
+  - Second interrupt specifier is for 'alert' interrupt.
+- max8997,pmic-buck1-uses-gpio-dvs: 'buck1' can be controlled by gpio dvs.
+- max8997,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
+- max8997,pmic-buck5-uses-gpio-dvs: 'buck5' can be controlled by gpio dvs.
+
+Additional properties required if either of the optional properties are used:
+- max8997,pmic-ignore-gpiodvs-side-effect: When GPIO-DVS mode is used for
+  multiple bucks, changing the voltage value of one of the bucks may affect
+  that of another buck, which is the side effect of the change (set_voltage).
+  Use this property to ignore such side effects and change the voltage.
+
+- max8997,pmic-buck125-default-dvs-idx: Default voltage setting selected from
+  the possible 8 options selectable by the dvs gpios. The value of this
+  property should be between 0 and 7. If not specified or if out of range, the
+  default value of this property is set to 0.
+
+- max8997,pmic-buck125-dvs-gpios: GPIO specifiers for three host gpio's used
+  for dvs. The format of the gpio specifier depends in the gpio controller.
+
+Regulators: The regulators of max8997 that have to be instantiated should be
+included in a sub-node named 'regulators'. Regulator nodes included in this
+sub-node should be of the format as listed below.
+
+	regulator_name {
+		standard regulator bindings here
+	};
+
+The following are the names of the regulators that the max8997 pmic block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of max8997.
+
+	- LDOn
+		  - valid values for n are 1 to 18 and 21
+		  - Example: LDO0, LD01, LDO2, LDO21
+	- BUCKn
+		  - valid values for n are 1 to 7.
+		  - Example: BUCK1, BUCK2, BUCK3, BUCK7
+
+	- ENVICHG: Battery Charging Current Monitor Output. This is a fixed
+		   voltage type regulator
+
+	- ESAFEOUT1: (ldo19)
+	- ESAFEOUT2: (ld020)
+
+	- CHARGER_CV: main battery charger voltage control
+	- CHARGER: main battery charger current control
+	- CHARGER_TOPOFF: end of charge current threshold level
+
+The bindings inside the regulator nodes use the standard regulator bindings
+which are documented elsewhere.
+
+Example:
+
+	max8997_pmic@66 {
+		compatible = "maxim,max8997-pmic";
+		interrupt-parent = <&wakeup_eint>;
+		reg = <0x66>;
+		interrupts = <4 0>, <3 0>;
+
+		max8997,pmic-buck1-uses-gpio-dvs;
+		max8997,pmic-buck2-uses-gpio-dvs;
+		max8997,pmic-buck5-uses-gpio-dvs;
+
+		max8997,pmic-ignore-gpiodvs-side-effect;
+		max8997,pmic-buck125-default-dvs-idx = <0>;
+
+		max8997,pmic-buck125-dvs-gpios = <&gpx0 0 1 0 0>, /* SET1 */
+						 <&gpx0 1 1 0 0>, /* SET2 */
+						 <&gpx0 2 1 0 0>; /* SET3 */
+
+		max8997,pmic-buck1-dvs-voltage = <1350000>, <1300000>,
+						 <1250000>, <1200000>,
+						 <1150000>, <1100000>,
+						 <1000000>, <950000>;
+
+		max8997,pmic-buck2-dvs-voltage = <1100000>, <1100000>,
+						 <1100000>, <1100000>,
+						 <1000000>, <1000000>,
+						 <1000000>, <1000000>;
+
+		max8997,pmic-buck5-dvs-voltage = <1200000>, <1200000>,
+						 <1200000>, <1200000>,
+						 <1200000>, <1200000>,
+						 <1200000>, <1200000>;
+
+		regulators {
+			ldo1_reg: LDO1 {
+				regulator-name = "VDD_ABB_3.3V";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+			};
+
+			ldo2_reg: LDO2 {
+				regulator-name = "VDD_ALIVE_1.1V";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+
+			buck1_reg: BUCK1 {
+				regulator-name = "VDD_ARM_1.2V";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+		};
+	};
diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index f123517065ec..abd5c80c7cf5 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -21,8 +21,10 @@
  * This driver is based on max8998.c
  */
 
+#include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/of_irq.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/module.h>
@@ -47,6 +49,13 @@ static struct mfd_cell max8997_devs[] = {
 	{ .name = "max8997-led", .id = 2 },
 };
 
+#ifdef CONFIG_OF
+static struct of_device_id __devinitdata max8997_pmic_dt_match[] = {
+	{ .compatible = "maxim,max8997-pmic", .data = TYPE_MAX8997 },
+	{},
+};
+#endif
+
 int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest)
 {
 	struct max8997_dev *max8997 = i2c_get_clientdata(i2c);
@@ -123,6 +132,58 @@ int max8997_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask)
 }
 EXPORT_SYMBOL_GPL(max8997_update_reg);
 
+#ifdef CONFIG_OF
+/*
+ * Only the common platform data elements for max8997 are parsed here from the
+ * device tree. Other sub-modules of max8997 such as pmic, rtc and others have
+ * to parse their own platform data elements from device tree.
+ *
+ * The max8997 platform data structure is instantiated here and the drivers for
+ * the sub-modules need not instantiate another instance while parsing their
+ * platform data.
+ */
+static struct max8997_platform_data *max8997_i2c_parse_dt_pdata(
+					struct device *dev)
+{
+	struct max8997_platform_data *pd;
+
+	pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL);
+	if (!pd) {
+		dev_err(dev, "could not allocate memory for pdata\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	pd->ono = irq_of_parse_and_map(dev->of_node, 1);
+
+	/*
+	 * ToDo: the 'wakeup' member in the platform data is more of a linux
+	 * specfic information. Hence, there is no binding for that yet and
+	 * not parsed here.
+	 */
+
+	return pd;
+}
+#else
+static struct max8997_platform_data *max8997_i2c_parse_dt_pdata(
+					struct device *dev)
+{
+	return 0;
+}
+#endif
+
+static inline int max8997_i2c_get_driver_data(struct i2c_client *i2c,
+						const struct i2c_device_id *id)
+{
+#ifdef CONFIG_OF
+	if (i2c->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(max8997_pmic_dt_match, i2c->dev.of_node);
+		return (int)match->data;
+	}
+#endif
+	return (int)id->driver_data;
+}
+
 static int max8997_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
@@ -137,12 +198,21 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 	i2c_set_clientdata(i2c, max8997);
 	max8997->dev = &i2c->dev;
 	max8997->i2c = i2c;
-	max8997->type = id->driver_data;
+	max8997->type = max8997_i2c_get_driver_data(i2c, id);
 	max8997->irq = i2c->irq;
 
+	if (max8997->dev->of_node) {
+		pdata = max8997_i2c_parse_dt_pdata(max8997->dev);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto err;
+		}
+	}
+
 	if (!pdata)
 		goto err;
 
+	max8997->pdata = pdata;
 	max8997->ono = pdata->ono;
 
 	mutex_init(&max8997->iolock);
@@ -434,6 +504,7 @@ static struct i2c_driver max8997_i2c_driver = {
 		   .name = "max8997",
 		   .owner = THIS_MODULE,
 		   .pm = &max8997_pm,
+		   .of_match_table = of_match_ptr(max8997_pmic_dt_match),
 	},
 	.probe = max8997_i2c_probe,
 	.remove = max8997_i2c_remove,
diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997.c
index 64cf2ee38f6c..b56c4326853d 100644
--- a/drivers/regulator/max8997.c
+++ b/drivers/regulator/max8997.c
@@ -24,6 +24,7 @@
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
+#include <linux/of_gpio.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -31,6 +32,7 @@
 #include <linux/regulator/machine.h>
 #include <linux/mfd/max8997.h>
 #include <linux/mfd/max8997-private.h>
+#include <linux/regulator/of_regulator.h>
 
 struct max8997_data {
 	struct device *dev;
@@ -933,10 +935,145 @@ static struct regulator_desc regulators[] = {
 				  max8997_charger_fixedstate_ops),
 };
 
+#ifdef CONFIG_OF
+static int max8997_pmic_dt_parse_dvs_gpio(struct max8997_dev *iodev,
+			struct max8997_platform_data *pdata,
+			struct device_node *pmic_np)
+{
+	int i, gpio;
+
+	for (i = 0; i < 3; i++) {
+		gpio = of_get_named_gpio(pmic_np,
+					"max8997,pmic-buck125-dvs-gpios", i);
+		if (!gpio_is_valid(gpio)) {
+			dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
+			return -EINVAL;
+		}
+		pdata->buck125_gpios[i] = gpio;
+	}
+	return 0;
+}
+
+static int max8997_pmic_dt_parse_pdata(struct max8997_dev *iodev,
+					struct max8997_platform_data *pdata)
+{
+	struct device_node *pmic_np, *regulators_np, *reg_np;
+	struct max8997_regulator_data *rdata;
+	unsigned int i, dvs_voltage_nr = 1, ret;
+
+	pmic_np = iodev->dev->of_node;
+	if (!pmic_np) {
+		dev_err(iodev->dev, "could not find pmic sub-node\n");
+		return -ENODEV;
+	}
+
+	regulators_np = of_find_node_by_name(pmic_np, "regulators");
+	if (!regulators_np) {
+		dev_err(iodev->dev, "could not find regulators sub-node\n");
+		return -EINVAL;
+	}
+
+	/* count the number of regulators to be supported in pmic */
+	pdata->num_regulators = 0;
+	for_each_child_of_node(regulators_np, reg_np)
+		pdata->num_regulators++;
+
+	rdata = devm_kzalloc(iodev->dev, sizeof(*rdata) *
+				pdata->num_regulators, GFP_KERNEL);
+	if (!rdata) {
+		dev_err(iodev->dev, "could not allocate memory for "
+						"regulator data\n");
+		return -ENOMEM;
+	}
+
+	pdata->regulators = rdata;
+	for_each_child_of_node(regulators_np, reg_np) {
+		for (i = 0; i < ARRAY_SIZE(regulators); i++)
+			if (!of_node_cmp(reg_np->name, regulators[i].name))
+				break;
+
+		if (i == ARRAY_SIZE(regulators)) {
+			dev_warn(iodev->dev, "don't know how to configure "
+				"regulator %s\n", reg_np->name);
+			continue;
+		}
+
+		rdata->id = i;
+		rdata->initdata = of_get_regulator_init_data(
+						iodev->dev, reg_np);
+		rdata->reg_node = reg_np;
+		rdata++;
+	}
+
+	if (of_get_property(pmic_np, "max8997,pmic-buck1-uses-gpio-dvs", NULL))
+		pdata->buck1_gpiodvs = true;
+
+	if (of_get_property(pmic_np, "max8997,pmic-buck2-uses-gpio-dvs", NULL))
+		pdata->buck2_gpiodvs = true;
+
+	if (of_get_property(pmic_np, "max8997,pmic-buck5-uses-gpio-dvs", NULL))
+		pdata->buck5_gpiodvs = true;
+
+	if (pdata->buck1_gpiodvs || pdata->buck2_gpiodvs ||
+						pdata->buck5_gpiodvs) {
+		ret = max8997_pmic_dt_parse_dvs_gpio(iodev, pdata, pmic_np);
+		if (ret)
+			return -EINVAL;
+
+		if (of_property_read_u32(pmic_np,
+				"max8997,pmic-buck125-default-dvs-idx",
+				&pdata->buck125_default_idx)) {
+			pdata->buck125_default_idx = 0;
+		} else {
+			if (pdata->buck125_default_idx >= 8) {
+				pdata->buck125_default_idx = 0;
+				dev_info(iodev->dev, "invalid value for "
+				"default dvs index, using 0 instead\n");
+			}
+		}
+
+		if (of_get_property(pmic_np,
+			"max8997,pmic-ignore-gpiodvs-side-effect", NULL))
+			pdata->ignore_gpiodvs_side_effect = true;
+
+		dvs_voltage_nr = 8;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"max8997,pmic-buck1-dvs-voltage",
+				pdata->buck1_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck1 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"max8997,pmic-buck2-dvs-voltage",
+				pdata->buck2_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck2 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"max8997,pmic-buck5-dvs-voltage",
+				pdata->buck5_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck5 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int max8997_pmic_dt_parse_pdata(struct max8997_dev *iodev,
+					struct max8997_platform_data *pdata)
+{
+	return 0;
+}
+#endif /* CONFIG_OF */
+
 static __devinit int max8997_pmic_probe(struct platform_device *pdev)
 {
 	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-	struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct max8997_platform_data *pdata = iodev->pdata;
 	struct regulator_config config = { };
 	struct regulator_dev **rdev;
 	struct max8997_data *max8997;
@@ -944,11 +1081,17 @@ static __devinit int max8997_pmic_probe(struct platform_device *pdev)
 	int i, ret, size, nr_dvs;
 	u8 max_buck1 = 0, max_buck2 = 0, max_buck5 = 0;
 
-	if (!pdata) {
+	if (IS_ERR_OR_NULL(pdata)) {
 		dev_err(pdev->dev.parent, "No platform init data supplied.\n");
 		return -ENODEV;
 	}
 
+	if (iodev->dev->of_node) {
+		ret = max8997_pmic_dt_parse_pdata(iodev, pdata);
+		if (ret)
+			return ret;
+	}
+
 	max8997 = devm_kzalloc(&pdev->dev, sizeof(struct max8997_data),
 			       GFP_KERNEL);
 	if (!max8997)
@@ -1104,6 +1247,7 @@ static __devinit int max8997_pmic_probe(struct platform_device *pdev)
 		config.dev = max8997->dev;
 		config.init_data = pdata->regulators[i].initdata;
 		config.driver_data = max8997;
+		config.of_node = pdata->regulators[i].reg_node;
 
 		rdev[i] = regulator_register(&regulators[id], &config);
 		if (IS_ERR(rdev[i])) {
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index 830152cfae33..6ae21bf47d64 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -316,6 +316,7 @@ enum max8997_irq {
 #define MAX8997_NUM_GPIO	12
 struct max8997_dev {
 	struct device *dev;
+	struct max8997_platform_data *pdata;
 	struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */
 	struct i2c_client *rtc; /* slave addr 0x0c */
 	struct i2c_client *haptic; /* slave addr 0x90 */
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 328d8e24b533..1d4a4fe6ac33 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -75,6 +75,7 @@ enum max8998_regulators {
 struct max8997_regulator_data {
 	int id;
 	struct regulator_init_data *initdata;
+	struct device_node *reg_node;
 };
 
 enum max8997_muic_usb_type {
-- 
cgit v1.2.3-55-g7522


From 33234e791de2ac3ea915158e042907748191cabd Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Tue, 27 Nov 2012 10:24:33 +0800
Subject: regulator: core: Allow specific minimal selector for starting linear
 mapping

Some drivers (at least 3 drivers) have such variant of linear mapping that
the first few selectors are invalid and the reset are linear mapping.
Let's support this case in core.

This patch adds linear_min_sel in struct regulator_desc,
so we can allow specific minimal selector for starting linear mapping.
Then extends regulator_[map|list]_voltage_linear() to support this feature.

Note that for selectors less than min_linear_index, we need count them to
n_voltages so regulator_list_voltage() won't fail while checking the boundary
for selector before calling list_voltage callback.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c         | 6 ++++++
 include/linux/regulator/driver.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index e872c8be080e..02a249b024b3 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1897,6 +1897,10 @@ int regulator_list_voltage_linear(struct regulator_dev *rdev,
 {
 	if (selector >= rdev->desc->n_voltages)
 		return -EINVAL;
+	if (selector < rdev->desc->linear_min_sel)
+		return 0;
+
+	selector -= rdev->desc->linear_min_sel;
 
 	return rdev->desc->min_uV + (rdev->desc->uV_step * selector);
 }
@@ -2120,6 +2124,8 @@ int regulator_map_voltage_linear(struct regulator_dev *rdev,
 	if (ret < 0)
 		return ret;
 
+	ret += rdev->desc->linear_min_sel;
+
 	/* Map back into a voltage to verify we're still in bounds */
 	voltage = rdev->desc->ops->list_voltage(rdev, ret);
 	if (voltage < min_uV || voltage > max_uV)
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 7932a3bf21bd..d9ce98a5028b 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -185,6 +185,7 @@ enum regulator_type {
  *
  * @min_uV: Voltage given by the lowest selector (if linear mapping)
  * @uV_step: Voltage increase with each selector (if linear mapping)
+ * @linear_min_sel: Minimal selector for starting linear mapping
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
  * @volt_table: Voltage mapping table (if table based mapping)
  *
@@ -207,6 +208,7 @@ struct regulator_desc {
 
 	unsigned int min_uV;
 	unsigned int uV_step;
+	unsigned int linear_min_sel;
 	unsigned int ramp_delay;
 
 	const unsigned int *volt_table;
-- 
cgit v1.2.3-55-g7522


From ec809bd817dfa1905283468e4c813684ed4efe78 Mon Sep 17 00:00:00 2001
From: Krzysztof Mazur
Date: Tue, 6 Nov 2012 23:16:57 +0100
Subject: atm: add owner of push() callback to atmvcc

The atm is using atmvcc->push(vcc, NULL) callback to notify protocol
that vcc will be closed and protocol must detach from it. This callback
is usually used by protocol to decrement module usage count by module_put(),
but it leaves small window then module is still used after module_put().

Now the owner of push() callback is kept in atmvcc and
module_put(atmvcc->owner) is called after the protocol is detached from vcc.

Signed-off-by: Krzysztof Mazur <krzysiek@podlesie.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: Chas Williams <chas@cmf.nrl.navy.mil>
---
 include/linux/atmdev.h | 1 +
 net/atm/common.c       | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 22ef21c33d0c..72db2af902a8 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -106,6 +106,7 @@ struct atm_vcc {
 	void		*dev_data;	/* per-device data */
 	void		*proto_data;	/* per-protocol data */
 	struct k_atm_aal_stats *stats;	/* pointer to AAL stats group */
+	struct module *owner;		/* owner of ->push function */
 	/* SVC part --- may move later ------------------------------------- */
 	short		itf;		/* interface number */
 	struct sockaddr_atmsvc local;
diff --git a/net/atm/common.c b/net/atm/common.c
index 0c0ad930a632..24216642d696 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -156,6 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
 	atomic_set(&sk->sk_rmem_alloc, 0);
 	vcc->push = NULL;
 	vcc->pop = NULL;
+	vcc->owner = NULL;
 	vcc->push_oam = NULL;
 	vcc->vpi = vcc->vci = 0; /* no VCI/VPI yet */
 	vcc->atm_options = vcc->aal_options = 0;
@@ -175,6 +176,7 @@ static void vcc_destroy_socket(struct sock *sk)
 			vcc->dev->ops->close(vcc);
 		if (vcc->push)
 			vcc->push(vcc, NULL); /* atmarpd has no push */
+		module_put(vcc->owner);
 
 		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 			atm_return(vcc, skb->truesize);
-- 
cgit v1.2.3-55-g7522


From 582b336ec2c0f0076f5650a029fcc9abd4a906f7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Tue, 27 Nov 2012 23:28:54 -0200
Subject: sched: add notifier for cross-cpu migrations

Originally from Jeremy Fitzhardinge.

Acked-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/sched.h |  8 ++++++++
 kernel/sched/core.c   | 15 +++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..6eb2ed819e36 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -107,6 +107,14 @@ extern unsigned long this_cpu_load(void);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+	struct task_struct *task;
+	int from_cpu;
+	int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
 extern unsigned long get_parent_ip(unsigned long addr);
 
 struct seq_file;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..c86b8b6e70f4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -922,6 +922,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq->skip_clock_update = 1;
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -952,8 +959,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
+		struct task_migration_notifier tmn;
+
 		p->se.nr_migrations++;
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+
+		tmn.task = p;
+		tmn.from_cpu = task_cpu(p);
+		tmn.to_cpu = new_cpu;
+
+		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);
-- 
cgit v1.2.3-55-g7522


From e0b306fef90556233797d2e1747bd6a3ae35ea93 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Tue, 27 Nov 2012 23:28:59 -0200
Subject: time: export time information for KVM pvclock

As suggested by John, export time data similarly to how its
done by vsyscall support. This allows KVM to retrieve necessary
information to implement vsyscall support in KVM guests.

Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/pvclock_gtod.h |  9 ++++++++
 kernel/time/timekeeping.c    | 50 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 include/linux/pvclock_gtod.h

(limited to 'include/linux')

diff --git a/include/linux/pvclock_gtod.h b/include/linux/pvclock_gtod.h
new file mode 100644
index 000000000000..0ca75825b60d
--- /dev/null
+++ b/include/linux/pvclock_gtod.h
@@ -0,0 +1,9 @@
+#ifndef _PVCLOCK_GTOD_H
+#define _PVCLOCK_GTOD_H
+
+#include <linux/notifier.h>
+
+extern int pvclock_gtod_register_notifier(struct notifier_block *nb);
+extern int pvclock_gtod_unregister_notifier(struct notifier_block *nb);
+
+#endif /* _PVCLOCK_GTOD_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e424970bb562..69f5342e8d1c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -21,6 +21,7 @@
 #include <linux/time.h>
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
+#include <linux/pvclock_gtod.h>
 
 
 static struct timekeeper timekeeper;
@@ -180,6 +181,54 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 	return nsec + arch_gettimeoffset();
 }
 
+static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+	raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+}
+
+/**
+ * pvclock_gtod_register_notifier - register a pvclock timedata update listener
+ *
+ * Must hold write on timekeeper.lock
+ */
+int pvclock_gtod_register_notifier(struct notifier_block *nb)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long flags;
+	int ret;
+
+	write_seqlock_irqsave(&tk->lock, flags);
+	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
+	/* update timekeeping data */
+	update_pvclock_gtod(tk);
+	write_sequnlock_irqrestore(&tk->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
+
+/**
+ * pvclock_gtod_unregister_notifier - unregister a pvclock
+ * timedata update listener
+ *
+ * Must hold write on timekeeper.lock
+ */
+int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long flags;
+	int ret;
+
+	write_seqlock_irqsave(&tk->lock, flags);
+	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
+	write_sequnlock_irqrestore(&tk->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
+
 /* must hold write on timekeeper.lock */
 static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 {
@@ -188,6 +237,7 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 		ntp_clear();
 	}
 	update_vsyscall(tk);
+	update_pvclock_gtod(tk);
 }
 
 /**
-- 
cgit v1.2.3-55-g7522


From d828199e84447795c6669ff0e6c6d55eb9beeff6 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Tue, 27 Nov 2012 23:29:01 -0200
Subject: KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag

KVM added a global variable to guarantee monotonicity in the guest.
One of the reasons for that is that the time between

	1. ktime_get_ts(&timespec);
	2. rdtscll(tsc);

Is variable. That is, given a host with stable TSC, suppose that
two VCPUs read the same time via ktime_get_ts() above.

The time required to execute 2. is not the same on those two instances
executing in different VCPUS (cache misses, interrupts...).

If the TSC value that is used by the host to interpolate when
calculating the monotonic time is the same value used to calculate
the tsc_timestamp value stored in the pvclock data structure, and
a single <system_timestamp, tsc_timestamp> tuple is visible to all
vcpus simultaneously, this problem disappears. See comment on top
of pvclock_update_vm_gtod_copy for details.

Monotonicity is then guaranteed by synchronicity of the host TSCs
and guest TSCs.

Set TSC stable pvclock flag in that case, allowing the guest to read
clock from userspace.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   7 ++
 arch/x86/kvm/trace.h            |  30 +++++
 arch/x86/kvm/x86.c              | 235 ++++++++++++++++++++++++++++++++++++++--
 include/linux/kvm_host.h        |   3 +
 virt/kvm/kvm_main.c             |   5 +
 5 files changed, 272 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d60535adec98..32f0e4a063b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
 #include <linux/kvm_para.h>
 #include <linux/kvm_types.h>
 #include <linux/perf_event.h>
+#include <linux/pvclock_gtod.h>
+#include <linux/clocksource.h>
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
@@ -560,6 +562,11 @@ struct kvm_arch {
 	u64 cur_tsc_offset;
 	u8  cur_tsc_generation;
 
+	spinlock_t pvclock_gtod_sync_lock;
+	bool use_master_clock;
+	u64 master_kernel_ns;
+	cycle_t master_cycle_now;
+
 	struct kvm_xen_hvm_config xen_hvm_config;
 
 	/* fields used by HYPER-V emulation */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index bca63f04dccb..1d6526856080 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -4,6 +4,7 @@
 #include <linux/tracepoint.h>
 #include <asm/vmx.h>
 #include <asm/svm.h>
+#include <asm/clocksource.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -754,6 +755,35 @@ TRACE_EVENT(
 		  __entry->write ? "Write" : "Read",
 		  __entry->gpa_match ? "GPA" : "GVA")
 );
+
+#ifdef CONFIG_X86_64
+
+#define host_clocks					\
+	{VCLOCK_NONE, "none"},				\
+	{VCLOCK_TSC,  "tsc"},				\
+	{VCLOCK_HPET, "hpet"}				\
+
+TRACE_EVENT(kvm_update_master_clock,
+	TP_PROTO(bool use_master_clock, unsigned int host_clock),
+	TP_ARGS(use_master_clock, host_clock),
+
+	TP_STRUCT__entry(
+		__field(		bool,	use_master_clock	)
+		__field(	unsigned int,	host_clock		)
+	),
+
+	TP_fast_assign(
+		__entry->use_master_clock	= use_master_clock;
+		__entry->host_clock		= host_clock;
+	),
+
+	TP_printk("masterclock %d hostclock %s",
+		  __entry->use_master_clock,
+		  __print_symbolic(__entry->host_clock, host_clocks))
+);
+
+#endif /* CONFIG_X86_64 */
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c077b817d1c3..a7b97a49d8ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1048,7 +1048,9 @@ static inline u64 get_kernel_ns(void)
 	return timespec_to_ns(&ts);
 }
 
+#ifdef CONFIG_X86_64
 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
+#endif
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 unsigned long max_tsc_khz;
@@ -1190,21 +1192,170 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+#ifdef CONFIG_X86_64
+
+static cycle_t read_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads.  The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+
+	last = pvclock_gtod_data.clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a funciton of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead.  I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
+
+static inline u64 vgettsc(cycle_t *cycle_now)
+{
+	long v;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	*cycle_now = read_tsc();
+
+	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
+	return v * gtod->clock.mult;
+}
+
+static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	ts->tv_nsec = 0;
+	do {
+		seq = read_seqcount_begin(&gtod->seq);
+		mode = gtod->clock.vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_sec;
+		ns = gtod->monotonic_time_snsec;
+		ns += vgettsc(cycle_now);
+		ns >>= gtod->clock.shift;
+	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+	timespec_add_ns(ts, ns);
+
+	return mode;
+}
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+{
+	struct timespec ts;
+
+	/* checked again under seqlock below */
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return false;
+
+	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
+		return false;
+
+	monotonic_to_bootbased(&ts);
+	*kernel_ns = timespec_to_ns(&ts);
+
+	return true;
+}
+#endif
+
+/*
+ *
+ * Assuming a stable TSC across physical CPUS, the following condition
+ * is possible. Each numbered line represents an event visible to both
+ * CPUs at the next numbered event.
+ *
+ * "timespecX" represents host monotonic time. "tscX" represents
+ * RDTSC value.
+ *
+ * 		VCPU0 on CPU0		|	VCPU1 on CPU1
+ *
+ * 1.  read timespec0,tsc0
+ * 2.					| timespec1 = timespec0 + N
+ * 					| tsc1 = tsc0 + M
+ * 3. transition to guest		| transition to guest
+ * 4. ret0 = timespec0 + (rdtsc - tsc0) |
+ * 5.				        | ret1 = timespec1 + (rdtsc - tsc1)
+ * 				        | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
+ *
+ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
+ *
+ * 	- ret0 < ret1
+ *	- timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
+ *		...
+ *	- 0 < N - M => M < N
+ *
+ * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
+ * always the case (the difference between two distinct xtime instances
+ * might be smaller then the difference between corresponding TSC reads,
+ * when updating guest vcpus pvclock areas).
+ *
+ * To avoid that problem, do not allow visibility of distinct
+ * system_timestamp/tsc_timestamp values simultaneously: use a master
+ * copy of host monotonic time values. Update that master copy
+ * in lockstep.
+ *
+ * Rely on synchronization of host TSCs for monotonicity.
+ *
+ */
+
+static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	struct kvm_arch *ka = &kvm->arch;
+	int vclock_mode;
+
+	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	ka->use_master_clock = kvm_get_time_and_clockread(
+					&ka->master_kernel_ns,
+					&ka->master_cycle_now);
+
+	if (ka->use_master_clock)
+		atomic_set(&kvm_guest_has_master_clock, 1);
+
+	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
+	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode);
+#endif
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
-	unsigned long flags;
+	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
+	struct kvm_arch *ka = &v->kvm->arch;
 	void *shared_kaddr;
-	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
-	u64 tsc_timestamp;
+	u64 tsc_timestamp, host_tsc;
 	struct pvclock_vcpu_time_info *guest_hv_clock;
 	u8 pvclock_flags;
+	bool use_master_clock;
+
+	kernel_ns = 0;
+	host_tsc = 0;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, native_read_tsc());
-	kernel_ns = get_kernel_ns();
 	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	if (unlikely(this_tsc_khz == 0)) {
 		local_irq_restore(flags);
@@ -1212,6 +1363,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		return 1;
 	}
 
+	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	use_master_clock = ka->use_master_clock;
+	if (use_master_clock) {
+		host_tsc = ka->master_cycle_now;
+		kernel_ns = ka->master_kernel_ns;
+	}
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+	if (!use_master_clock) {
+		host_tsc = native_read_tsc();
+		kernel_ns = get_kernel_ns();
+	}
+
+	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
+
 	/*
 	 * We may have to catch up the TSC to match elapsed wall clock
 	 * time for two reasons, even if kvmclock is used.
@@ -1273,9 +1442,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->hw_tsc_khz = this_tsc_khz;
 	}
 
-	if (max_kernel_ns > kernel_ns)
-		kernel_ns = max_kernel_ns;
-
+	/* with a master <monotonic time, tsc value> tuple,
+	 * pvclock clock reads always increase at the (scaled) rate
+	 * of guest TSC - no need to deal with sampling errors.
+	 */
+	if (!use_master_clock) {
+		if (max_kernel_ns > kernel_ns)
+			kernel_ns = max_kernel_ns;
+	}
 	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1301,6 +1475,10 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->pvclock_set_guest_stopped_request = false;
 	}
 
+	/* If the host uses TSC clocksource, then it is stable */
+	if (use_master_clock)
+		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
+
 	vcpu->hv_clock.flags = pvclock_flags;
 
 	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
@@ -4912,6 +5090,17 @@ static void kvm_set_mmio_spte_mask(void)
 #ifdef CONFIG_X86_64
 static void pvclock_gtod_update_fn(struct work_struct *work)
 {
+	struct kvm *kvm;
+
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	raw_spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+	atomic_set(&kvm_guest_has_master_clock, 0);
+	raw_spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5303,6 +5492,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+static void kvm_gen_update_masterclock(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm_arch *ka = &kvm->arch;
+
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	kvm_make_mclock_inprogress_request(kvm);
+	/* no guest entries from this point */
+	pvclock_update_vm_gtod_copy(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+
+	/* guest entries allowed */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+#endif
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -5315,6 +5527,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
+		if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
+			kvm_gen_update_masterclock(vcpu->kvm);
 		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
 			r = kvm_guest_time_update(vcpu);
 			if (unlikely(r))
@@ -6219,6 +6433,8 @@ int kvm_arch_hardware_enable(void *garbage)
 			kvm_for_each_vcpu(i, vcpu, kvm) {
 				vcpu->arch.tsc_offset_adjustment += delta_cyc;
 				vcpu->arch.last_host_tsc = local_tsc;
+				set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
+					&vcpu->requests);
 			}
 
 			/*
@@ -6356,6 +6572,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
+	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+
+	pvclock_update_vm_gtod_copy(kvm);
 
 	return 0;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 99a47627e046..c94c9985dee0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -131,6 +131,8 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
 #define KVM_REQ_WATCHDOG          18
+#define KVM_REQ_MASTERCLOCK_UPDATE 19
+#define KVM_REQ_MCLOCK_INPROGRESS 20
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -540,6 +542,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
+void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e3f5b143158e..be3e7bb73b10 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -212,6 +212,11 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
 	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
+void kvm_make_mclock_inprogress_request(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+}
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
-- 
cgit v1.2.3-55-g7522


From 42897d866b120547777ae1fd316680ec53356d9c Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Tue, 27 Nov 2012 23:29:02 -0200
Subject: KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC
 initialization

TSC initialization will soon make use of online_vcpus.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c   |  5 +++++
 arch/powerpc/kvm/powerpc.c |  5 +++++
 arch/s390/kvm/kvm-s390.c   |  5 +++++
 arch/x86/kvm/svm.c         |  1 -
 arch/x86/kvm/vmx.c         |  2 --
 arch/x86/kvm/x86.c         | 13 +++++++++++++
 include/linux/kvm_host.h   |  1 +
 virt/kvm/kvm_main.c        |  1 +
 8 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index c71acd7f9a13..83b54530916e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1330,6 +1330,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index deb0d596d815..f9ab12aea829 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -439,6 +439,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	return vcpu;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	/* Make sure we're not using the vcpu anymore */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 38883f0bf27e..731ddeee32e4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -355,6 +355,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 94f5ceba7e1e..161a5fa66d82 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1251,7 +1251,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
 	init_vmcb(svm);
-	kvm_write_tsc(&svm->vcpu, 0);
 
 	err = fx_init(&svm->vcpu);
 	if (err)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 896efd4842e7..bb18923bf632 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3896,8 +3896,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
-	kvm_write_tsc(&vmx->vcpu, 0);
-
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a7b97a49d8ad..f3c069efc72a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6320,6 +6320,19 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	int r;
+
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
+	kvm_write_tsc(vcpu, 0);
+	vcpu_put(vcpu);
+
+	return r;
+}
+
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	int r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c94c9985dee0..8e5c7b651655 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -596,6 +596,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 int kvm_arch_hardware_enable(void *garbage);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be3e7bb73b10..e6cfd4344d28 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1853,6 +1853,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	atomic_inc(&kvm->online_vcpus);
 
 	mutex_unlock(&kvm->lock);
+	kvm_arch_vcpu_postcreate(vcpu);
 	return r;
 
 unlock_vcpu_destroy:
-- 
cgit v1.2.3-55-g7522


From 31fbcda71489d8cbe2b82819eaab4818524e3a49 Mon Sep 17 00:00:00 2001
From: Lee Jones
Date: Fri, 28 Sep 2012 10:29:07 +0100
Subject: Input: bu21013_ts - move GPIO init and exit functions into the driver

These GPIO init and exit functions have no place in platform data, they
should be part of the driver instead,

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/arm/mach-ux500/board-mop500-stuib.c | 71 +-------------------------------
 drivers/input/touchscreen/bu21013_ts.c   | 69 ++++++++++++++++++++++---------
 include/linux/input/bu21013.h            | 10 +----
 3 files changed, 53 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500-stuib.c b/arch/arm/mach-ux500/board-mop500-stuib.c
index 564f57d5d8a7..7e1f294f0434 100644
--- a/arch/arm/mach-ux500/board-mop500-stuib.c
+++ b/arch/arm/mach-ux500/board-mop500-stuib.c
@@ -77,9 +77,6 @@ static struct i2c_board_info __initdata mop500_i2c0_devices_stuib[] = {
  * BU21013 ROHM touchscreen interface on the STUIBs
  */
 
-/* tracks number of bu21013 devices being enabled */
-static int bu21013_devices;
-
 #define TOUCH_GPIO_PIN  84
 
 #define TOUCH_XMAX	384
@@ -88,73 +85,8 @@ static int bu21013_devices;
 #define PRCMU_CLOCK_OCR		0x1CC
 #define TSC_EXT_CLOCK_9_6MHZ	0x840000
 
-/**
- * bu21013_gpio_board_init : configures the touch panel.
- * @reset_pin: reset pin number
- * This function can be used to configures
- * the voltage and reset the touch panel controller.
- */
-static int bu21013_gpio_board_init(int reset_pin)
-{
-	int retval = 0;
-
-	bu21013_devices++;
-	if (bu21013_devices == 1) {
-		retval = gpio_request(reset_pin, "touchp_reset");
-		if (retval) {
-			printk(KERN_ERR "Unable to request gpio reset_pin");
-			return retval;
-		}
-		retval = gpio_direction_output(reset_pin, 1);
-		if (retval < 0) {
-			printk(KERN_ERR "%s: gpio direction failed\n",
-					__func__);
-			return retval;
-		}
-	}
-
-	return retval;
-}
-
-/**
- * bu21013_gpio_board_exit : deconfigures the touch panel controller
- * @reset_pin: reset pin number
- * This function can be used to deconfigures the chip selection
- * for touch panel controller.
- */
-static int bu21013_gpio_board_exit(int reset_pin)
-{
-	int retval = 0;
-
-	if (bu21013_devices == 1) {
-		retval = gpio_direction_output(reset_pin, 0);
-		if (retval < 0) {
-			printk(KERN_ERR "%s: gpio direction failed\n",
-					__func__);
-			return retval;
-		}
-		gpio_set_value(reset_pin, 0);
-	}
-	bu21013_devices--;
-
-	return retval;
-}
-
-/**
- * bu21013_read_pin_val : get the interrupt pin value
- * This function can be used to get the interrupt pin value for touch panel
- * controller.
- */
-static int bu21013_read_pin_val(void)
-{
-	return gpio_get_value(TOUCH_GPIO_PIN);
-}
-
 static struct bu21013_platform_device tsc_plat_device = {
-	.cs_en = bu21013_gpio_board_init,
-	.cs_dis = bu21013_gpio_board_exit,
-	.irq_read_val = bu21013_read_pin_val,
-	.irq = NOMADIK_GPIO_TO_IRQ(TOUCH_GPIO_PIN),
+	.touch_pin = TOUCH_GPIO_PIN,
 	.touch_x_max = TOUCH_XMAX,
 	.touch_y_max = TOUCH_YMAX,
 	.ext_clk = false,
@@ -171,7 +103,6 @@ static struct i2c_board_info __initdata u8500_i2c3_devices_stuib[] = {
 		I2C_BOARD_INFO("bu21013_tp", 0x5D),
 		.platform_data = &tsc_plat_device,
 	},
-
 };
 
 void __init mop500_stuib_init(void)
diff --git a/drivers/input/touchscreen/bu21013_ts.c b/drivers/input/touchscreen/bu21013_ts.c
index 1e8cddd06c60..c6f6a04ec673 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <linux/module.h>
+#include <linux/gpio.h>
 
 #define PEN_DOWN_INTR	0
 #define MAX_FINGERS	2
@@ -148,11 +149,12 @@
 struct bu21013_ts_data {
 	struct i2c_client *client;
 	wait_queue_head_t wait;
-	bool touch_stopped;
 	const struct bu21013_platform_device *chip;
 	struct input_dev *in_dev;
-	unsigned int intr_pin;
 	struct regulator *regulator;
+	unsigned int irq;
+	unsigned int intr_pin;
+	bool touch_stopped;
 };
 
 /**
@@ -262,7 +264,7 @@ static irqreturn_t bu21013_gpio_irq(int irq, void *device_data)
 			return IRQ_NONE;
 		}
 
-		data->intr_pin = data->chip->irq_read_val();
+		data->intr_pin = gpio_get_value(data->chip->touch_pin);
 		if (data->intr_pin == PEN_DOWN_INTR)
 			wait_event_timeout(data->wait, data->touch_stopped,
 					   msecs_to_jiffies(2));
@@ -418,9 +420,32 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data)
 {
 	bu21013_data->touch_stopped = true;
 	wake_up(&bu21013_data->wait);
-	free_irq(bu21013_data->chip->irq, bu21013_data);
+	free_irq(bu21013_data->irq, bu21013_data);
 }
 
+/**
+ * bu21013_cs_disable() - deconfigures the touch panel controller
+ * @bu21013_data: device structure pointer
+ *
+ * This function is used to deconfigure the chip selection
+ * for touch panel controller.
+ */
+static void bu21013_cs_disable(struct bu21013_ts_data *bu21013_data)
+{
+	int error;
+
+	error = gpio_direction_output(bu21013_data->chip->cs_pin, 0);
+	if (error < 0)
+		dev_warn(&bu21013_data->client->dev,
+			 "%s: gpio direction failed, error: %d\n",
+			 __func__, error);
+	else
+		gpio_set_value(bu21013_data->chip->cs_pin, 0);
+
+	gpio_free(bu21013_data->chip->cs_pin);
+}
+
+
 /**
  * bu21013_probe() - initializes the i2c-client touchscreen driver
  * @client: i2c client structure pointer
@@ -430,7 +455,7 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data)
  * driver and returns integer.
  */
 static int bu21013_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+			 const struct i2c_device_id *id)
 {
 	struct bu21013_ts_data *bu21013_data;
 	struct input_dev *in_dev;
@@ -449,6 +474,11 @@ static int bu21013_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	if (!gpio_is_valid(pdata->touch_pin)) {
+		dev_err(&client->dev, "invalid touch_pin supplied\n");
+		return -EINVAL;
+	}
+
 	bu21013_data = kzalloc(sizeof(struct bu21013_ts_data), GFP_KERNEL);
 	in_dev = input_allocate_device();
 	if (!bu21013_data || !in_dev) {
@@ -460,6 +490,7 @@ static int bu21013_probe(struct i2c_client *client,
 	bu21013_data->in_dev = in_dev;
 	bu21013_data->chip = pdata;
 	bu21013_data->client = client;
+	bu21013_data->irq = gpio_to_irq(pdata->touch_pin);
 
 	bu21013_data->regulator = regulator_get(&client->dev, "avdd");
 	if (IS_ERR(bu21013_data->regulator)) {
@@ -478,12 +509,11 @@ static int bu21013_probe(struct i2c_client *client,
 	init_waitqueue_head(&bu21013_data->wait);
 
 	/* configure the gpio pins */
-	if (pdata->cs_en) {
-		error = pdata->cs_en(pdata->cs_pin);
-		if (error < 0) {
-			dev_err(&client->dev, "chip init failed\n");
-			goto err_disable_regulator;
-		}
+	error = gpio_request_one(pdata->cs_pin, GPIOF_OUT_INIT_HIGH,
+				 "touchp_reset");
+	if (error < 0) {
+		dev_err(&client->dev, "Unable to request gpio reset_pin\n");
+		goto err_disable_regulator;
 	}
 
 	/* configure the touch panel controller */
@@ -508,12 +538,13 @@ static int bu21013_probe(struct i2c_client *client,
 						pdata->touch_y_max, 0, 0);
 	input_set_drvdata(in_dev, bu21013_data);
 
-	error = request_threaded_irq(pdata->irq, NULL, bu21013_gpio_irq,
+	error = request_threaded_irq(bu21013_data->irq, NULL, bu21013_gpio_irq,
 				     IRQF_TRIGGER_FALLING | IRQF_SHARED |
 					IRQF_ONESHOT,
 				     DRIVER_TP, bu21013_data);
 	if (error) {
-		dev_err(&client->dev, "request irq %d failed\n", pdata->irq);
+		dev_err(&client->dev, "request irq %d failed\n",
+			bu21013_data->irq);
 		goto err_cs_disable;
 	}
 
@@ -531,7 +562,7 @@ static int bu21013_probe(struct i2c_client *client,
 err_free_irq:
 	bu21013_free_irq(bu21013_data);
 err_cs_disable:
-	pdata->cs_dis(pdata->cs_pin);
+	bu21013_cs_disable(bu21013_data);
 err_disable_regulator:
 	regulator_disable(bu21013_data->regulator);
 err_put_regulator:
@@ -555,7 +586,7 @@ static int bu21013_remove(struct i2c_client *client)
 
 	bu21013_free_irq(bu21013_data);
 
-	bu21013_data->chip->cs_dis(bu21013_data->chip->cs_pin);
+	bu21013_cs_disable(bu21013_data);
 
 	input_unregister_device(bu21013_data->in_dev);
 
@@ -584,9 +615,9 @@ static int bu21013_suspend(struct device *dev)
 
 	bu21013_data->touch_stopped = true;
 	if (device_may_wakeup(&client->dev))
-		enable_irq_wake(bu21013_data->chip->irq);
+		enable_irq_wake(bu21013_data->irq);
 	else
-		disable_irq(bu21013_data->chip->irq);
+		disable_irq(bu21013_data->irq);
 
 	regulator_disable(bu21013_data->regulator);
 
@@ -621,9 +652,9 @@ static int bu21013_resume(struct device *dev)
 	bu21013_data->touch_stopped = false;
 
 	if (device_may_wakeup(&client->dev))
-		disable_irq_wake(bu21013_data->chip->irq);
+		disable_irq_wake(bu21013_data->irq);
 	else
-		enable_irq(bu21013_data->chip->irq);
+		enable_irq(bu21013_data->irq);
 
 	return 0;
 }
diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
index 05e03284b92a..6230d76bde5d 100644
--- a/include/linux/input/bu21013.h
+++ b/include/linux/input/bu21013.h
@@ -9,13 +9,10 @@
 
 /**
  * struct bu21013_platform_device - Handle the platform data
- * @cs_en:	pointer to the cs enable function
- * @cs_dis:	pointer to the cs disable function
- * @irq_read_val:    pointer to read the pen irq value function
  * @touch_x_max: touch x max
  * @touch_y_max: touch y max
  * @cs_pin: chip select pin
- * @irq: irq pin
+ * @touch_pin: touch gpio pin
  * @ext_clk: external clock flag
  * @x_flip: x flip flag
  * @y_flip: y flip flag
@@ -24,13 +21,10 @@
  * This is used to handle the platform data
  */
 struct bu21013_platform_device {
-	int (*cs_en)(int reset_pin);
-	int (*cs_dis)(int reset_pin);
-	int (*irq_read_val)(void);
 	int touch_x_max;
 	int touch_y_max;
 	unsigned int cs_pin;
-	unsigned int irq;
+	unsigned int touch_pin;
 	bool ext_clk;
 	bool x_flip;
 	bool y_flip;
-- 
cgit v1.2.3-55-g7522


From 972deb4f49b5b6703d9c6117ba0aeda2180d4447 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti D
Date: Mon, 26 Nov 2012 15:25:11 +0530
Subject: i2c: omap: Remove the OMAP_I2C_FLAG_RESET_REGS_POSTIDLE flag

The OMAP_I2C_FLAG_RESET_REGS_POSTIDLE is not used anymore
in the i2c driver. Remove the flag.

Signed-off-by: Shubhrajyoti D <shubhrajyoti@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/mach-omap2/omap_hwmod_33xx_data.c | 3 +--
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++------
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 3 +--
 drivers/i2c/busses/i2c-omap.c              | 3 +--
 include/linux/i2c-omap.h                   | 1 -
 5 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 59d5c1cd316d..c9a186bc6d40 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -1103,8 +1103,7 @@ static struct omap_hwmod_class i2c_class = {
 };
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
-	.flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE |
-		  OMAP_I2C_FLAG_RESET_REGS_POSTIDLE,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE,
 };
 
 /* i2c1 */
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 943222c40489..36270bb637e4 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -791,8 +791,7 @@ static struct omap_hwmod omap3xxx_dss_venc_hwmod = {
 /* I2C1 */
 static struct omap_i2c_dev_attr i2c1_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-			  OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags		= OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod omap3xxx_i2c1_hwmod = {
@@ -817,8 +816,7 @@ static struct omap_hwmod omap3xxx_i2c1_hwmod = {
 /* I2C2 */
 static struct omap_i2c_dev_attr i2c2_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod omap3xxx_i2c2_hwmod = {
@@ -843,8 +841,7 @@ static struct omap_hwmod omap3xxx_i2c2_hwmod = {
 /* I2C3 */
 static struct omap_i2c_dev_attr i2c3_dev_attr = {
 	.fifo_depth	= 64, /* bytes */
-	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod_irq_info i2c3_mpu_irqs[] = {
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 652d0285bd6d..eb40dbc6688e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -1526,8 +1526,7 @@ static struct omap_hwmod_class omap44xx_i2c_hwmod_class = {
 };
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
-	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE |
-			OMAP_I2C_FLAG_RESET_REGS_POSTIDLE,
+	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE,
 };
 
 /* i2c1 */
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 248280136668..7a62acb7d262 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1038,8 +1038,7 @@ static const struct i2c_algorithm omap_i2c_algo = {
 #ifdef CONFIG_OF
 static struct omap_i2c_bus_platform_data omap3_pdata = {
 	.rev = OMAP_I2C_IP_VERSION_1,
-	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_i2c_bus_platform_data omap4_pdata = {
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 1b25c04f82d9..babe0cf6d56b 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -20,7 +20,6 @@
 #define OMAP_I2C_FLAG_NO_FIFO			BIT(0)
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
-#define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
-- 
cgit v1.2.3-55-g7522


From b951b523ea45d5717377ce2216afe98440001660 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Mon, 26 Nov 2012 21:11:10 +0000
Subject: mfd: arizona: Allow the CODEC DAPM context to be accessed elsewhere

Some other device functions need to integrate with signal sources in the
audio portion (primarily for haptics) so allow CODEC to export the DAPM
context by pointing to it from the core driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/arizona/core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index dd231ac0bb1f..a580363a7d29 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -78,6 +78,8 @@ enum arizona_type {
 
 #define ARIZONA_NUM_IRQ                   50
 
+struct snd_soc_dapm_context;
+
 struct arizona {
 	struct regmap *regmap;
 	struct device *dev;
@@ -98,6 +100,8 @@ struct arizona {
 
 	struct mutex clk_lock;
 	int clk32k_ref;
+
+	struct snd_soc_dapm_context *dapm;
 };
 
 int arizona_clk32k_enable(struct arizona *arizona);
-- 
cgit v1.2.3-55-g7522


From 9dd555e2f4de1af1bb0f75cc84ed0708fcdb5987 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Mon, 26 Nov 2012 21:17:21 +0000
Subject: Input - arizona-haptics: Add driver haptics module on Arizona CODECs

The Arizona CODECs contain a haptics module providing vibration feedback
support. Implement basic support for this, providing simple start/stop and
signal magnitude control.

Since the output path for haptics is routed through the CODEC audio routing
it is modelled as a signal generator within ASoC, the haptics driver calls
DAPM to start and stop the output drivers. An appropriate output path must
be configured via ALSA to connect the haptics source to the correct output.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/input/misc/Kconfig           |  10 ++
 drivers/input/misc/Makefile          |   1 +
 drivers/input/misc/arizona-haptics.c | 255 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/arizona/pdata.h    |   6 +
 4 files changed, 272 insertions(+)
 create mode 100644 drivers/input/misc/arizona-haptics.c

(limited to 'include/linux')

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 7c0f1ecfdd7a..104a7c3153c0 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -72,6 +72,16 @@ config INPUT_AD714X_SPI
 	  To compile this driver as a module, choose M here: the
 	  module will be called ad714x-spi.
 
+config INPUT_ARIZONA_HAPTICS
+	tristate "Arizona haptics support"
+	depends on MFD_ARIZONA && SND_SOC
+	select INPUT_FF_MEMLESS
+	help
+	  Say Y to enable support for the haptics module in Arizona CODECs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called arizona-haptics.
+
 config INPUT_BMA150
 	tristate "BMA150/SMB380 acceleration sensor support"
 	depends on I2C
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 83fe6f5b77d1..5ea769eda999 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_INPUT_ADXL34X)		+= adxl34x.o
 obj-$(CONFIG_INPUT_ADXL34X_I2C)		+= adxl34x-i2c.o
 obj-$(CONFIG_INPUT_ADXL34X_SPI)		+= adxl34x-spi.o
 obj-$(CONFIG_INPUT_APANEL)		+= apanel.o
+obj-$(CONFIG_INPUT_ARIZONA_HAPTICS)	+= arizona-haptics.o
 obj-$(CONFIG_INPUT_ATI_REMOTE2)		+= ati_remote2.o
 obj-$(CONFIG_INPUT_ATLAS_BTNS)		+= atlas_btns.o
 obj-$(CONFIG_INPUT_BFIN_ROTARY)		+= bfin_rotary.o
diff --git a/drivers/input/misc/arizona-haptics.c b/drivers/input/misc/arizona-haptics.c
new file mode 100644
index 000000000000..7a04f54ef961
--- /dev/null
+++ b/drivers/input/misc/arizona-haptics.c
@@ -0,0 +1,255 @@
+/*
+ * Arizona haptics driver
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <linux/mfd/arizona/core.h>
+#include <linux/mfd/arizona/pdata.h>
+#include <linux/mfd/arizona/registers.h>
+
+struct arizona_haptics {
+	struct arizona *arizona;
+	struct input_dev *input_dev;
+	struct work_struct work;
+
+	struct mutex mutex;
+	u8 intensity;
+};
+
+static void arizona_haptics_work(struct work_struct *work)
+{
+	struct arizona_haptics *haptics = container_of(work,
+						       struct arizona_haptics,
+						       work);
+	struct arizona *arizona = haptics->arizona;
+	struct mutex *dapm_mutex = &arizona->dapm->card->dapm_mutex;
+	int ret;
+
+	if (!haptics->arizona->dapm) {
+		dev_err(arizona->dev, "No DAPM context\n");
+		return;
+	}
+
+	if (haptics->intensity) {
+		ret = regmap_update_bits(arizona->regmap,
+					 ARIZONA_HAPTICS_PHASE_2_INTENSITY,
+					 ARIZONA_PHASE2_INTENSITY_MASK,
+					 haptics->intensity);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to set intensity: %d\n",
+				ret);
+			return;
+		}
+
+		/* This enable sequence will be a noop if already enabled */
+		ret = regmap_update_bits(arizona->regmap,
+					 ARIZONA_HAPTICS_CONTROL_1,
+					 ARIZONA_HAP_CTRL_MASK,
+					 1 << ARIZONA_HAP_CTRL_SHIFT);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to start haptics: %d\n",
+				ret);
+			return;
+		}
+
+		mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
+
+		ret = snd_soc_dapm_enable_pin(arizona->dapm, "HAPTICS");
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to start HAPTICS: %d\n",
+				ret);
+			mutex_unlock(dapm_mutex);
+			return;
+		}
+
+		ret = snd_soc_dapm_sync(arizona->dapm);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to sync DAPM: %d\n",
+				ret);
+			mutex_unlock(dapm_mutex);
+			return;
+		}
+
+		mutex_unlock(dapm_mutex);
+
+	} else {
+		/* This disable sequence will be a noop if already enabled */
+		mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
+
+		ret = snd_soc_dapm_disable_pin(arizona->dapm, "HAPTICS");
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to disable HAPTICS: %d\n",
+				ret);
+			mutex_unlock(dapm_mutex);
+			return;
+		}
+
+		ret = snd_soc_dapm_sync(arizona->dapm);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to sync DAPM: %d\n",
+				ret);
+			mutex_unlock(dapm_mutex);
+			return;
+		}
+
+		mutex_unlock(dapm_mutex);
+
+		ret = regmap_update_bits(arizona->regmap,
+					 ARIZONA_HAPTICS_CONTROL_1,
+					 ARIZONA_HAP_CTRL_MASK,
+					 1 << ARIZONA_HAP_CTRL_SHIFT);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to stop haptics: %d\n",
+				ret);
+			return;
+		}
+	}
+}
+
+static int arizona_haptics_play(struct input_dev *input, void *data,
+				struct ff_effect *effect)
+{
+	struct arizona_haptics *haptics = input_get_drvdata(input);
+	struct arizona *arizona = haptics->arizona;
+
+	if (!arizona->dapm) {
+		dev_err(arizona->dev, "No DAPM context\n");
+		return -EBUSY;
+	}
+
+	if (effect->u.rumble.strong_magnitude) {
+		/* Scale the magnitude into the range the device supports */
+		if (arizona->pdata.hap_act) {
+			haptics->intensity =
+				effect->u.rumble.strong_magnitude >> 9;
+			if (effect->direction < 0x8000)
+				haptics->intensity += 0x7f;
+		} else {
+			haptics->intensity =
+				effect->u.rumble.strong_magnitude >> 8;
+		}
+	} else {
+		haptics->intensity = 0;
+	}
+
+	schedule_work(&haptics->work);
+
+	return 0;
+}
+
+static void arizona_haptics_close(struct input_dev *input)
+{
+	struct arizona_haptics *haptics = input_get_drvdata(input);
+	struct mutex *dapm_mutex = &haptics->arizona->dapm->card->dapm_mutex;
+
+	cancel_work_sync(&haptics->work);
+
+	mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
+
+	if (haptics->arizona->dapm)
+		snd_soc_dapm_disable_pin(haptics->arizona->dapm, "HAPTICS");
+
+	mutex_unlock(dapm_mutex);
+}
+
+static int arizona_haptics_probe(struct platform_device *pdev)
+{
+	struct arizona *arizona = dev_get_drvdata(pdev->dev.parent);
+	struct arizona_haptics *haptics;
+	int ret;
+
+	haptics = devm_kzalloc(&pdev->dev, sizeof(*haptics), GFP_KERNEL);
+	if (!haptics)
+		return -ENOMEM;
+
+	haptics->arizona = arizona;
+
+	ret = regmap_update_bits(arizona->regmap, ARIZONA_HAPTICS_CONTROL_1,
+				 ARIZONA_HAP_ACT, arizona->pdata.hap_act);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to set haptics actuator: %d\n",
+			ret);
+		return ret;
+	}
+
+	INIT_WORK(&haptics->work, arizona_haptics_work);
+
+	haptics->input_dev = input_allocate_device();
+	if (haptics->input_dev == NULL) {
+		dev_err(arizona->dev, "Failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	input_set_drvdata(haptics->input_dev, haptics);
+
+	haptics->input_dev->name = "arizona:haptics";
+	haptics->input_dev->dev.parent = pdev->dev.parent;
+	haptics->input_dev->close = arizona_haptics_close;
+	__set_bit(FF_RUMBLE, haptics->input_dev->ffbit);
+
+	ret = input_ff_create_memless(haptics->input_dev, NULL,
+				      arizona_haptics_play);
+	if (ret < 0) {
+		dev_err(arizona->dev, "input_ff_create_memless() failed: %d\n",
+			ret);
+		goto err_ialloc;
+	}
+
+	ret = input_register_device(haptics->input_dev);
+	if (ret < 0) {
+		dev_err(arizona->dev, "couldn't register input device: %d\n",
+			ret);
+		goto err_iff;
+	}
+
+	platform_set_drvdata(pdev, haptics);
+
+	return 0;
+
+err_iff:
+	if (haptics->input_dev)
+		input_ff_destroy(haptics->input_dev);
+err_ialloc:
+	input_free_device(haptics->input_dev);
+
+	return ret;
+}
+
+static int arizona_haptics_remove(struct platform_device *pdev)
+{
+	struct arizona_haptics *haptics = platform_get_drvdata(pdev);
+
+	input_unregister_device(haptics->input_dev);
+
+	return 0;
+}
+
+static struct platform_driver arizona_haptics_driver = {
+	.probe		= arizona_haptics_probe,
+	.remove		= arizona_haptics_remove,
+	.driver		= {
+		.name	= "arizona-haptics",
+		.owner	= THIS_MODULE,
+	},
+};
+module_platform_driver(arizona_haptics_driver);
+
+MODULE_ALIAS("platform:arizona-haptics");
+MODULE_DESCRIPTION("Arizona haptics driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 7ab442905a57..8b1d1daaae16 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -62,6 +62,9 @@
 
 #define ARIZONA_MAX_OUTPUT 6
 
+#define ARIZONA_HAP_ACT_ERM 0
+#define ARIZONA_HAP_ACT_LRA 2
+
 #define ARIZONA_MAX_PDM_SPK 2
 
 struct regulator_init_data;
@@ -114,6 +117,9 @@ struct arizona_pdata {
 
 	/** PDM speaker format */
 	unsigned int spk_fmt[ARIZONA_MAX_PDM_SPK];
+
+	/** Haptic actuator type */
+	unsigned int hap_act;
 };
 
 #endif
-- 
cgit v1.2.3-55-g7522


From e80d0a1ae8bb8fee0edd37427836f108b30f596b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 21 Nov 2012 16:26:44 +0100
Subject: cputime: Rename thread_group_times to thread_group_cputime_adjusted

We have thread_group_cputime() and thread_group_times(). The naming
doesn't provide enough information about the difference between
these two APIs.

To lower the confusion, rename thread_group_times() to
thread_group_cputime_adjusted(). This name better suggests that
it's a version of thread_group_cputime() that does some stabilization
on the raw cputime values. ie here: scale on top of CFS runtime
stats and bound lower value for monotonicity.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 fs/proc/array.c        | 4 ++--
 include/linux/sched.h  | 4 ++--
 kernel/exit.c          | 4 ++--
 kernel/sched/cputime.c | 8 ++++----
 kernel/sys.c           | 6 +++---
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index c1c207c36cae..d3696708fc1a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -438,7 +438,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			thread_group_times(task, &utime, &stime);
+			thread_group_cputime_adjusted(task, &utime, &stime);
 			gtime += sig->gtime;
 		}
 
@@ -454,7 +454,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		task_times(task, &utime, &stime);
+		task_cputime_adjusted(task, &utime, &stime);
 		gtime = task->gtime;
 	}
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e1581a029e3d..e75cab5820ab 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1751,8 +1751,8 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
  * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index 346616c0092c..618f7ee56003 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1186,11 +1186,11 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
 		 *
-		 * We use thread_group_times() to get times for the thread
+		 * We use thread_group_cputime_adjusted() to get times for the thread
 		 * group, which consolidates times for all threads in the
 		 * group including the group leader.
 		 */
-		thread_group_times(p, &tgutime, &tgstime);
+		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e56f138a23c7..7dc155371b95 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -445,13 +445,13 @@ void account_idle_ticks(unsigned long ticks)
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	*ut = p->utime;
 	*st = p->stime;
 }
 
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime;
 
@@ -516,7 +516,7 @@ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
 	return (__force cputime_t) temp;
 }
 
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
 
@@ -543,7 +543,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 /*
  * Must be called with siglock held.
  */
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct signal_struct *sig = p->signal;
 	struct task_cputime cputime;
diff --git a/kernel/sys.c b/kernel/sys.c
index e6e0ece5f6a0..265b37690421 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1046,7 +1046,7 @@ void do_sys_times(struct tms *tms)
 	cputime_t tgutime, tgstime, cutime, cstime;
 
 	spin_lock_irq(&current->sighand->siglock);
-	thread_group_times(current, &tgutime, &tgstime);
+	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
 	spin_unlock_irq(&current->sighand->siglock);
@@ -1704,7 +1704,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	utime = stime = 0;
 
 	if (who == RUSAGE_THREAD) {
-		task_times(current, &utime, &stime);
+		task_cputime_adjusted(current, &utime, &stime);
 		accumulate_thread_rusage(p, r);
 		maxrss = p->signal->maxrss;
 		goto out;
@@ -1730,7 +1730,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			thread_group_times(p, &tgutime, &tgstime);
+			thread_group_cputime_adjusted(p, &tgutime, &tgstime);
 			utime += tgutime;
 			stime += tgstime;
 			r->ru_nvcsw += p->signal->nvcsw;
-- 
cgit v1.2.3-55-g7522


From d37f761dbd276790f70dcf73a287fde2c3464482 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 22 Nov 2012 00:58:35 +0100
Subject: cputime: Consolidate cputime adjustment code

task_cputime_adjusted() and thread_group_cputime_adjusted()
essentially share the same code. They just don't use the same
source:

* The first function uses the cputime in the task struct and the
previous adjusted snapshot that ensures monotonicity.

* The second adds the cputime of all tasks in the group and the
previous adjusted snapshot of the whole group from the signal
structure.

Just consolidate the common code that does the adjustment. These
functions just need to fetch the values from the appropriate
source.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/sched.h  | 23 +++++++++++++++++++----
 kernel/fork.c          |  2 +-
 kernel/sched/cputime.c | 46 +++++++++++++++++++++++-----------------------
 3 files changed, 43 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e75cab5820ab..5dafac366811 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -433,14 +433,29 @@ struct cpu_itimer {
 	u32 incr_error;
 };
 
+/**
+ * struct cputime - snaphsot of system and user cputime
+ * @utime: time spent in user mode
+ * @stime: time spent in system mode
+ *
+ * Gathers a generic snapshot of user and system time.
+ */
+struct cputime {
+	cputime_t utime;
+	cputime_t stime;
+};
+
 /**
  * struct task_cputime - collected CPU time counts
  * @utime:		time spent in user mode, in &cputime_t units
  * @stime:		time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
  *
- * This structure groups together three kinds of CPU time that are
- * tracked for threads and thread groups.  Most things considering
+ * This is an extension of struct cputime that includes the total runtime
+ * spent by the task from the scheduler point of view.
+ *
+ * As a result, this structure groups together three kinds of CPU time
+ * that are tracked for threads and thread groups.  Most things considering
  * CPU time want to group these counts together and treat all three
  * of them in parallel.
  */
@@ -581,7 +596,7 @@ struct signal_struct {
 	cputime_t gtime;
 	cputime_t cgtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	cputime_t prev_utime, prev_stime;
+	struct cputime prev_cputime;
 #endif
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@ -1340,7 +1355,7 @@ struct task_struct {
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	cputime_t prev_utime, prev_stime;
+	struct cputime prev_cputime;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..0e7cdb90476f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1222,7 +1222,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->utime = p->stime = p->gtime = 0;
 	p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	p->prev_utime = p->prev_stime = 0;
+	p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 7dc155371b95..220fdc4db770 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -516,14 +516,18 @@ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
 	return (__force cputime_t) temp;
 }
 
-void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+static void cputime_adjust(struct task_cputime *curr,
+			   struct cputime *prev,
+			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime = p->utime, total = utime + p->stime;
+	cputime_t rtime, utime, total;
 
+	utime = curr->utime;
+	total = utime + curr->stime;
 	/*
 	 * Use CFS's precise accounting:
 	 */
-	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
+	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
 	if (total)
 		utime = scale_utime(utime, rtime, total);
@@ -533,11 +537,22 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	/*
 	 * Compare with previous values, to keep monotonicity:
 	 */
-	p->prev_utime = max(p->prev_utime, utime);
-	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+	prev->utime = max(prev->utime, utime);
+	prev->stime = max(prev->stime, rtime - prev->utime);
+
+	*ut = prev->utime;
+	*st = prev->stime;
+}
 
-	*ut = p->prev_utime;
-	*st = p->prev_stime;
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	struct task_cputime cputime = {
+		.utime = p->utime,
+		.stime = p->stime,
+		.sum_exec_runtime = p->se.sum_exec_runtime,
+	};
+
+	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
 /*
@@ -545,24 +560,9 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
  */
 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	struct signal_struct *sig = p->signal;
 	struct task_cputime cputime;
-	cputime_t rtime, utime, total;
 
 	thread_group_cputime(p, &cputime);
-
-	total = cputime.utime + cputime.stime;
-	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
-
-	if (total)
-		utime = scale_utime(cputime.utime, rtime, total);
-	else
-		utime = rtime;
-
-	sig->prev_utime = max(sig->prev_utime, utime);
-	sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
-
-	*ut = sig->prev_utime;
-	*st = sig->prev_stime;
+	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif
-- 
cgit v1.2.3-55-g7522


From 73ee29460e5d0adbb46e4962df69ae2465746612 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 27 Nov 2012 18:48:33 +0000
Subject: regulator: arizona-ldo1: Support 1.8V mode

Some Arizona device support a 1.8V output mode. Enable this in the driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/wm5102-tables.c           |   3 +
 drivers/regulator/arizona-ldo1.c      | 107 +++++++++++++++++++++++++++++++++-
 include/linux/mfd/arizona/registers.h |  16 +++++
 3 files changed, 125 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 01b9255ed631..b829a5710ddc 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -775,6 +775,7 @@ static const struct reg_default wm5102_reg_default[] = {
 	{ 0x00000154, 0x0000 },   /* R340   - Rate Estimator 3 */ 
 	{ 0x00000155, 0x0000 },   /* R341   - Rate Estimator 4 */ 
 	{ 0x00000156, 0x0000 },   /* R342   - Rate Estimator 5 */ 
+	{ 0x00000161, 0x0000 },   /* R353   - Dynamic Frequency Scaling 1 */ 
 	{ 0x00000171, 0x0000 },   /* R369   - FLL1 Control 1 */ 
 	{ 0x00000172, 0x0008 },   /* R370   - FLL1 Control 2 */ 
 	{ 0x00000173, 0x0018 },   /* R371   - FLL1 Control 3 */ 
@@ -1564,6 +1565,7 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_RATE_ESTIMATOR_3:
 	case ARIZONA_RATE_ESTIMATOR_4:
 	case ARIZONA_RATE_ESTIMATOR_5:
+	case ARIZONA_DYNAMIC_FREQUENCY_SCALING_1:
 	case ARIZONA_FLL1_CONTROL_1:
 	case ARIZONA_FLL1_CONTROL_2:
 	case ARIZONA_FLL1_CONTROL_3:
@@ -1596,6 +1598,7 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_FLL2_GPIO_CLOCK:
 	case ARIZONA_MIC_CHARGE_PUMP_1:
 	case ARIZONA_LDO1_CONTROL_1:
+	case ARIZONA_LDO1_CONTROL_2:
 	case ARIZONA_LDO2_CONTROL_1:
 	case ARIZONA_MIC_BIAS_CTRL_1:
 	case ARIZONA_MIC_BIAS_CTRL_2:
diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c
index 800c8ad3db91..739faf99b9e2 100644
--- a/drivers/regulator/arizona-ldo1.c
+++ b/drivers/regulator/arizona-ldo1.c
@@ -34,6 +34,108 @@ struct arizona_ldo1 {
 	struct regulator_init_data init_data;
 };
 
+static int arizona_ldo1_hc_list_voltage(struct regulator_dev *rdev,
+					unsigned int selector)
+{
+	if (selector >= rdev->desc->n_voltages)
+		return -EINVAL;
+
+	if (selector == rdev->desc->n_voltages - 1)
+		return 1800000;
+	else
+		return rdev->desc->min_uV + (rdev->desc->uV_step * selector);
+}
+
+static int arizona_ldo1_hc_map_voltage(struct regulator_dev *rdev,
+				       int min_uV, int max_uV)
+{
+	int sel;
+
+	sel = DIV_ROUND_UP(min_uV - rdev->desc->min_uV, rdev->desc->uV_step);
+	if (sel >= rdev->desc->n_voltages)
+		sel = rdev->desc->n_voltages - 1;
+
+	return sel;
+}
+
+static int arizona_ldo1_hc_set_voltage_sel(struct regulator_dev *rdev,
+					   unsigned sel)
+{
+	struct arizona_ldo1 *ldo = rdev_get_drvdata(rdev);
+	struct regmap *regmap = ldo->arizona->regmap;
+	unsigned int val;
+	int ret;
+
+	if (sel == rdev->desc->n_voltages - 1)
+		val = ARIZONA_LDO1_HI_PWR;
+	else
+		val = 0;
+
+	ret = regmap_update_bits(regmap, ARIZONA_LDO1_CONTROL_2,
+				 ARIZONA_LDO1_HI_PWR, val);
+	if (ret != 0)
+		return ret;
+
+	ret = regmap_update_bits(regmap, ARIZONA_DYNAMIC_FREQUENCY_SCALING_1,
+				 ARIZONA_SUBSYS_MAX_FREQ, val);
+	if (ret != 0)
+		return ret;
+
+	if (val)
+		return 0;
+
+	val = sel << ARIZONA_LDO1_VSEL_SHIFT;
+
+	return regmap_update_bits(regmap, ARIZONA_LDO1_CONTROL_1,
+				  ARIZONA_LDO1_VSEL_MASK, val);
+}
+
+static int arizona_ldo1_hc_get_voltage_sel(struct regulator_dev *rdev)
+{
+	struct arizona_ldo1 *ldo = rdev_get_drvdata(rdev);
+	struct regmap *regmap = ldo->arizona->regmap;
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(regmap, ARIZONA_LDO1_CONTROL_2, &val);
+	if (ret != 0)
+		return ret;
+
+	if (val & ARIZONA_LDO1_HI_PWR)
+		return rdev->desc->n_voltages - 1;
+
+	ret = regmap_read(regmap, ARIZONA_LDO1_CONTROL_1, &val);
+	if (ret != 0)
+		return ret;
+
+	return (val & ARIZONA_LDO1_VSEL_MASK) >> ARIZONA_LDO1_VSEL_SHIFT;
+}
+
+static struct regulator_ops arizona_ldo1_hc_ops = {
+	.list_voltage = arizona_ldo1_hc_list_voltage,
+	.map_voltage = arizona_ldo1_hc_map_voltage,
+	.get_voltage_sel = arizona_ldo1_hc_get_voltage_sel,
+	.set_voltage_sel = arizona_ldo1_hc_set_voltage_sel,
+	.get_bypass = regulator_get_bypass_regmap,
+	.set_bypass = regulator_set_bypass_regmap,
+};
+
+static const struct regulator_desc arizona_ldo1_hc = {
+	.name = "LDO1",
+	.supply_name = "LDOVDD",
+	.type = REGULATOR_VOLTAGE,
+	.ops = &arizona_ldo1_hc_ops,
+
+	.bypass_reg = ARIZONA_LDO1_CONTROL_1,
+	.bypass_mask = ARIZONA_LDO1_BYPASS,
+	.min_uV = 900000,
+	.uV_step = 50000,
+	.n_voltages = 8,
+	.enable_time = 500,
+
+	.owner = THIS_MODULE,
+};
+
 static struct regulator_ops arizona_ldo1_ops = {
 	.list_voltage = regulator_list_voltage_linear,
 	.map_voltage = regulator_map_voltage_linear,
@@ -81,6 +183,7 @@ static const struct regulator_init_data arizona_ldo1_default = {
 static __devinit int arizona_ldo1_probe(struct platform_device *pdev)
 {
 	struct arizona *arizona = dev_get_drvdata(pdev->dev.parent);
+	const struct regulator_desc *desc;
 	struct regulator_config config = { };
 	struct arizona_ldo1 *ldo1;
 	int ret;
@@ -100,9 +203,11 @@ static __devinit int arizona_ldo1_probe(struct platform_device *pdev)
 	 */
 	switch (arizona->type) {
 	case WM5102:
+		desc = &arizona_ldo1_hc;
 		ldo1->init_data = arizona_ldo1_dvfs;
 		break;
 	default:
+		desc = &arizona_ldo1;
 		ldo1->init_data = arizona_ldo1_default;
 		break;
 	}
@@ -121,7 +226,7 @@ static __devinit int arizona_ldo1_probe(struct platform_device *pdev)
 	else
 		config.init_data = &ldo1->init_data;
 
-	ldo1->regulator = regulator_register(&arizona_ldo1, &config);
+	ldo1->regulator = regulator_register(desc, &config);
 	if (IS_ERR(ldo1->regulator)) {
 		ret = PTR_ERR(ldo1->regulator);
 		dev_err(arizona->dev, "Failed to register LDO1 supply: %d\n",
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 7671a287dfee..ba26e99c388d 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -76,6 +76,7 @@
 #define ARIZONA_RATE_ESTIMATOR_3                 0x154
 #define ARIZONA_RATE_ESTIMATOR_4                 0x155
 #define ARIZONA_RATE_ESTIMATOR_5                 0x156
+#define ARIZONA_DYNAMIC_FREQUENCY_SCALING_1      0x161
 #define ARIZONA_FLL1_CONTROL_1                   0x171
 #define ARIZONA_FLL1_CONTROL_2                   0x172
 #define ARIZONA_FLL1_CONTROL_3                   0x173
@@ -110,6 +111,7 @@
 #define ARIZONA_FLL2_GPIO_CLOCK                  0x1AA
 #define ARIZONA_MIC_CHARGE_PUMP_1                0x200
 #define ARIZONA_LDO1_CONTROL_1                   0x210
+#define ARIZONA_LDO1_CONTROL_2                   0x212
 #define ARIZONA_LDO2_CONTROL_1                   0x213
 #define ARIZONA_MIC_BIAS_CTRL_1                  0x218
 #define ARIZONA_MIC_BIAS_CTRL_2                  0x219
@@ -1573,6 +1575,13 @@
 #define ARIZONA_SAMPLE_RATE_DETECT_D_SHIFT            0  /* SAMPLE_RATE_DETECT_D - [4:0] */
 #define ARIZONA_SAMPLE_RATE_DETECT_D_WIDTH            5  /* SAMPLE_RATE_DETECT_D - [4:0] */
 
+/*
+ * R353 (0x161) - Dynamic Frequency Scaling 1
+ */
+#define ARIZONA_SUBSYS_MAX_FREQ                  0x0001  /* SUBSYS_MAX_FREQ */
+#define ARIZONA_SUBSYS_MAX_FREQ_SHIFT                 0  /* SUBSYS_MAX_FREQ */
+#define ARIZONA_SUBSYS_MAX_FREQ_WIDTH                 1  /* SUBSYS_MAX_FREQ */
+
 /*
  * R369 (0x171) - FLL1 Control 1
  */
@@ -1888,6 +1897,13 @@
 #define ARIZONA_LDO1_ENA_SHIFT                        0  /* LDO1_ENA */
 #define ARIZONA_LDO1_ENA_WIDTH                        1  /* LDO1_ENA */
 
+/*
+ * R530 (0x212) - LDO1 Control 2
+ */
+#define ARIZONA_LDO1_HI_PWR                      0x0001  /* LDO1_HI_PWR */
+#define ARIZONA_LDO1_HI_PWR_SHIFT                     0  /* LDO1_HI_PWR */
+#define ARIZONA_LDO1_HI_PWR_WIDTH                     1  /* LDO1_HI_PWR */
+
 /*
  * R531 (0x213) - LDO2 Control 1
  */
-- 
cgit v1.2.3-55-g7522


From 1c9a9f59149e247e264db1fbf9f8ea3d5066eb64 Mon Sep 17 00:00:00 2001
From: Bill Pemberton
Date: Mon, 19 Nov 2012 13:19:21 -0500
Subject: kobject: remove CONFIG_HOTPLUG ifdefs

Remove conditional code based on CONFIG_HOTPLUG being false.  It's
always on now in preparation of it going away as an option.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 1e57449395b1..939b11268c86 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -203,7 +203,6 @@ extern struct kobject *power_kobj;
 /* The global /sys/firmware/ kobject for people to chain off of */
 extern struct kobject *firmware_kobj;
 
-#if defined(CONFIG_HOTPLUG)
 int kobject_uevent(struct kobject *kobj, enum kobject_action action);
 int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 			char *envp[]);
@@ -213,22 +212,5 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...);
 
 int kobject_action_type(const char *buf, size_t count,
 			enum kobject_action *type);
-#else
-static inline int kobject_uevent(struct kobject *kobj,
-				 enum kobject_action action)
-{ return 0; }
-static inline int kobject_uevent_env(struct kobject *kobj,
-				      enum kobject_action action,
-				      char *envp[])
-{ return 0; }
-
-static inline __printf(2, 3)
-int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
-{ return -ENOMEM; }
-
-static inline int kobject_action_type(const char *buf, size_t count,
-				      enum kobject_action *type)
-{ return -EINVAL; }
-#endif
 
 #endif /* _KOBJECT_H_ */
-- 
cgit v1.2.3-55-g7522


From 1974a042dd15f1f007a3a1a2dd7a23ca0e42c01d Mon Sep 17 00:00:00 2001
From: Padmavathi Venna
Date: Wed, 28 Nov 2012 16:17:48 +0530
Subject: ASoC: Samsung: Get I2S src_clk from clock alias id.

As the I2S src clks are registered with clkdev using generic
connection id, driver can get the clk name using generic id.
So the variable representing the array of rclk src clks is
deleted.

Signed-off-by: Padmavathi Venna <padma.v@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/platform_data/asoc-s3c.h |  6 ------
 sound/soc/samsung/i2s.c                | 12 ++++++------
 2 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h
index aa9875f77c40..88272591a895 100644
--- a/include/linux/platform_data/asoc-s3c.h
+++ b/include/linux/platform_data/asoc-s3c.h
@@ -38,12 +38,6 @@ struct samsung_i2s {
 #define QUIRK_NEED_RSTCLR	(1 << 3)
 	/* Quirks of the I2S controller */
 	u32 quirks;
-
-	/*
-	 * Array of clock names that can be used to generate I2S signals.
-	 * Also corresponds to clocks of I2SMOD[10]
-	 */
-	const char **src_clk;
 	dma_addr_t idma_addr;
 };
 
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 547b9190c88f..aaf57b7caebb 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -49,8 +49,6 @@ struct i2s_dai {
 	struct clk *clk;
 	/* Clock for generating I2S signals */
 	struct clk *op_clk;
-	/* Array of clock names for op_clk */
-	const char **src_clk;
 	/* Pointer to the Primary_Fifo if this is Sec_Fifo, NULL otherwise */
 	struct i2s_dai *pri_dai;
 	/* Pointer to the Secondary_Fifo if it has one, NULL otherwise */
@@ -432,8 +430,12 @@ static int i2s_set_sysclk(struct snd_soc_dai *dai,
 				}
 			}
 
-			i2s->op_clk = clk_get(&i2s->pdev->dev,
-						i2s->src_clk[clk_id]);
+			if (clk_id)
+				i2s->op_clk = clk_get(&i2s->pdev->dev,
+						"i2s_opclk1");
+			else
+				i2s->op_clk = clk_get(&i2s->pdev->dev,
+						"i2s_opclk0");
 			clk_prepare_enable(i2s->op_clk);
 			i2s->rclk_srcrate = clk_get_rate(i2s->op_clk);
 
@@ -1067,7 +1069,6 @@ static __devinit int samsung_i2s_probe(struct platform_device *pdev)
 		(struct s3c2410_dma_client *)&pri_dai->dma_capture;
 	pri_dai->dma_playback.channel = dma_pl_chan;
 	pri_dai->dma_capture.channel = dma_cp_chan;
-	pri_dai->src_clk = i2s_cfg->src_clk;
 	pri_dai->dma_playback.dma_size = 4;
 	pri_dai->dma_capture.dma_size = 4;
 	pri_dai->base = regs_base;
@@ -1088,7 +1089,6 @@ static __devinit int samsung_i2s_probe(struct platform_device *pdev)
 			(struct s3c2410_dma_client *)&sec_dai->dma_playback;
 		/* Use iDMA always if SysDMA not provided */
 		sec_dai->dma_playback.channel = dma_pl_sec_chan ? : -1;
-		sec_dai->src_clk = i2s_cfg->src_clk;
 		sec_dai->dma_playback.dma_size = 4;
 		sec_dai->base = regs_base;
 		sec_dai->quirks = quirks;
-- 
cgit v1.2.3-55-g7522


From f791be492f76dea7b0641ed227a60eeb2fa7e255 Mon Sep 17 00:00:00 2001
From: Bill Pemberton
Date: Mon, 19 Nov 2012 13:23:04 -0500
Subject: mfd: remove use of __devinit

CONFIG_HOTPLUG is going away as an option so __devinit is no longer
needed.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Cc: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Cc: Peter Tyser <ptyser@xes-inc.com>
Cc: Daniel Walker <dwalker@fifo99.com>
Cc: Bryan Huntsman <bryanh@codeaurora.org>
Acked-by: David Brown <davidb@codeaurora.org>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/88pm800.c             |  8 ++++----
 drivers/mfd/88pm805.c             |  6 +++---
 drivers/mfd/88pm80x.c             |  2 +-
 drivers/mfd/88pm860x-core.c       | 30 +++++++++++++++---------------
 drivers/mfd/ab3100-core.c         |  4 ++--
 drivers/mfd/ab8500-core.c         |  2 +-
 drivers/mfd/ab8500-debugfs.c      |  2 +-
 drivers/mfd/ab8500-gpadc.c        |  2 +-
 drivers/mfd/ab8500-sysctrl.c      |  2 +-
 drivers/mfd/adp5520.c             |  2 +-
 drivers/mfd/arizona-core.c        |  2 +-
 drivers/mfd/arizona-i2c.c         |  2 +-
 drivers/mfd/arizona-spi.c         |  2 +-
 drivers/mfd/cs5535-mfd.c          |  4 ++--
 drivers/mfd/da903x.c              |  6 +++---
 drivers/mfd/da9052-core.c         |  2 +-
 drivers/mfd/da9052-i2c.c          |  2 +-
 drivers/mfd/da9052-spi.c          |  2 +-
 drivers/mfd/da9055-core.c         |  2 +-
 drivers/mfd/da9055-i2c.c          |  2 +-
 drivers/mfd/db8500-prcmu.c        |  2 +-
 drivers/mfd/ezx-pcap.c            |  4 ++--
 drivers/mfd/htc-i2cpld.c          | 12 ++++++------
 drivers/mfd/intel_msic.c          |  4 ++--
 drivers/mfd/janz-cmodio.c         |  6 +++---
 drivers/mfd/jz4740-adc.c          |  2 +-
 drivers/mfd/lm3533-core.c         | 12 ++++++------
 drivers/mfd/lpc_ich.c             | 14 +++++++-------
 drivers/mfd/lpc_sch.c             |  2 +-
 drivers/mfd/max8907.c             |  2 +-
 drivers/mfd/max8925-core.c        |  4 ++--
 drivers/mfd/max8925-i2c.c         |  2 +-
 drivers/mfd/omap-usb-host.c       |  2 +-
 drivers/mfd/omap-usb-tll.c        |  2 +-
 drivers/mfd/palmas.c              |  4 ++--
 drivers/mfd/pcf50633-adc.c        |  2 +-
 drivers/mfd/pcf50633-core.c       |  2 +-
 drivers/mfd/pm8921-core.c         |  4 ++--
 drivers/mfd/pm8xxx-irq.c          |  2 +-
 drivers/mfd/rc5t583.c             |  2 +-
 drivers/mfd/rdc321x-southbridge.c |  2 +-
 drivers/mfd/sm501.c               | 10 +++++-----
 drivers/mfd/sta2x11-mfd.c         |  4 ++--
 drivers/mfd/stmpe-i2c.c           |  2 +-
 drivers/mfd/stmpe-spi.c           |  2 +-
 drivers/mfd/syscon.c              |  2 +-
 drivers/mfd/tc3589x.c             |  4 ++--
 drivers/mfd/tc6387xb.c            |  2 +-
 drivers/mfd/tc6393xb.c            |  2 +-
 drivers/mfd/ti-ssp.c              |  2 +-
 drivers/mfd/timberdale.c          |  2 +-
 drivers/mfd/tps6105x.c            |  4 ++--
 drivers/mfd/tps65090.c            |  4 ++--
 drivers/mfd/tps65217.c            |  2 +-
 drivers/mfd/tps6586x.c            |  6 +++---
 drivers/mfd/tps65910.c            |  6 +++---
 drivers/mfd/tps65911-comparator.c |  2 +-
 drivers/mfd/tps65912-spi.c        |  2 +-
 drivers/mfd/twl-core.c            |  2 +-
 drivers/mfd/twl4030-audio.c       |  2 +-
 drivers/mfd/twl4030-madc.c        |  2 +-
 drivers/mfd/twl4030-power.c       | 20 ++++++++++----------
 drivers/mfd/vx855.c               |  2 +-
 drivers/mfd/wl1273-core.c         |  2 +-
 drivers/mfd/wm831x-spi.c          |  2 +-
 drivers/mfd/wm8994-core.c         |  4 ++--
 include/linux/mfd/88pm80x.h       |  2 +-
 include/linux/mfd/abx500/ab8500.h |  2 +-
 include/linux/mfd/pm8xxx/irq.h    |  4 ++--
 69 files changed, 138 insertions(+), 138 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm800.c b/drivers/mfd/88pm800.c
index 321a50cdebf9..6746ecd260ad 100644
--- a/drivers/mfd/88pm800.c
+++ b/drivers/mfd/88pm800.c
@@ -248,7 +248,7 @@ static const struct regmap_irq pm800_irqs[] = {
 	},
 };
 
-static int __devinit device_gpadc_init(struct pm80x_chip *chip,
+static int device_gpadc_init(struct pm80x_chip *chip,
 				       struct pm80x_platform_data *pdata)
 {
 	struct pm80x_subchip *subchip = chip->subchip;
@@ -315,7 +315,7 @@ out:
 	return ret;
 }
 
-static int __devinit device_irq_init_800(struct pm80x_chip *chip)
+static int device_irq_init_800(struct pm80x_chip *chip)
 {
 	struct regmap *map = chip->regmap;
 	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
@@ -415,7 +415,7 @@ static void pm800_pages_exit(struct pm80x_chip *chip)
 	}
 }
 
-static int __devinit device_800_init(struct pm80x_chip *chip,
+static int device_800_init(struct pm80x_chip *chip,
 				     struct pm80x_platform_data *pdata)
 {
 	int ret, pmic_id;
@@ -499,7 +499,7 @@ out:
 	return ret;
 }
 
-static int __devinit pm800_probe(struct i2c_client *client,
+static int pm800_probe(struct i2c_client *client,
 				 const struct i2c_device_id *id)
 {
 	int ret = 0;
diff --git a/drivers/mfd/88pm805.c b/drivers/mfd/88pm805.c
index 41e0488b4e3b..13c09941dc2c 100644
--- a/drivers/mfd/88pm805.c
+++ b/drivers/mfd/88pm805.c
@@ -135,7 +135,7 @@ static struct regmap_irq pm805_irqs[] = {
 	},
 };
 
-static int __devinit device_irq_init_805(struct pm80x_chip *chip)
+static int device_irq_init_805(struct pm80x_chip *chip)
 {
 	struct regmap *map = chip->regmap;
 	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
@@ -189,7 +189,7 @@ static struct regmap_irq_chip pm805_irq_chip = {
 	.ack_base = PM805_INT_STATUS1,
 };
 
-static int __devinit device_805_init(struct pm80x_chip *chip)
+static int device_805_init(struct pm80x_chip *chip)
 {
 	int ret = 0;
 	unsigned int val;
@@ -232,7 +232,7 @@ out_irq_init:
 	return ret;
 }
 
-static int __devinit pm805_probe(struct i2c_client *client,
+static int pm805_probe(struct i2c_client *client,
 				 const struct i2c_device_id *id)
 {
 	int ret = 0;
diff --git a/drivers/mfd/88pm80x.c b/drivers/mfd/88pm80x.c
index cd0bf527d764..1adb355d86d1 100644
--- a/drivers/mfd/88pm80x.c
+++ b/drivers/mfd/88pm80x.c
@@ -31,7 +31,7 @@ const struct regmap_config pm80x_regmap_config = {
 };
 EXPORT_SYMBOL_GPL(pm80x_regmap_config);
 
-int __devinit pm80x_init(struct i2c_client *client,
+int pm80x_init(struct i2c_client *client,
 				 const struct i2c_device_id *id)
 {
 	struct pm80x_chip *chip;
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index c2d5595e49d0..ad36ad70a0c2 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -565,7 +565,7 @@ static struct irq_domain_ops pm860x_irq_domain_ops = {
 	.xlate	= irq_domain_xlate_onetwocell,
 };
 
-static int __devinit device_irq_init(struct pm860x_chip *chip,
+static int device_irq_init(struct pm860x_chip *chip,
 				     struct pm860x_platform_data *pdata)
 {
 	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
@@ -730,7 +730,7 @@ out:
 }
 EXPORT_SYMBOL(pm8606_osc_disable);
 
-static void __devinit device_osc_init(struct i2c_client *i2c)
+static void device_osc_init(struct i2c_client *i2c)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 
@@ -745,7 +745,7 @@ static void __devinit device_osc_init(struct i2c_client *i2c)
 	chip->osc_status = PM8606_REF_GP_OSC_OFF;
 }
 
-static void __devinit device_bk_init(struct pm860x_chip *chip,
+static void device_bk_init(struct pm860x_chip *chip,
 				     struct pm860x_platform_data *pdata)
 {
 	int ret, i;
@@ -765,7 +765,7 @@ static void __devinit device_bk_init(struct pm860x_chip *chip,
 		dev_err(chip->dev, "Failed to add backlight subdev\n");
 }
 
-static void __devinit device_led_init(struct pm860x_chip *chip,
+static void device_led_init(struct pm860x_chip *chip,
 				      struct pm860x_platform_data *pdata)
 {
 	int ret, i;
@@ -787,7 +787,7 @@ static void __devinit device_led_init(struct pm860x_chip *chip,
 	}
 }
 
-static void __devinit device_regulator_init(struct pm860x_chip *chip,
+static void device_regulator_init(struct pm860x_chip *chip,
 					    struct pm860x_platform_data *pdata)
 {
 	int ret;
@@ -866,7 +866,7 @@ static void __devinit device_regulator_init(struct pm860x_chip *chip,
 	}
 }
 
-static void __devinit device_rtc_init(struct pm860x_chip *chip,
+static void device_rtc_init(struct pm860x_chip *chip,
 				      struct pm860x_platform_data *pdata)
 {
 	int ret;
@@ -885,7 +885,7 @@ static void __devinit device_rtc_init(struct pm860x_chip *chip,
 		dev_err(chip->dev, "Failed to add rtc subdev\n");
 }
 
-static void __devinit device_touch_init(struct pm860x_chip *chip,
+static void device_touch_init(struct pm860x_chip *chip,
 					struct pm860x_platform_data *pdata)
 {
 	int ret;
@@ -904,7 +904,7 @@ static void __devinit device_touch_init(struct pm860x_chip *chip,
 		dev_err(chip->dev, "Failed to add touch subdev\n");
 }
 
-static void __devinit device_power_init(struct pm860x_chip *chip,
+static void device_power_init(struct pm860x_chip *chip,
 					struct pm860x_platform_data *pdata)
 {
 	int ret;
@@ -951,7 +951,7 @@ static void __devinit device_power_init(struct pm860x_chip *chip,
 	}
 }
 
-static void __devinit device_onkey_init(struct pm860x_chip *chip,
+static void device_onkey_init(struct pm860x_chip *chip,
 					struct pm860x_platform_data *pdata)
 {
 	int ret;
@@ -965,7 +965,7 @@ static void __devinit device_onkey_init(struct pm860x_chip *chip,
 		dev_err(chip->dev, "Failed to add onkey subdev\n");
 }
 
-static void __devinit device_codec_init(struct pm860x_chip *chip,
+static void device_codec_init(struct pm860x_chip *chip,
 					struct pm860x_platform_data *pdata)
 {
 	int ret;
@@ -979,7 +979,7 @@ static void __devinit device_codec_init(struct pm860x_chip *chip,
 		dev_err(chip->dev, "Failed to add codec subdev\n");
 }
 
-static void __devinit device_8607_init(struct pm860x_chip *chip,
+static void device_8607_init(struct pm860x_chip *chip,
 				       struct i2c_client *i2c,
 				       struct pm860x_platform_data *pdata)
 {
@@ -1040,7 +1040,7 @@ out:
 	return;
 }
 
-static void __devinit device_8606_init(struct pm860x_chip *chip,
+static void device_8606_init(struct pm860x_chip *chip,
 				       struct i2c_client *i2c,
 				       struct pm860x_platform_data *pdata)
 {
@@ -1049,7 +1049,7 @@ static void __devinit device_8606_init(struct pm860x_chip *chip,
 	device_led_init(chip, pdata);
 }
 
-static int __devinit pm860x_device_init(struct pm860x_chip *chip,
+static int pm860x_device_init(struct pm860x_chip *chip,
 					struct pm860x_platform_data *pdata)
 {
 	chip->core_irq = 0;
@@ -1109,7 +1109,7 @@ static struct regmap_config pm860x_regmap_config = {
 	.val_bits = 8,
 };
 
-static int __devinit pm860x_dt_init(struct device_node *np,
+static int pm860x_dt_init(struct device_node *np,
 				    struct device *dev,
 				    struct pm860x_platform_data *pdata)
 {
@@ -1127,7 +1127,7 @@ static int __devinit pm860x_dt_init(struct device_node *np,
 	return 0;
 }
 
-static int __devinit pm860x_probe(struct i2c_client *client,
+static int pm860x_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct pm860x_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
index 8355d4ee6edd..84b2303bc770 100644
--- a/drivers/mfd/ab3100-core.c
+++ b/drivers/mfd/ab3100-core.c
@@ -708,7 +708,7 @@ ab3100_init_settings[] = {
 	},
 };
 
-static int __devinit ab3100_setup(struct ab3100 *ab3100)
+static int ab3100_setup(struct ab3100 *ab3100)
 {
 	int err = 0;
 	int i;
@@ -857,7 +857,7 @@ static const struct ab_family_id ids[] __devinitconst = {
 	},
 };
 
-static int __devinit ab3100_probe(struct i2c_client *client,
+static int ab3100_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct ab3100 *ab3100;
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 01a0e01f6a6c..43245f2ce758 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1248,7 +1248,7 @@ static struct attribute_group ab9540_attr_group = {
 	.attrs	= ab9540_sysfs_entries,
 };
 
-static int __devinit ab8500_probe(struct platform_device *pdev)
+static int ab8500_probe(struct platform_device *pdev)
 {
 	static char *switch_off_status[] = {
 		"Swoff bit programming",
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index efdbc7bebd01..44843680d982 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -552,7 +552,7 @@ static struct dentry *ab8500_bank_file;
 static struct dentry *ab8500_address_file;
 static struct dentry *ab8500_val_file;
 
-static int __devinit ab8500_debug_probe(struct platform_device *plf)
+static int ab8500_debug_probe(struct platform_device *plf)
 {
 	debug_bank = AB8500_MISC;
 	debug_address = AB8500_REV_REG & 0x00FF;
diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index 97daf61f068c..c5e168e3ef11 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c
@@ -571,7 +571,7 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc)
 		gpadc->cal_data[ADC_INPUT_VBAT].offset);
 }
 
-static int __devinit ab8500_gpadc_probe(struct platform_device *pdev)
+static int ab8500_gpadc_probe(struct platform_device *pdev)
 {
 	int ret = 0;
 	struct ab8500_gpadc *gpadc;
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index d8cc157af3e8..499263c3110a 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -49,7 +49,7 @@ int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
 		(u8)(reg & 0xFF), mask, value);
 }
 
-static int __devinit ab8500_sysctrl_probe(struct platform_device *pdev)
+static int ab8500_sysctrl_probe(struct platform_device *pdev)
 {
 	sysctrl_dev = &pdev->dev;
 	return 0;
diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c
index f664a52687d5..f2f9d8ff6416 100644
--- a/drivers/mfd/adp5520.c
+++ b/drivers/mfd/adp5520.c
@@ -203,7 +203,7 @@ static int adp5520_remove_subdevs(struct adp5520_chip *chip)
 	return device_for_each_child(chip->dev, NULL, __remove_subdev);
 }
 
-static int __devinit adp5520_probe(struct i2c_client *client,
+static int adp5520_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct adp5520_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 1b48f2094806..47e711674739 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -285,7 +285,7 @@ static struct mfd_cell wm5110_devs[] = {
 	{ .name = "wm5110-codec" },
 };
 
-int __devinit arizona_dev_init(struct arizona *arizona)
+int arizona_dev_init(struct arizona *arizona)
 {
 	struct device *dev = arizona->dev;
 	const char *type_name;
diff --git a/drivers/mfd/arizona-i2c.c b/drivers/mfd/arizona-i2c.c
index 43d164736fcb..aaf1a69134df 100644
--- a/drivers/mfd/arizona-i2c.c
+++ b/drivers/mfd/arizona-i2c.c
@@ -22,7 +22,7 @@
 
 #include "arizona.h"
 
-static __devinit int arizona_i2c_probe(struct i2c_client *i2c,
+static int arizona_i2c_probe(struct i2c_client *i2c,
 					  const struct i2c_device_id *id)
 {
 	struct arizona *arizona;
diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c
index cc79b824f706..9663cafdb7ff 100644
--- a/drivers/mfd/arizona-spi.c
+++ b/drivers/mfd/arizona-spi.c
@@ -22,7 +22,7 @@
 
 #include "arizona.h"
 
-static int __devinit arizona_spi_probe(struct spi_device *spi)
+static int arizona_spi_probe(struct spi_device *spi)
 {
 	const struct spi_device_id *id = spi_get_device_id(spi);
 	struct arizona *arizona;
diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index 5d3878ad39a6..0779b13a7dd1 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c
@@ -113,7 +113,7 @@ static __devinitdata struct mfd_cell cs5535_mfd_cells[] = {
 };
 
 #ifdef CONFIG_OLPC
-static void __devinit cs5535_clone_olpc_cells(void)
+static void cs5535_clone_olpc_cells(void)
 {
 	const char *acpi_clones[] = { "olpc-xo1-pm-acpi", "olpc-xo1-sci-acpi" };
 
@@ -126,7 +126,7 @@ static void __devinit cs5535_clone_olpc_cells(void)
 static void cs5535_clone_olpc_cells(void) { }
 #endif
 
-static int __devinit cs5535_mfd_probe(struct pci_dev *pdev,
+static int cs5535_mfd_probe(struct pci_dev *pdev,
 		const struct pci_device_id *id)
 {
 	int err, i;
diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index c715475df377..5fa1e91a9532 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -246,7 +246,7 @@ int da903x_query_status(struct device *dev, unsigned int sbits)
 }
 EXPORT_SYMBOL(da903x_query_status);
 
-static int __devinit da9030_init_chip(struct da903x_chip *chip)
+static int da9030_init_chip(struct da903x_chip *chip)
 {
 	uint8_t chip_id;
 	int err;
@@ -459,7 +459,7 @@ static int da903x_remove_subdevs(struct da903x_chip *chip)
 	return device_for_each_child(chip->dev, NULL, __remove_subdev);
 }
 
-static int __devinit da903x_add_subdevs(struct da903x_chip *chip,
+static int da903x_add_subdevs(struct da903x_chip *chip,
 					struct da903x_platform_data *pdata)
 {
 	struct da903x_subdev_info *subdev;
@@ -491,7 +491,7 @@ failed:
 	return ret;
 }
 
-static int __devinit da903x_probe(struct i2c_client *client,
+static int da903x_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct da903x_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index a0a62b24621b..c71c4a247186 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -769,7 +769,7 @@ struct regmap_config da9052_regmap_config = {
 };
 EXPORT_SYMBOL_GPL(da9052_regmap_config);
 
-int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
+int da9052_device_init(struct da9052 *da9052, u8 chip_id)
 {
 	struct da9052_pdata *pdata = da9052->dev->platform_data;
 	int ret;
diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
index 8af0f0c5cefe..96f66ed8dbfc 100644
--- a/drivers/mfd/da9052-i2c.c
+++ b/drivers/mfd/da9052-i2c.c
@@ -64,7 +64,7 @@ static const struct of_device_id dialog_dt_ids[] = {
 };
 #endif
 
-static int __devinit da9052_i2c_probe(struct i2c_client *client,
+static int da9052_i2c_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct da9052 *da9052;
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index cbcc9bebbbe1..1ad324373c97 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -21,7 +21,7 @@
 
 #include <linux/mfd/da9052/da9052.h>
 
-static int __devinit da9052_spi_probe(struct spi_device *spi)
+static int da9052_spi_probe(struct spi_device *spi)
 {
 	int ret;
 	const struct spi_device_id *id = spi_get_device_id(spi);
diff --git a/drivers/mfd/da9055-core.c b/drivers/mfd/da9055-core.c
index ff6c77f392bd..6f5a4984f0aa 100644
--- a/drivers/mfd/da9055-core.c
+++ b/drivers/mfd/da9055-core.c
@@ -377,7 +377,7 @@ static struct regmap_irq_chip da9055_regmap_irq_chip = {
 	.num_irqs = ARRAY_SIZE(da9055_irqs),
 };
 
-int __devinit da9055_device_init(struct da9055 *da9055)
+int da9055_device_init(struct da9055 *da9055)
 {
 	struct da9055_pdata *pdata = da9055->dev->platform_data;
 	int ret;
diff --git a/drivers/mfd/da9055-i2c.c b/drivers/mfd/da9055-i2c.c
index dbaca7bc36da..7778d042fb5d 100644
--- a/drivers/mfd/da9055-i2c.c
+++ b/drivers/mfd/da9055-i2c.c
@@ -18,7 +18,7 @@
 
 #include <linux/mfd/da9055/core.h>
 
-static int __devinit da9055_i2c_probe(struct i2c_client *i2c,
+static int da9055_i2c_probe(struct i2c_client *i2c,
 				      const struct i2c_device_id *id)
 {
 	struct da9055 *da9055;
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 00b8b0f3dfb6..084a58776837 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -3034,7 +3034,7 @@ static struct mfd_cell db8500_prcmu_devs[] = {
  * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic
  *
  */
-static int __devinit db8500_prcmu_probe(struct platform_device *pdev)
+static int db8500_prcmu_probe(struct platform_device *pdev)
 {
 	struct ab8500_platform_data *ab8500_platdata = pdev->dev.platform_data;
 	struct device_node *np = pdev->dev.of_node;
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index d7e4de041526..d81505e50d12 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -371,7 +371,7 @@ static int pcap_remove_subdev(struct device *dev, void *unused)
 	return 0;
 }
 
-static int __devinit pcap_add_subdev(struct pcap_chip *pcap,
+static int pcap_add_subdev(struct pcap_chip *pcap,
 						struct pcap_subdev *subdev)
 {
 	struct platform_device *pdev;
@@ -420,7 +420,7 @@ static int __devexit ezx_pcap_remove(struct spi_device *spi)
 	return 0;
 }
 
-static int __devinit ezx_pcap_probe(struct spi_device *spi)
+static int ezx_pcap_probe(struct spi_device *spi)
 {
 	struct pcap_platform_data *pdata = spi->dev.platform_data;
 	struct pcap_chip *pcap;
diff --git a/drivers/mfd/htc-i2cpld.c b/drivers/mfd/htc-i2cpld.c
index d55065cc324c..324187c0c124 100644
--- a/drivers/mfd/htc-i2cpld.c
+++ b/drivers/mfd/htc-i2cpld.c
@@ -327,7 +327,7 @@ static void htcpld_chip_reset(struct i2c_client *client)
 		client, (chip_data->cache_out = chip_data->reset));
 }
 
-static int __devinit htcpld_setup_chip_irq(
+static int htcpld_setup_chip_irq(
 		struct platform_device *pdev,
 		int chip_index)
 {
@@ -361,7 +361,7 @@ static int __devinit htcpld_setup_chip_irq(
 	return ret;
 }
 
-static int __devinit htcpld_register_chip_i2c(
+static int htcpld_register_chip_i2c(
 		struct platform_device *pdev,
 		int chip_index)
 {
@@ -419,7 +419,7 @@ static int __devinit htcpld_register_chip_i2c(
 	return 0;
 }
 
-static void __devinit htcpld_unregister_chip_i2c(
+static void htcpld_unregister_chip_i2c(
 		struct platform_device *pdev,
 		int chip_index)
 {
@@ -434,7 +434,7 @@ static void __devinit htcpld_unregister_chip_i2c(
 		i2c_unregister_device(chip->client);
 }
 
-static int __devinit htcpld_register_chip_gpio(
+static int htcpld_register_chip_gpio(
 		struct platform_device *pdev,
 		int chip_index)
 {
@@ -501,7 +501,7 @@ static int __devinit htcpld_register_chip_gpio(
 	return 0;
 }
 
-static int __devinit htcpld_setup_chips(struct platform_device *pdev)
+static int htcpld_setup_chips(struct platform_device *pdev)
 {
 	struct htcpld_data *htcpld;
 	struct device *dev = &pdev->dev;
@@ -563,7 +563,7 @@ static int __devinit htcpld_setup_chips(struct platform_device *pdev)
 	return 0;
 }
 
-static int __devinit htcpld_core_probe(struct platform_device *pdev)
+static int htcpld_core_probe(struct platform_device *pdev)
 {
 	struct htcpld_data *htcpld;
 	struct device *dev = &pdev->dev;
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index c5f478eb4e18..438ac3df166b 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c
@@ -306,7 +306,7 @@ int intel_msic_irq_read(struct intel_msic *msic, unsigned short reg, u8 *val)
 }
 EXPORT_SYMBOL_GPL(intel_msic_irq_read);
 
-static int __devinit intel_msic_init_devices(struct intel_msic *msic)
+static int intel_msic_init_devices(struct intel_msic *msic)
 {
 	struct platform_device *pdev = msic->pdev;
 	struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
@@ -375,7 +375,7 @@ static void __devexit intel_msic_remove_devices(struct intel_msic *msic)
 		gpio_free(pdata->ocd->gpio);
 }
 
-static int __devinit intel_msic_probe(struct platform_device *pdev)
+static int intel_msic_probe(struct platform_device *pdev)
 {
 	struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
 	struct intel_msic *msic;
diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c
index 1a3627784246..55c479e57aad 100644
--- a/drivers/mfd/janz-cmodio.c
+++ b/drivers/mfd/janz-cmodio.c
@@ -63,7 +63,7 @@ struct cmodio_device {
  * Subdevices using the mfd-core API
  */
 
-static int __devinit cmodio_setup_subdevice(struct cmodio_device *priv,
+static int cmodio_setup_subdevice(struct cmodio_device *priv,
 					    char *name, unsigned int devno,
 					    unsigned int modno)
 {
@@ -120,7 +120,7 @@ static int __devinit cmodio_setup_subdevice(struct cmodio_device *priv,
 }
 
 /* Probe each submodule using kernel parameters */
-static int __devinit cmodio_probe_submodules(struct cmodio_device *priv)
+static int cmodio_probe_submodules(struct cmodio_device *priv)
 {
 	struct pci_dev *pdev = priv->pdev;
 	unsigned int num_probed = 0;
@@ -177,7 +177,7 @@ static const struct attribute_group cmodio_sysfs_attr_group = {
  * PCI Driver
  */
 
-static int __devinit cmodio_pci_probe(struct pci_dev *dev,
+static int cmodio_pci_probe(struct pci_dev *dev,
 				      const struct pci_device_id *id)
 {
 	struct cmodio_device *priv;
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index 9aed2a66180e..c0d38c597f0c 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -202,7 +202,7 @@ static struct mfd_cell jz4740_adc_cells[] = {
 	},
 };
 
-static int __devinit jz4740_adc_probe(struct platform_device *pdev)
+static int jz4740_adc_probe(struct platform_device *pdev)
 {
 	struct irq_chip_generic *gc;
 	struct irq_chip_type *ct;
diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
index e7aa2a5a826b..2b74508655d4 100644
--- a/drivers/mfd/lm3533-core.c
+++ b/drivers/mfd/lm3533-core.c
@@ -382,7 +382,7 @@ static struct attribute_group lm3533_attribute_group = {
 	.attrs		= lm3533_attributes
 };
 
-static int __devinit lm3533_device_als_init(struct lm3533 *lm3533)
+static int lm3533_device_als_init(struct lm3533 *lm3533)
 {
 	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
 	int ret;
@@ -405,7 +405,7 @@ static int __devinit lm3533_device_als_init(struct lm3533 *lm3533)
 	return 0;
 }
 
-static int __devinit lm3533_device_bl_init(struct lm3533 *lm3533)
+static int lm3533_device_bl_init(struct lm3533 *lm3533)
 {
 	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
 	int i;
@@ -434,7 +434,7 @@ static int __devinit lm3533_device_bl_init(struct lm3533 *lm3533)
 	return 0;
 }
 
-static int __devinit lm3533_device_led_init(struct lm3533 *lm3533)
+static int lm3533_device_led_init(struct lm3533 *lm3533)
 {
 	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
 	int i;
@@ -463,7 +463,7 @@ static int __devinit lm3533_device_led_init(struct lm3533 *lm3533)
 	return 0;
 }
 
-static int __devinit lm3533_device_setup(struct lm3533 *lm3533,
+static int lm3533_device_setup(struct lm3533 *lm3533,
 					struct lm3533_platform_data *pdata)
 {
 	int ret;
@@ -479,7 +479,7 @@ static int __devinit lm3533_device_setup(struct lm3533 *lm3533,
 	return 0;
 }
 
-static int __devinit lm3533_device_init(struct lm3533 *lm3533)
+static int lm3533_device_init(struct lm3533 *lm3533)
 {
 	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
 	int ret;
@@ -596,7 +596,7 @@ static struct regmap_config regmap_config = {
 	.precious_reg	= lm3533_precious_register,
 };
 
-static int __devinit lm3533_i2c_probe(struct i2c_client *i2c,
+static int lm3533_i2c_probe(struct i2c_client *i2c,
 					const struct i2c_device_id *id)
 {
 	struct lm3533 *lm3533;
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index a43c73ac25b0..99891752c338 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -672,7 +672,7 @@ static void lpc_ich_restore_config_space(struct pci_dev *dev)
 	}
 }
 
-static void __devinit lpc_ich_enable_acpi_space(struct pci_dev *dev)
+static void lpc_ich_enable_acpi_space(struct pci_dev *dev)
 {
 	u8 reg_save;
 
@@ -681,7 +681,7 @@ static void __devinit lpc_ich_enable_acpi_space(struct pci_dev *dev)
 	lpc_ich_acpi_save = reg_save;
 }
 
-static void __devinit lpc_ich_enable_gpio_space(struct pci_dev *dev)
+static void lpc_ich_enable_gpio_space(struct pci_dev *dev)
 {
 	u8 reg_save;
 
@@ -690,7 +690,7 @@ static void __devinit lpc_ich_enable_gpio_space(struct pci_dev *dev)
 	lpc_ich_gpio_save = reg_save;
 }
 
-static void __devinit lpc_ich_finalize_cell(struct mfd_cell *cell,
+static void lpc_ich_finalize_cell(struct mfd_cell *cell,
 					const struct pci_device_id *id)
 {
 	cell->platform_data = &lpc_chipset_info[id->driver_data];
@@ -702,7 +702,7 @@ static void __devinit lpc_ich_finalize_cell(struct mfd_cell *cell,
  * GPIO groups and it's enough to have access to one of these to instantiate
  * the device.
  */
-static int __devinit lpc_ich_check_conflict_gpio(struct resource *res)
+static int lpc_ich_check_conflict_gpio(struct resource *res)
 {
 	int ret;
 	u8 use_gpio = 0;
@@ -721,7 +721,7 @@ static int __devinit lpc_ich_check_conflict_gpio(struct resource *res)
 	return use_gpio ? use_gpio : ret;
 }
 
-static int __devinit lpc_ich_init_gpio(struct pci_dev *dev,
+static int lpc_ich_init_gpio(struct pci_dev *dev,
 				const struct pci_device_id *id)
 {
 	u32 base_addr_cfg;
@@ -798,7 +798,7 @@ gpio_done:
 	return ret;
 }
 
-static int __devinit lpc_ich_init_wdt(struct pci_dev *dev,
+static int lpc_ich_init_wdt(struct pci_dev *dev,
 				const struct pci_device_id *id)
 {
 	u32 base_addr_cfg;
@@ -852,7 +852,7 @@ wdt_done:
 	return ret;
 }
 
-static int __devinit lpc_ich_probe(struct pci_dev *dev,
+static int lpc_ich_probe(struct pci_dev *dev,
 				const struct pci_device_id *id)
 {
 	int ret;
diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c
index 27477f4533ea..5756a6af08dc 100644
--- a/drivers/mfd/lpc_sch.c
+++ b/drivers/mfd/lpc_sch.c
@@ -83,7 +83,7 @@ static DEFINE_PCI_DEVICE_TABLE(lpc_sch_ids) = {
 };
 MODULE_DEVICE_TABLE(pci, lpc_sch_ids);
 
-static int __devinit lpc_sch_probe(struct pci_dev *dev,
+static int lpc_sch_probe(struct pci_dev *dev,
 				const struct pci_device_id *id)
 {
 	unsigned int base_addr_cfg;
diff --git a/drivers/mfd/max8907.c b/drivers/mfd/max8907.c
index 17f2593d82b8..81ded7a4ca8c 100644
--- a/drivers/mfd/max8907.c
+++ b/drivers/mfd/max8907.c
@@ -183,7 +183,7 @@ static void max8907_power_off(void)
 			MAX8907_MASK_POWER_OFF, MAX8907_MASK_POWER_OFF);
 }
 
-static __devinit int max8907_i2c_probe(struct i2c_client *i2c,
+static int max8907_i2c_probe(struct i2c_client *i2c,
 				       const struct i2c_device_id *id)
 {
 	struct max8907 *max8907;
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 9f54c04912f2..20daa16b83c9 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -714,7 +714,7 @@ tsc_irq:
 	return 0;
 }
 
-static void __devinit init_regulator(struct max8925_chip *chip,
+static void init_regulator(struct max8925_chip *chip,
 				     struct max8925_platform_data *pdata)
 {
 	int ret;
@@ -821,7 +821,7 @@ static void __devinit init_regulator(struct max8925_chip *chip,
 	}
 }
 
-int __devinit max8925_device_init(struct max8925_chip *chip,
+int max8925_device_init(struct max8925_chip *chip,
 				  struct max8925_platform_data *pdata)
 {
 	int ret;
diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
index 375e0475f6d2..6e3d30aa00df 100644
--- a/drivers/mfd/max8925-i2c.c
+++ b/drivers/mfd/max8925-i2c.c
@@ -135,7 +135,7 @@ static const struct i2c_device_id max8925_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, max8925_id_table);
 
-static int __devinit max8925_probe(struct i2c_client *client,
+static int max8925_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct max8925_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 23cec57c02ba..fc23dfbb6910 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -464,7 +464,7 @@ static void omap_usbhs_deinit(struct device *dev)
  *
  * Allocates basic resources for this USB host controller.
  */
-static int __devinit usbhs_omap_probe(struct platform_device *pdev)
+static int usbhs_omap_probe(struct platform_device *pdev)
 {
 	struct device			*dev =  &pdev->dev;
 	struct usbhs_omap_platform_data	*pdata = dev->platform_data;
diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c
index 2d8edfd85e1c..0b586885446b 100644
--- a/drivers/mfd/omap-usb-tll.c
+++ b/drivers/mfd/omap-usb-tll.c
@@ -200,7 +200,7 @@ static unsigned ohci_omap3_fslsmode(enum usbhs_omap_port_mode mode)
  *
  * Allocates basic resources for this USB host controller.
  */
-static int __devinit usbtll_omap_probe(struct platform_device *pdev)
+static int usbtll_omap_probe(struct platform_device *pdev)
 {
 	struct device				*dev =  &pdev->dev;
 	struct usbtll_omap_platform_data	*pdata = dev->platform_data;
diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c
index 4f8d6e6b19aa..cb52783390c1 100644
--- a/drivers/mfd/palmas.c
+++ b/drivers/mfd/palmas.c
@@ -247,7 +247,7 @@ static struct regmap_irq_chip palmas_irq_chip = {
 			PALMAS_INT1_MASK),
 };
 
-static void __devinit palmas_dt_to_pdata(struct device_node *node,
+static void palmas_dt_to_pdata(struct device_node *node,
 		struct palmas_platform_data *pdata)
 {
 	int ret;
@@ -275,7 +275,7 @@ static void __devinit palmas_dt_to_pdata(struct device_node *node,
 					PALMAS_POWER_CTRL_ENABLE2_MASK;
 }
 
-static int __devinit palmas_i2c_probe(struct i2c_client *i2c,
+static int palmas_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
 	struct palmas *palmas;
diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c
index a285b5111edb..dbd2f0d0078e 100644
--- a/drivers/mfd/pcf50633-adc.c
+++ b/drivers/mfd/pcf50633-adc.c
@@ -199,7 +199,7 @@ static void pcf50633_adc_irq(int irq, void *data)
 	kfree(req);
 }
 
-static int __devinit pcf50633_adc_probe(struct platform_device *pdev)
+static int pcf50633_adc_probe(struct platform_device *pdev)
 {
 	struct pcf50633_adc *adc;
 
diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index ad438e85cbf7..fc477353081e 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -191,7 +191,7 @@ static struct regmap_config pcf50633_regmap_config = {
 	.val_bits = 8,
 };
 
-static int __devinit pcf50633_probe(struct i2c_client *client,
+static int pcf50633_probe(struct i2c_client *client,
 				const struct i2c_device_id *ids)
 {
 	struct pcf50633 *pcf;
diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c
index 24d57bcc98b6..43491346e7d0 100644
--- a/drivers/mfd/pm8921-core.c
+++ b/drivers/mfd/pm8921-core.c
@@ -80,7 +80,7 @@ static struct pm8xxx_drvdata pm8921_drvdata = {
 	.pmic_read_irq_stat	= pm8921_read_irq_stat,
 };
 
-static int __devinit pm8921_add_subdevices(const struct pm8921_platform_data
+static int pm8921_add_subdevices(const struct pm8921_platform_data
 					   *pdata,
 					   struct pm8921 *pmic,
 					   u32 rev)
@@ -104,7 +104,7 @@ static int __devinit pm8921_add_subdevices(const struct pm8921_platform_data
 	return ret;
 }
 
-static int __devinit pm8921_probe(struct platform_device *pdev)
+static int pm8921_probe(struct platform_device *pdev)
 {
 	const struct pm8921_platform_data *pdata = pdev->dev.platform_data;
 	struct pm8921 *pmic;
diff --git a/drivers/mfd/pm8xxx-irq.c b/drivers/mfd/pm8xxx-irq.c
index d452dd013081..59c20ea329de 100644
--- a/drivers/mfd/pm8xxx-irq.c
+++ b/drivers/mfd/pm8xxx-irq.c
@@ -309,7 +309,7 @@ bail_out:
 }
 EXPORT_SYMBOL_GPL(pm8xxx_get_irq_stat);
 
-struct pm_irq_chip *  __devinit pm8xxx_irq_init(struct device *dev,
+struct pm_irq_chip *  pm8xxx_irq_init(struct device *dev,
 				const struct pm8xxx_irq_platform_data *pdata)
 {
 	struct pm_irq_chip  *chip;
diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c
index d38ee24302ca..f721b13e986c 100644
--- a/drivers/mfd/rc5t583.c
+++ b/drivers/mfd/rc5t583.c
@@ -246,7 +246,7 @@ static const struct regmap_config rc5t583_regmap_config = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
-static int __devinit rc5t583_i2c_probe(struct i2c_client *i2c,
+static int rc5t583_i2c_probe(struct i2c_client *i2c,
 			      const struct i2c_device_id *id)
 {
 	struct rc5t583 *rc5t583;
diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index 330396306d62..be46539c3080 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c
@@ -72,7 +72,7 @@ static struct mfd_cell rdc321x_sb_cells[] = {
 	},
 };
 
-static int __devinit rdc321x_sb_probe(struct pci_dev *pdev,
+static int rdc321x_sb_probe(struct pci_dev *pdev,
 					const struct pci_device_id *ent)
 {
 	int err;
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 777af5ec25d4..c9c102c49028 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1014,7 +1014,7 @@ static struct gpio_chip gpio_chip_template = {
 	.get			= sm501_gpio_get,
 };
 
-static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
+static int sm501_gpio_register_chip(struct sm501_devdata *sm,
 					      struct sm501_gpio *gpio,
 					      struct sm501_gpio_chip *chip)
 {
@@ -1042,7 +1042,7 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
 	return gpiochip_add(gchip);
 }
 
-static int __devinit sm501_register_gpio(struct sm501_devdata *sm)
+static int sm501_register_gpio(struct sm501_devdata *sm)
 {
 	struct sm501_gpio *gpio = &sm->gpio;
 	resource_size_t iobase = sm->io_res->start + SM501_GPIO;
@@ -1313,7 +1313,7 @@ static unsigned int sm501_mem_local[] = {
  * Common init code for an SM501
 */
 
-static int __devinit sm501_init_dev(struct sm501_devdata *sm)
+static int sm501_init_dev(struct sm501_devdata *sm)
 {
 	struct sm501_initdata *idata;
 	struct sm501_platdata *pdata;
@@ -1389,7 +1389,7 @@ static int __devinit sm501_init_dev(struct sm501_devdata *sm)
 	return 0;
 }
 
-static int __devinit sm501_plat_probe(struct platform_device *dev)
+static int sm501_plat_probe(struct platform_device *dev)
 {
 	struct sm501_devdata *sm;
 	int ret;
@@ -1578,7 +1578,7 @@ static struct sm501_platdata sm501_pci_platdata = {
 	.gpio_base	= -1,
 };
 
-static int __devinit sm501_pci_probe(struct pci_dev *dev,
+static int sm501_pci_probe(struct pci_dev *dev,
 				     const struct pci_device_id *id)
 {
 	struct sm501_devdata *sm;
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index d35da6820bea..2cfd55f343c2 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -69,7 +69,7 @@ static struct sta2x11_mfd *sta2x11_mfd_find(struct pci_dev *pdev)
 	return NULL;
 }
 
-static int __devinit sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
+static int sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
 {
 	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
 	struct sta2x11_instance *instance;
@@ -363,7 +363,7 @@ static int sta2x11_mfd_resume(struct pci_dev *pdev)
 	return 0;
 }
 
-static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
+static int sta2x11_mfd_probe(struct pci_dev *pdev,
 				       const struct pci_device_id *pci_id)
 {
 	int err, i;
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index 6a6aed74b946..8195ca2ac74b 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -52,7 +52,7 @@ static struct stmpe_client_info i2c_ci = {
 	.write_block = i2c_block_write,
 };
 
-static int __devinit
+static int
 stmpe_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 {
 	i2c_ci.data = (void *)id;
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 29590ad6a6e4..52774338998f 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -82,7 +82,7 @@ static struct stmpe_client_info spi_ci = {
 	.init = spi_init,
 };
 
-static int __devinit
+static int
 stmpe_spi_probe(struct spi_device *spi)
 {
 	const struct spi_device_id *id = spi_get_device_id(spi);
diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index d258b595d0b7..3dbfe9ab889c 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -97,7 +97,7 @@ static struct regmap_config syscon_regmap_config = {
 	.reg_stride = 4,
 };
 
-static int __devinit syscon_probe(struct platform_device *pdev)
+static int syscon_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 553ce956da6d..7e197f788b0a 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -282,7 +282,7 @@ static int tc3589x_chip_init(struct tc3589x *tc3589x)
 	return tc3589x_reg_write(tc3589x, TC3589x_RSTINTCLR, 0x1);
 }
 
-static int __devinit tc3589x_device_init(struct tc3589x *tc3589x)
+static int tc3589x_device_init(struct tc3589x *tc3589x)
 {
 	int ret = 0;
 	unsigned int blocks = tc3589x->pdata->block;
@@ -329,7 +329,7 @@ static int tc3589x_of_probe(struct device_node *np,
 	return 0;
 }
 
-static int __devinit tc3589x_probe(struct i2c_client *i2c,
+static int tc3589x_probe(struct i2c_client *i2c,
 				   const struct i2c_device_id *id)
 {
 	struct tc3589x_platform_data *pdata = i2c->dev.platform_data;
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 186f053e36ef..ca18f262c34a 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -138,7 +138,7 @@ static struct mfd_cell tc6387xb_cells[] = {
 	},
 };
 
-static int __devinit tc6387xb_probe(struct platform_device *dev)
+static int tc6387xb_probe(struct platform_device *dev)
 {
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 	struct resource *iomem, *rscr;
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 9411a88770b8..256723231a4e 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -602,7 +602,7 @@ static void tc6393xb_detach_irq(struct platform_device *dev)
 
 /*--------------------------------------------------------------------------*/
 
-static int __devinit tc6393xb_probe(struct platform_device *dev)
+static int tc6393xb_probe(struct platform_device *dev)
 {
 	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
 	struct tc6393xb *tc6393xb;
diff --git a/drivers/mfd/ti-ssp.c b/drivers/mfd/ti-ssp.c
index 1c31f877c6bb..b177f96da864 100644
--- a/drivers/mfd/ti-ssp.c
+++ b/drivers/mfd/ti-ssp.c
@@ -315,7 +315,7 @@ static irqreturn_t ti_ssp_interrupt(int irq, void *dev_data)
 	return IRQ_HANDLED;
 }
 
-static int __devinit ti_ssp_probe(struct platform_device *pdev)
+static int ti_ssp_probe(struct platform_device *pdev)
 {
 	static struct ti_ssp *ssp;
 	const struct ti_ssp_data *pdata = pdev->dev.platform_data;
diff --git a/drivers/mfd/timberdale.c b/drivers/mfd/timberdale.c
index 61badece0b9a..dddf1df57838 100644
--- a/drivers/mfd/timberdale.c
+++ b/drivers/mfd/timberdale.c
@@ -650,7 +650,7 @@ static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
 
 /*--------------------------------------------------------------------------*/
 
-static int __devinit timb_probe(struct pci_dev *dev,
+static int timb_probe(struct pci_dev *dev,
 	const struct pci_device_id *id)
 {
 	struct timberdale_device *priv;
diff --git a/drivers/mfd/tps6105x.c b/drivers/mfd/tps6105x.c
index 45389600f86e..75a84ac4672e 100644
--- a/drivers/mfd/tps6105x.c
+++ b/drivers/mfd/tps6105x.c
@@ -86,7 +86,7 @@ fail:
 }
 EXPORT_SYMBOL(tps6105x_mask_and_set);
 
-static int __devinit tps6105x_startup(struct tps6105x *tps6105x)
+static int tps6105x_startup(struct tps6105x *tps6105x)
 {
 	int ret;
 	u8 regval;
@@ -133,7 +133,7 @@ static struct mfd_cell tps6105x_cells[] = {
 	},
 };
 
-static int __devinit tps6105x_probe(struct i2c_client *client,
+static int tps6105x_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	struct tps6105x			*tps6105x;
diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index d43ff16d8c4c..0c446eb86bdb 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -188,7 +188,7 @@ static irqreturn_t tps65090_irq(int irq, void *data)
 	return acks ? IRQ_HANDLED : IRQ_NONE;
 }
 
-static int __devinit tps65090_irq_init(struct tps65090 *tps65090, int irq,
+static int tps65090_irq_init(struct tps65090 *tps65090, int irq,
 	int irq_base)
 {
 	int i, ret;
@@ -251,7 +251,7 @@ static const struct regmap_config tps65090_regmap_config = {
 	.volatile_reg = is_volatile_reg,
 };
 
-static int __devinit tps65090_i2c_probe(struct i2c_client *client,
+static int tps65090_i2c_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct tps65090_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 76360c10246e..8cba75750e91 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -153,7 +153,7 @@ static const struct of_device_id tps65217_of_match[] = {
 	{ /* sentinel */ },
 };
 
-static int __devinit tps65217_probe(struct i2c_client *client,
+static int tps65217_probe(struct i2c_client *client,
 				const struct i2c_device_id *ids)
 {
 	struct tps65217 *tps;
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index a2e226f0f185..975fbb5f36aa 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -267,7 +267,7 @@ static irqreturn_t tps6586x_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
+static int tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
 				       int irq_base)
 {
 	int i, ret;
@@ -316,7 +316,7 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
 	return ret;
 }
 
-static int __devinit tps6586x_add_subdevs(struct tps6586x *tps6586x,
+static int tps6586x_add_subdevs(struct tps6586x *tps6586x,
 					  struct tps6586x_platform_data *pdata)
 {
 	struct tps6586x_subdev_info *subdev;
@@ -468,7 +468,7 @@ static void tps6586x_power_off(void)
 	tps6586x_set_bits(tps6586x_dev, TPS6586X_SUPPLYENE, SLEEP_MODE_BIT);
 }
 
-static int __devinit tps6586x_i2c_probe(struct i2c_client *client,
+static int tps6586x_i2c_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct tps6586x_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index a3d732bcb519..3a3402290a74 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -78,7 +78,7 @@ static const struct regmap_config tps65910_regmap_config = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
-static int __devinit tps65910_ck32k_init(struct tps65910 *tps65910,
+static int tps65910_ck32k_init(struct tps65910 *tps65910,
 					struct tps65910_board *pmic_pdata)
 {
 	int ret;
@@ -96,7 +96,7 @@ static int __devinit tps65910_ck32k_init(struct tps65910 *tps65910,
 	return 0;
 }
 
-static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
+static int tps65910_sleepinit(struct tps65910 *tps65910,
 		struct tps65910_board *pmic_pdata)
 {
 	struct device *dev = NULL;
@@ -237,7 +237,7 @@ static void tps65910_power_off(void)
 			DEVCTRL_DEV_ON_MASK);
 }
 
-static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
+static int tps65910_i2c_probe(struct i2c_client *i2c,
 					const struct i2c_device_id *id)
 {
 	struct tps65910 *tps65910;
diff --git a/drivers/mfd/tps65911-comparator.c b/drivers/mfd/tps65911-comparator.c
index a0255874fb1d..b3d268f95a77 100644
--- a/drivers/mfd/tps65911-comparator.c
+++ b/drivers/mfd/tps65911-comparator.c
@@ -122,7 +122,7 @@ static ssize_t comp_threshold_show(struct device *dev,
 static DEVICE_ATTR(comp1_threshold, S_IRUGO, comp_threshold_show, NULL);
 static DEVICE_ATTR(comp2_threshold, S_IRUGO, comp_threshold_show, NULL);
 
-static __devinit int tps65911_comparator_probe(struct platform_device *pdev)
+static int tps65911_comparator_probe(struct platform_device *pdev)
 {
 	struct tps65910 *tps65910 = dev_get_drvdata(pdev->dev.parent);
 	struct tps65910_board *pdata = dev_get_platdata(tps65910->dev);
diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c
index 054315da9d5f..6366576a3056 100644
--- a/drivers/mfd/tps65912-spi.c
+++ b/drivers/mfd/tps65912-spi.c
@@ -81,7 +81,7 @@ static int tps65912_spi_read(struct tps65912 *tps65912, u8 addr,
 	return ret;
 }
 
-static int __devinit tps65912_spi_probe(struct spi_device *spi)
+static int tps65912_spi_probe(struct spi_device *spi)
 {
 	struct tps65912 *tps65912;
 
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 4ae642320205..271d98a83caf 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -1170,7 +1170,7 @@ static int twl_remove(struct i2c_client *client)
 }
 
 /* NOTE: This driver only handles a single twl4030/tps659x0 chip */
-static int __devinit
+static int
 twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct twl4030_platform_data	*pdata = client->dev.platform_data;
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index 08f99055b0fa..aacccdcf997c 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -184,7 +184,7 @@ static bool twl4030_audio_has_vibra(struct twl4030_audio_data *pdata,
 	return false;
 }
 
-static int __devinit twl4030_audio_probe(struct platform_device *pdev)
+static int twl4030_audio_probe(struct platform_device *pdev)
 {
 	struct twl4030_audio *audio;
 	struct twl4030_audio_data *pdata = pdev->dev.platform_data;
diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c
index 456ecb5ac4fe..228d5aacd332 100644
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c
@@ -692,7 +692,7 @@ static int twl4030_madc_set_power(struct twl4030_madc_data *madc, int on)
 /*
  * Initialize MADC and request for threaded irq
  */
-static int __devinit twl4030_madc_probe(struct platform_device *pdev)
+static int twl4030_madc_probe(struct platform_device *pdev)
 {
 	struct twl4030_madc_data *madc;
 	struct twl4030_madc_platform_data *pdata = pdev->dev.platform_data;
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 79ca33dfacca..a5332063183a 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -124,7 +124,7 @@ static u8 res_config_addrs[] = {
 	[RES_MAIN_REF]	= 0x94,
 };
 
-static int __devinit twl4030_write_script_byte(u8 address, u8 byte)
+static int twl4030_write_script_byte(u8 address, u8 byte)
 {
 	int err;
 
@@ -138,7 +138,7 @@ out:
 	return err;
 }
 
-static int __devinit twl4030_write_script_ins(u8 address, u16 pmb_message,
+static int twl4030_write_script_ins(u8 address, u16 pmb_message,
 					   u8 delay, u8 next)
 {
 	int err;
@@ -158,7 +158,7 @@ out:
 	return err;
 }
 
-static int __devinit twl4030_write_script(u8 address, struct twl4030_ins *script,
+static int twl4030_write_script(u8 address, struct twl4030_ins *script,
 				       int len)
 {
 	int err;
@@ -183,7 +183,7 @@ static int __devinit twl4030_write_script(u8 address, struct twl4030_ins *script
 	return err;
 }
 
-static int __devinit twl4030_config_wakeup3_sequence(u8 address)
+static int twl4030_config_wakeup3_sequence(u8 address)
 {
 	int err;
 	u8 data;
@@ -208,7 +208,7 @@ out:
 	return err;
 }
 
-static int __devinit twl4030_config_wakeup12_sequence(u8 address)
+static int twl4030_config_wakeup12_sequence(u8 address)
 {
 	int err = 0;
 	u8 data;
@@ -262,7 +262,7 @@ out:
 	return err;
 }
 
-static int __devinit twl4030_config_sleep_sequence(u8 address)
+static int twl4030_config_sleep_sequence(u8 address)
 {
 	int err;
 
@@ -276,7 +276,7 @@ static int __devinit twl4030_config_sleep_sequence(u8 address)
 	return err;
 }
 
-static int __devinit twl4030_config_warmreset_sequence(u8 address)
+static int twl4030_config_warmreset_sequence(u8 address)
 {
 	int err;
 	u8 rd_data;
@@ -324,7 +324,7 @@ out:
 	return err;
 }
 
-static int __devinit twl4030_configure_resource(struct twl4030_resconfig *rconfig)
+static int twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 {
 	int rconfig_addr;
 	int err;
@@ -416,7 +416,7 @@ static int __devinit twl4030_configure_resource(struct twl4030_resconfig *rconfi
 	return 0;
 }
 
-static int __devinit load_twl4030_script(struct twl4030_script *tscript,
+static int load_twl4030_script(struct twl4030_script *tscript,
 	       u8 address)
 {
 	int err;
@@ -527,7 +527,7 @@ void twl4030_power_off(void)
 		pr_err("TWL4030 Unable to power off\n");
 }
 
-void __devinit twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
+void twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 {
 	int err = 0;
 	int i;
diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c
index c5a7df84e6da..66ef03b02569 100644
--- a/drivers/mfd/vx855.c
+++ b/drivers/mfd/vx855.c
@@ -72,7 +72,7 @@ static struct mfd_cell vx855_cells[] = {
 	},
 };
 
-static __devinit int vx855_probe(struct pci_dev *pdev,
+static int vx855_probe(struct pci_dev *pdev,
 				 const struct pci_device_id *id)
 {
 	int ret;
diff --git a/drivers/mfd/wl1273-core.c b/drivers/mfd/wl1273-core.c
index 7f3f3896662d..edbe6c1b755a 100644
--- a/drivers/mfd/wl1273-core.c
+++ b/drivers/mfd/wl1273-core.c
@@ -182,7 +182,7 @@ static int wl1273_core_remove(struct i2c_client *client)
 	return 0;
 }
 
-static int __devinit wl1273_core_probe(struct i2c_client *client,
+static int wl1273_core_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct wl1273_fm_platform_data *pdata = client->dev.platform_data;
diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c
index 4329a3a9726f..4c7225a53ba2 100644
--- a/drivers/mfd/wm831x-spi.c
+++ b/drivers/mfd/wm831x-spi.c
@@ -21,7 +21,7 @@
 
 #include <linux/mfd/wm831x/core.h>
 
-static int __devinit wm831x_spi_probe(struct spi_device *spi)
+static int wm831x_spi_probe(struct spi_device *spi)
 {
 	const struct spi_device_id *id = spi_get_device_id(spi);
 	struct wm831x *wm831x;
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index bb9cf5216115..46429495836d 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -399,7 +399,7 @@ static const __devinitconst struct reg_default wm1811_reva_patch[] = {
 /*
  * Instantiate the generic non-control parts of the device.
  */
-static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
+static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 {
 	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
 	struct regmap_config *regmap_config;
@@ -689,7 +689,7 @@ static const struct of_device_id wm8994_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, wm8994_of_match);
 
-static __devinit int wm8994_i2c_probe(struct i2c_client *i2c,
+static int wm8994_i2c_probe(struct i2c_client *i2c,
 				      const struct i2c_device_id *id)
 {
 	struct wm8994 *wm8994;
diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h
index a0ca0dca1244..478672ed0c3d 100644
--- a/include/linux/mfd/88pm80x.h
+++ b/include/linux/mfd/88pm80x.h
@@ -364,6 +364,6 @@ static inline int pm80x_dev_resume(struct device *dev)
 #endif
 
 extern int pm80x_init(struct i2c_client *client,
-			     const struct i2c_device_id *id) __devinit;
+		      const struct i2c_device_id *id);
 extern int pm80x_deinit(struct i2c_client *client);
 #endif /* __LINUX_MFD_88PM80X_H */
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 1491044efa10..83f0bdc530b7 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -291,7 +291,7 @@ struct ab8500_platform_data {
 	struct ab8500_codec_platform_data *codec;
 };
 
-extern int __devinit ab8500_init(struct ab8500 *ab8500,
+extern int ab8500_init(struct ab8500 *ab8500,
 				 enum ab8500_version version);
 extern int __devexit ab8500_exit(struct ab8500 *ab8500);
 
diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h
index 4b21769f4483..ddaa7f5e9ed9 100644
--- a/include/linux/mfd/pm8xxx/irq.h
+++ b/include/linux/mfd/pm8xxx/irq.h
@@ -37,7 +37,7 @@ struct pm_irq_chip;
 
 #ifdef CONFIG_MFD_PM8XXX_IRQ
 int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq);
-struct pm_irq_chip * __devinit pm8xxx_irq_init(struct device *dev,
+struct pm_irq_chip *pm8xxx_irq_init(struct device *dev,
 				const struct pm8xxx_irq_platform_data *pdata);
 int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip);
 #else
@@ -45,7 +45,7 @@ static inline int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq)
 {
 	return -ENXIO;
 }
-static inline struct pm_irq_chip * __devinit pm8xxx_irq_init(
+static inline struct pm_irq_chip *pm8xxx_irq_init(
 				const struct device *dev,
 				const struct pm8xxx_irq_platform_data *pdata)
 {
-- 
cgit v1.2.3-55-g7522


From 4740f73fe5388ab5d22d552d2a0dacc62418a70c Mon Sep 17 00:00:00 2001
From: Bill Pemberton
Date: Mon, 19 Nov 2012 13:26:01 -0500
Subject: mfd: remove use of __devexit

CONFIG_HOTPLUG is going away as an option so __devexit is no
longer needed.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Cc: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Cc: Peter Tyser <ptyser@xes-inc.com>
Cc: Daniel Walker <dwalker@fifo99.com>
Cc: Bryan Huntsman <bryanh@codeaurora.org>
Acked-by: David Brown <davidb@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/88pm800.c             | 2 +-
 drivers/mfd/88pm805.c             | 2 +-
 drivers/mfd/88pm860x-core.c       | 4 ++--
 drivers/mfd/ab3100-core.c         | 2 +-
 drivers/mfd/ab8500-core.c         | 2 +-
 drivers/mfd/ab8500-debugfs.c      | 2 +-
 drivers/mfd/ab8500-gpadc.c        | 2 +-
 drivers/mfd/ab8500-sysctrl.c      | 2 +-
 drivers/mfd/adp5520.c             | 2 +-
 drivers/mfd/arizona-core.c        | 2 +-
 drivers/mfd/arizona-i2c.c         | 2 +-
 drivers/mfd/arizona-spi.c         | 2 +-
 drivers/mfd/asic3.c               | 2 +-
 drivers/mfd/cs5535-mfd.c          | 2 +-
 drivers/mfd/da903x.c              | 2 +-
 drivers/mfd/da9052-i2c.c          | 2 +-
 drivers/mfd/da9052-spi.c          | 2 +-
 drivers/mfd/da9055-core.c         | 2 +-
 drivers/mfd/da9055-i2c.c          | 2 +-
 drivers/mfd/davinci_voicecodec.c  | 2 +-
 drivers/mfd/ezx-pcap.c            | 2 +-
 drivers/mfd/intel_msic.c          | 4 ++--
 drivers/mfd/janz-cmodio.c         | 2 +-
 drivers/mfd/jz4740-adc.c          | 2 +-
 drivers/mfd/lm3533-core.c         | 4 ++--
 drivers/mfd/lp8788.c              | 2 +-
 drivers/mfd/lpc_ich.c             | 2 +-
 drivers/mfd/lpc_sch.c             | 2 +-
 drivers/mfd/max8907.c             | 2 +-
 drivers/mfd/max8925-core.c        | 2 +-
 drivers/mfd/max8925-i2c.c         | 2 +-
 drivers/mfd/mc13xxx-i2c.c         | 2 +-
 drivers/mfd/mc13xxx-spi.c         | 2 +-
 drivers/mfd/omap-usb-host.c       | 2 +-
 drivers/mfd/omap-usb-tll.c        | 2 +-
 drivers/mfd/pcf50633-adc.c        | 2 +-
 drivers/mfd/pcf50633-core.c       | 2 +-
 drivers/mfd/pm8921-core.c         | 2 +-
 drivers/mfd/pm8xxx-irq.c          | 2 +-
 drivers/mfd/rc5t583.c             | 2 +-
 drivers/mfd/rdc321x-southbridge.c | 2 +-
 drivers/mfd/sm501.c               | 2 +-
 drivers/mfd/sta2x11-mfd.c         | 2 +-
 drivers/mfd/stmpe-i2c.c           | 2 +-
 drivers/mfd/stmpe-spi.c           | 2 +-
 drivers/mfd/syscon.c              | 2 +-
 drivers/mfd/tc3589x.c             | 2 +-
 drivers/mfd/tc6387xb.c            | 2 +-
 drivers/mfd/tc6393xb.c            | 2 +-
 drivers/mfd/ti-ssp.c              | 2 +-
 drivers/mfd/timberdale.c          | 2 +-
 drivers/mfd/tps6105x.c            | 2 +-
 drivers/mfd/tps65090.c            | 2 +-
 drivers/mfd/tps65217.c            | 2 +-
 drivers/mfd/tps6586x.c            | 2 +-
 drivers/mfd/tps65910.c            | 2 +-
 drivers/mfd/tps65911-comparator.c | 2 +-
 drivers/mfd/tps65912-spi.c        | 2 +-
 drivers/mfd/twl4030-audio.c       | 2 +-
 drivers/mfd/twl4030-madc.c        | 2 +-
 drivers/mfd/vx855.c               | 2 +-
 drivers/mfd/wm831x-spi.c          | 2 +-
 drivers/mfd/wm8994-core.c         | 4 ++--
 include/linux/mfd/abx500/ab8500.h | 2 +-
 include/linux/mfd/pm8xxx/irq.h    | 4 ++--
 65 files changed, 70 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm800.c b/drivers/mfd/88pm800.c
index 6746ecd260ad..391e23e6a647 100644
--- a/drivers/mfd/88pm800.c
+++ b/drivers/mfd/88pm800.c
@@ -554,7 +554,7 @@ out_init:
 	return ret;
 }
 
-static int __devexit pm800_remove(struct i2c_client *client)
+static int pm800_remove(struct i2c_client *client)
 {
 	struct pm80x_chip *chip = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/88pm805.c b/drivers/mfd/88pm805.c
index 13c09941dc2c..e671230be2b1 100644
--- a/drivers/mfd/88pm805.c
+++ b/drivers/mfd/88pm805.c
@@ -262,7 +262,7 @@ out_init:
 	return ret;
 }
 
-static int __devexit pm805_remove(struct i2c_client *client)
+static int pm805_remove(struct i2c_client *client)
 {
 	struct pm80x_chip *chip = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 20dd0d41aee4..893fc1ba6ead 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -1077,7 +1077,7 @@ static int pm860x_device_init(struct pm860x_chip *chip,
 	return 0;
 }
 
-static void __devexit pm860x_device_exit(struct pm860x_chip *chip)
+static void pm860x_device_exit(struct pm860x_chip *chip)
 {
 	device_irq_exit(chip);
 	mfd_remove_devices(chip->dev);
@@ -1200,7 +1200,7 @@ err:
 	return ret;
 }
 
-static int __devexit pm860x_remove(struct i2c_client *client)
+static int pm860x_remove(struct i2c_client *client)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
index bf188bc98986..2ec7725f4a08 100644
--- a/drivers/mfd/ab3100-core.c
+++ b/drivers/mfd/ab3100-core.c
@@ -961,7 +961,7 @@ static int ab3100_probe(struct i2c_client *client,
 	return err;
 }
 
-static int __devexit ab3100_remove(struct i2c_client *client)
+static int ab3100_remove(struct i2c_client *client)
 {
 	struct ab3100 *ab3100 = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 7335a9c8ffae..127b00aadae3 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1473,7 +1473,7 @@ out_free_ab8500:
 	return ret;
 }
 
-static int __devexit ab8500_remove(struct platform_device *pdev)
+static int ab8500_remove(struct platform_device *pdev)
 {
 	struct ab8500 *ab8500 = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index 44843680d982..5a8e707bc038 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -597,7 +597,7 @@ exit_no_debugfs:
 	return -ENOMEM;
 }
 
-static int __devexit ab8500_debug_remove(struct platform_device *plf)
+static int ab8500_debug_remove(struct platform_device *plf)
 {
 	debugfs_remove(ab8500_val_file);
 	debugfs_remove(ab8500_address_file);
diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index c5e168e3ef11..3fb1f40d6389 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c
@@ -634,7 +634,7 @@ fail:
 	return ret;
 }
 
-static int __devexit ab8500_gpadc_remove(struct platform_device *pdev)
+static int ab8500_gpadc_remove(struct platform_device *pdev)
 {
 	struct ab8500_gpadc *gpadc = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index 499263c3110a..8a33b2c7eead 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -55,7 +55,7 @@ static int ab8500_sysctrl_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __devexit ab8500_sysctrl_remove(struct platform_device *pdev)
+static int ab8500_sysctrl_remove(struct platform_device *pdev)
 {
 	sysctrl_dev = NULL;
 	return 0;
diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c
index f2f9d8ff6416..210dd038bb5a 100644
--- a/drivers/mfd/adp5520.c
+++ b/drivers/mfd/adp5520.c
@@ -307,7 +307,7 @@ out_free_chip:
 	return ret;
 }
 
-static int __devexit adp5520_remove(struct i2c_client *client)
+static int adp5520_remove(struct i2c_client *client)
 {
 	struct adp5520_chip *chip = dev_get_drvdata(&client->dev);
 
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 47e711674739..12fdabfb569e 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -553,7 +553,7 @@ err_early:
 }
 EXPORT_SYMBOL_GPL(arizona_dev_init);
 
-int __devexit arizona_dev_exit(struct arizona *arizona)
+int arizona_dev_exit(struct arizona *arizona)
 {
 	mfd_remove_devices(arizona->dev);
 	arizona_free_irq(arizona, ARIZONA_IRQ_UNDERCLOCKED, arizona);
diff --git a/drivers/mfd/arizona-i2c.c b/drivers/mfd/arizona-i2c.c
index aaf1a69134df..44a1bb969841 100644
--- a/drivers/mfd/arizona-i2c.c
+++ b/drivers/mfd/arizona-i2c.c
@@ -65,7 +65,7 @@ static int arizona_i2c_probe(struct i2c_client *i2c,
 	return arizona_dev_init(arizona);
 }
 
-static int __devexit arizona_i2c_remove(struct i2c_client *i2c)
+static int arizona_i2c_remove(struct i2c_client *i2c)
 {
 	struct arizona *arizona = dev_get_drvdata(&i2c->dev);
 	arizona_dev_exit(arizona);
diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c
index 9663cafdb7ff..1b9fdd698b03 100644
--- a/drivers/mfd/arizona-spi.c
+++ b/drivers/mfd/arizona-spi.c
@@ -65,7 +65,7 @@ static int arizona_spi_probe(struct spi_device *spi)
 	return arizona_dev_init(arizona);
 }
 
-static int __devexit arizona_spi_remove(struct spi_device *spi)
+static int arizona_spi_remove(struct spi_device *spi)
 {
 	struct arizona *arizona = dev_get_drvdata(&spi->dev);
 	arizona_dev_exit(arizona);
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index b0720d7c1b71..1b15986c01e1 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -1039,7 +1039,7 @@ static int __init asic3_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int __devexit asic3_remove(struct platform_device *pdev)
+static int asic3_remove(struct platform_device *pdev)
 {
 	int ret;
 	struct asic3 *asic = platform_get_drvdata(pdev);
diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index d9dc87f067d8..2e4752a9220a 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c
@@ -166,7 +166,7 @@ err_disable:
 	return err;
 }
 
-static void __devexit cs5535_mfd_remove(struct pci_dev *pdev)
+static void cs5535_mfd_remove(struct pci_dev *pdev)
 {
 	mfd_remove_devices(&pdev->dev);
 	pci_disable_device(pdev);
diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index 5fa1e91a9532..05176cd2862b 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -544,7 +544,7 @@ out_free_chip:
 	return ret;
 }
 
-static int __devexit da903x_remove(struct i2c_client *client)
+static int da903x_remove(struct i2c_client *client)
 {
 	struct da903x_chip *chip = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
index 96f66ed8dbfc..ac74a4d1daea 100644
--- a/drivers/mfd/da9052-i2c.c
+++ b/drivers/mfd/da9052-i2c.c
@@ -121,7 +121,7 @@ static int da9052_i2c_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int __devexit da9052_i2c_remove(struct i2c_client *client)
+static int da9052_i2c_remove(struct i2c_client *client)
 {
 	struct da9052 *da9052 = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index 1ad324373c97..61d63b93576c 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -58,7 +58,7 @@ static int da9052_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int __devexit da9052_spi_remove(struct spi_device *spi)
+static int da9052_spi_remove(struct spi_device *spi)
 {
 	struct da9052 *da9052 = dev_get_drvdata(&spi->dev);
 
diff --git a/drivers/mfd/da9055-core.c b/drivers/mfd/da9055-core.c
index 6f5a4984f0aa..f56a1a9f7777 100644
--- a/drivers/mfd/da9055-core.c
+++ b/drivers/mfd/da9055-core.c
@@ -412,7 +412,7 @@ err:
 	return ret;
 }
 
-void __devexit da9055_device_exit(struct da9055 *da9055)
+void da9055_device_exit(struct da9055 *da9055)
 {
 	regmap_del_irq_chip(da9055->chip_irq, da9055->irq_data);
 	mfd_remove_devices(da9055->dev);
diff --git a/drivers/mfd/da9055-i2c.c b/drivers/mfd/da9055-i2c.c
index 7778d042fb5d..607387ffe8ca 100644
--- a/drivers/mfd/da9055-i2c.c
+++ b/drivers/mfd/da9055-i2c.c
@@ -44,7 +44,7 @@ static int da9055_i2c_probe(struct i2c_client *i2c,
 	return da9055_device_init(da9055);
 }
 
-static int __devexit da9055_i2c_remove(struct i2c_client *i2c)
+static int da9055_i2c_remove(struct i2c_client *i2c)
 {
 	struct da9055 *da9055 = i2c_get_clientdata(i2c);
 
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index b2b0397059d8..c0bcc872af4e 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -151,7 +151,7 @@ fail1:
 	return ret;
 }
 
-static int __devexit davinci_vc_remove(struct platform_device *pdev)
+static int davinci_vc_remove(struct platform_device *pdev)
 {
 	struct davinci_vc *davinci_vc = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index d81505e50d12..b7a61f0f27a4 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -391,7 +391,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap,
 	return ret;
 }
 
-static int __devexit ezx_pcap_remove(struct spi_device *spi)
+static int ezx_pcap_remove(struct spi_device *spi)
 {
 	struct pcap_chip *pcap = dev_get_drvdata(&spi->dev);
 	struct pcap_platform_data *pdata = spi->dev.platform_data;
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index 438ac3df166b..ab8d0b2739b2 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c
@@ -364,7 +364,7 @@ fail:
 	return ret;
 }
 
-static void __devexit intel_msic_remove_devices(struct intel_msic *msic)
+static void intel_msic_remove_devices(struct intel_msic *msic)
 {
 	struct platform_device *pdev = msic->pdev;
 	struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
@@ -445,7 +445,7 @@ static int intel_msic_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __devexit intel_msic_remove(struct platform_device *pdev)
+static int intel_msic_remove(struct platform_device *pdev)
 {
 	struct intel_msic *msic = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c
index 55c479e57aad..45ece11cc27c 100644
--- a/drivers/mfd/janz-cmodio.c
+++ b/drivers/mfd/janz-cmodio.c
@@ -254,7 +254,7 @@ out_return:
 	return ret;
 }
 
-static void __devexit cmodio_pci_remove(struct pci_dev *dev)
+static void cmodio_pci_remove(struct pci_dev *dev)
 {
 	struct cmodio_device *priv = pci_get_drvdata(dev);
 
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index c0d38c597f0c..0b8b55bb9b11 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -307,7 +307,7 @@ err_free:
 	return ret;
 }
 
-static int __devexit jz4740_adc_remove(struct platform_device *pdev)
+static int jz4740_adc_remove(struct platform_device *pdev)
 {
 	struct jz4740_adc *adc = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
index 2b74508655d4..ceebf2c1ea97 100644
--- a/drivers/mfd/lm3533-core.c
+++ b/drivers/mfd/lm3533-core.c
@@ -534,7 +534,7 @@ err_disable:
 	return ret;
 }
 
-static void __devexit lm3533_device_exit(struct lm3533 *lm3533)
+static void lm3533_device_exit(struct lm3533 *lm3533)
 {
 	dev_dbg(lm3533->dev, "%s\n", __func__);
 
@@ -624,7 +624,7 @@ static int lm3533_i2c_probe(struct i2c_client *i2c,
 	return 0;
 }
 
-static int __devexit lm3533_i2c_remove(struct i2c_client *i2c)
+static int lm3533_i2c_remove(struct i2c_client *i2c)
 {
 	struct lm3533 *lm3533 = i2c_get_clientdata(i2c);
 
diff --git a/drivers/mfd/lp8788.c b/drivers/mfd/lp8788.c
index e1d7c9f18cc5..c3d3c9b4d3ad 100644
--- a/drivers/mfd/lp8788.c
+++ b/drivers/mfd/lp8788.c
@@ -203,7 +203,7 @@ static int lp8788_probe(struct i2c_client *cl, const struct i2c_device_id *id)
 			       ARRAY_SIZE(lp8788_devs), NULL, 0, NULL);
 }
 
-static int __devexit lp8788_remove(struct i2c_client *cl)
+static int lp8788_remove(struct i2c_client *cl)
 {
 	struct lp8788 *lp = i2c_get_clientdata(cl);
 
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index 7c83e1b56588..2ad24caa07db 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -878,7 +878,7 @@ static int lpc_ich_probe(struct pci_dev *dev,
 	return 0;
 }
 
-static void __devexit lpc_ich_remove(struct pci_dev *dev)
+static void lpc_ich_remove(struct pci_dev *dev)
 {
 	mfd_remove_devices(&dev->dev);
 	lpc_ich_restore_config_space(dev);
diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c
index 5756a6af08dc..5624fcbba69b 100644
--- a/drivers/mfd/lpc_sch.c
+++ b/drivers/mfd/lpc_sch.c
@@ -164,7 +164,7 @@ out_dev:
 	return ret;
 }
 
-static void __devexit lpc_sch_remove(struct pci_dev *dev)
+static void lpc_sch_remove(struct pci_dev *dev)
 {
 	mfd_remove_devices(&dev->dev);
 }
diff --git a/drivers/mfd/max8907.c b/drivers/mfd/max8907.c
index 81ded7a4ca8c..e9b1c93a3ade 100644
--- a/drivers/mfd/max8907.c
+++ b/drivers/mfd/max8907.c
@@ -288,7 +288,7 @@ err_alloc_drvdata:
 	return ret;
 }
 
-static __devexit int max8907_i2c_remove(struct i2c_client *i2c)
+static int max8907_i2c_remove(struct i2c_client *i2c)
 {
 	struct max8907 *max8907 = i2c_get_clientdata(i2c);
 
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 60325c49afda..e32466e865b9 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -901,7 +901,7 @@ out:
 	return ret;
 }
 
-void __devexit max8925_device_exit(struct max8925_chip *chip)
+void max8925_device_exit(struct max8925_chip *chip)
 {
 	if (chip->core_irq)
 		free_irq(chip->core_irq, chip);
diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
index 6e3d30aa00df..00b5b456063d 100644
--- a/drivers/mfd/max8925-i2c.c
+++ b/drivers/mfd/max8925-i2c.c
@@ -168,7 +168,7 @@ static int max8925_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int __devexit max8925_remove(struct i2c_client *client)
+static int max8925_remove(struct i2c_client *client)
 {
 	struct max8925_chip *chip = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/mc13xxx-i2c.c b/drivers/mfd/mc13xxx-i2c.c
index 4a605fb63cbc..7957999f30bb 100644
--- a/drivers/mfd/mc13xxx-i2c.c
+++ b/drivers/mfd/mc13xxx-i2c.c
@@ -85,7 +85,7 @@ static int mc13xxx_i2c_probe(struct i2c_client *client,
 	return ret;
 }
 
-static int __devexit mc13xxx_i2c_remove(struct i2c_client *client)
+static int mc13xxx_i2c_remove(struct i2c_client *client)
 {
 	struct mc13xxx *mc13xxx = dev_get_drvdata(&client->dev);
 
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index e7acd0511156..cb32f69d80ba 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -159,7 +159,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi)
 	return ret;
 }
 
-static int __devexit mc13xxx_spi_remove(struct spi_device *spi)
+static int mc13xxx_spi_remove(struct spi_device *spi)
 {
 	struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev);
 
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index fc23dfbb6910..29b8ed21213e 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -652,7 +652,7 @@ end_probe:
  *
  * Reverses the effect of usbhs_omap_probe().
  */
-static int __devexit usbhs_omap_remove(struct platform_device *pdev)
+static int usbhs_omap_remove(struct platform_device *pdev)
 {
 	struct usbhs_hcd_omap *omap = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c
index 0b586885446b..401b976e3aff 100644
--- a/drivers/mfd/omap-usb-tll.c
+++ b/drivers/mfd/omap-usb-tll.c
@@ -348,7 +348,7 @@ end:
  *
  * Reverses the effect of usbtll_omap_probe().
  */
-static int __devexit usbtll_omap_remove(struct platform_device *pdev)
+static int usbtll_omap_remove(struct platform_device *pdev)
 {
 	struct usbtll_omap *tll = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c
index dbd2f0d0078e..18b53cb72fea 100644
--- a/drivers/mfd/pcf50633-adc.c
+++ b/drivers/mfd/pcf50633-adc.c
@@ -218,7 +218,7 @@ static int pcf50633_adc_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __devexit pcf50633_adc_remove(struct platform_device *pdev)
+static int pcf50633_adc_remove(struct platform_device *pdev)
 {
 	struct pcf50633_adc *adc = platform_get_drvdata(pdev);
 	int i, head;
diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index fc477353081e..64803f13bcec 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -275,7 +275,7 @@ static int pcf50633_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int __devexit pcf50633_remove(struct i2c_client *client)
+static int pcf50633_remove(struct i2c_client *client)
 {
 	struct pcf50633 *pcf = i2c_get_clientdata(client);
 	int i;
diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c
index 43491346e7d0..d4b297cbd801 100644
--- a/drivers/mfd/pm8921-core.c
+++ b/drivers/mfd/pm8921-core.c
@@ -165,7 +165,7 @@ err_read_rev:
 	return rc;
 }
 
-static int __devexit pm8921_remove(struct platform_device *pdev)
+static int pm8921_remove(struct platform_device *pdev)
 {
 	struct pm8xxx_drvdata *drvdata;
 	struct pm8921 *pmic = NULL;
diff --git a/drivers/mfd/pm8xxx-irq.c b/drivers/mfd/pm8xxx-irq.c
index 59c20ea329de..1360e20adf11 100644
--- a/drivers/mfd/pm8xxx-irq.c
+++ b/drivers/mfd/pm8xxx-irq.c
@@ -363,7 +363,7 @@ struct pm_irq_chip *  pm8xxx_irq_init(struct device *dev,
 	return chip;
 }
 
-int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip)
+int pm8xxx_irq_exit(struct pm_irq_chip *chip)
 {
 	irq_set_chained_handler(chip->devirq, NULL);
 	kfree(chip);
diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c
index f721b13e986c..14bdaccefbec 100644
--- a/drivers/mfd/rc5t583.c
+++ b/drivers/mfd/rc5t583.c
@@ -303,7 +303,7 @@ err_add_devs:
 	return ret;
 }
 
-static int  __devexit rc5t583_i2c_remove(struct i2c_client *i2c)
+static int  rc5t583_i2c_remove(struct i2c_client *i2c)
 {
 	struct rc5t583 *rc5t583 = i2c_get_clientdata(i2c);
 
diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index be46539c3080..21b7bef73507 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c
@@ -91,7 +91,7 @@ static int rdc321x_sb_probe(struct pci_dev *pdev,
 			       NULL, 0, NULL);
 }
 
-static void __devexit rdc321x_sb_remove(struct pci_dev *pdev)
+static void rdc321x_sb_remove(struct pci_dev *pdev)
 {
 	mfd_remove_devices(&pdev->dev);
 }
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 9b53733cb297..9816c232e583 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1685,7 +1685,7 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
 	sm501_gpio_remove(sm);
 }
 
-static void __devexit sm501_pci_remove(struct pci_dev *dev)
+static void sm501_pci_remove(struct pci_dev *dev)
 {
 	struct sm501_devdata *sm = pci_get_drvdata(dev);
 
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 57f3361d0cb0..d6284cacd27a 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -89,7 +89,7 @@ static int sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
 	return 0;
 }
 
-static int __devexit mfd_remove(struct pci_dev *pdev)
+static int mfd_remove(struct pci_dev *pdev)
 {
 	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
 
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index 8195ca2ac74b..36df18778029 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -63,7 +63,7 @@ stmpe_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 	return stmpe_probe(&i2c_ci, id->driver_data);
 }
 
-static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
+static int stmpe_i2c_remove(struct i2c_client *i2c)
 {
 	struct stmpe *stmpe = dev_get_drvdata(&i2c->dev);
 
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 52774338998f..973659f8abd9 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -101,7 +101,7 @@ stmpe_spi_probe(struct spi_device *spi)
 	return stmpe_probe(&spi_ci, id->driver_data);
 }
 
-static int __devexit stmpe_spi_remove(struct spi_device *spi)
+static int stmpe_spi_remove(struct spi_device *spi)
 {
 	struct stmpe *stmpe = dev_get_drvdata(&spi->dev);
 
diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index 3dbfe9ab889c..3f10591ea94e 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -138,7 +138,7 @@ static int syscon_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __devexit syscon_remove(struct platform_device *pdev)
+static int syscon_remove(struct platform_device *pdev)
 {
 	struct syscon *syscon;
 
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 7e197f788b0a..a06d66b929b1 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -402,7 +402,7 @@ out_free:
 	return ret;
 }
 
-static int __devexit tc3589x_remove(struct i2c_client *client)
+static int tc3589x_remove(struct i2c_client *client)
 {
 	struct tc3589x *tc3589x = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index ca18f262c34a..366f7b906278 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -208,7 +208,7 @@ err_no_irq:
 	return ret;
 }
 
-static int __devexit tc6387xb_remove(struct platform_device *dev)
+static int tc6387xb_remove(struct platform_device *dev)
 {
 	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 1ff8ed6390d1..15e1463e5e13 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -731,7 +731,7 @@ err_kzalloc:
 	return ret;
 }
 
-static int __devexit tc6393xb_remove(struct platform_device *dev)
+static int tc6393xb_remove(struct platform_device *dev)
 {
 	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
 	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
diff --git a/drivers/mfd/ti-ssp.c b/drivers/mfd/ti-ssp.c
index b177f96da864..09a14cec351b 100644
--- a/drivers/mfd/ti-ssp.c
+++ b/drivers/mfd/ti-ssp.c
@@ -433,7 +433,7 @@ error_res:
 	return error;
 }
 
-static int __devexit ti_ssp_remove(struct platform_device *pdev)
+static int ti_ssp_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ti_ssp *ssp = dev_get_drvdata(dev);
diff --git a/drivers/mfd/timberdale.c b/drivers/mfd/timberdale.c
index a305ac109496..59e0ee247e86 100644
--- a/drivers/mfd/timberdale.c
+++ b/drivers/mfd/timberdale.c
@@ -840,7 +840,7 @@ err_enable:
 	return -ENODEV;
 }
 
-static void __devexit timb_remove(struct pci_dev *dev)
+static void timb_remove(struct pci_dev *dev)
 {
 	struct timberdale_device *priv = pci_get_drvdata(dev);
 
diff --git a/drivers/mfd/tps6105x.c b/drivers/mfd/tps6105x.c
index 75a84ac4672e..1d302f583adf 100644
--- a/drivers/mfd/tps6105x.c
+++ b/drivers/mfd/tps6105x.c
@@ -199,7 +199,7 @@ fail:
 	return ret;
 }
 
-static int __devexit tps6105x_remove(struct i2c_client *client)
+static int tps6105x_remove(struct i2c_client *client)
 {
 	struct tps6105x *tps6105x = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 0c446eb86bdb..382a857b0dde 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -308,7 +308,7 @@ err_exit:
 	return ret;
 }
 
-static int __devexit tps65090_i2c_remove(struct i2c_client *client)
+static int tps65090_i2c_remove(struct i2c_client *client)
 {
 	struct tps65090 *tps65090 = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 8cba75750e91..e14e252e3473 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -214,7 +214,7 @@ static int tps65217_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int __devexit tps65217_remove(struct i2c_client *client)
+static int tps65217_remove(struct i2c_client *client)
 {
 	struct tps65217 *tps = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 975fbb5f36aa..9f92c3b22093 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -548,7 +548,7 @@ err_mfd_add:
 	return ret;
 }
 
-static int __devexit tps6586x_i2c_remove(struct i2c_client *client)
+static int tps6586x_i2c_remove(struct i2c_client *client)
 {
 	struct tps6586x *tps6586x = i2c_get_clientdata(client);
 
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 3a3402290a74..ce054654f5bb 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -302,7 +302,7 @@ static int tps65910_i2c_probe(struct i2c_client *i2c,
 	return ret;
 }
 
-static __devexit int tps65910_i2c_remove(struct i2c_client *i2c)
+static int tps65910_i2c_remove(struct i2c_client *i2c)
 {
 	struct tps65910 *tps65910 = i2c_get_clientdata(i2c);
 
diff --git a/drivers/mfd/tps65911-comparator.c b/drivers/mfd/tps65911-comparator.c
index b3d268f95a77..c0816ebd9d7e 100644
--- a/drivers/mfd/tps65911-comparator.c
+++ b/drivers/mfd/tps65911-comparator.c
@@ -152,7 +152,7 @@ static int tps65911_comparator_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static __devexit int tps65911_comparator_remove(struct platform_device *pdev)
+static int tps65911_comparator_remove(struct platform_device *pdev)
 {
 	struct tps65910 *tps65910;
 
diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c
index 6366576a3056..b45f460d299f 100644
--- a/drivers/mfd/tps65912-spi.c
+++ b/drivers/mfd/tps65912-spi.c
@@ -99,7 +99,7 @@ static int tps65912_spi_probe(struct spi_device *spi)
 	return tps65912_device_init(tps65912);
 }
 
-static int __devexit tps65912_spi_remove(struct spi_device *spi)
+static int tps65912_spi_remove(struct spi_device *spi)
 {
 	struct tps65912 *tps65912 = spi_get_drvdata(spi);
 
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index aacccdcf997c..e16edca92670 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -269,7 +269,7 @@ static int twl4030_audio_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int __devexit twl4030_audio_remove(struct platform_device *pdev)
+static int twl4030_audio_remove(struct platform_device *pdev)
 {
 	mfd_remove_devices(&pdev->dev);
 	platform_set_drvdata(pdev, NULL);
diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c
index 228d5aacd332..a39dcf3e2133 100644
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c
@@ -785,7 +785,7 @@ err_power:
 	return ret;
 }
 
-static int __devexit twl4030_madc_remove(struct platform_device *pdev)
+static int twl4030_madc_remove(struct platform_device *pdev)
 {
 	struct twl4030_madc_data *madc = platform_get_drvdata(pdev);
 
diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c
index 66ef03b02569..757ecc63338c 100644
--- a/drivers/mfd/vx855.c
+++ b/drivers/mfd/vx855.c
@@ -112,7 +112,7 @@ out:
 	return ret;
 }
 
-static void __devexit vx855_remove(struct pci_dev *pdev)
+static void vx855_remove(struct pci_dev *pdev)
 {
 	mfd_remove_devices(&pdev->dev);
 	pci_disable_device(pdev);
diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c
index 4c7225a53ba2..4e70e157a909 100644
--- a/drivers/mfd/wm831x-spi.c
+++ b/drivers/mfd/wm831x-spi.c
@@ -51,7 +51,7 @@ static int wm831x_spi_probe(struct spi_device *spi)
 	return wm831x_device_init(wm831x, type, spi->irq);
 }
 
-static int __devexit wm831x_spi_remove(struct spi_device *spi)
+static int wm831x_spi_remove(struct spi_device *spi)
 {
 	struct wm831x *wm831x = dev_get_drvdata(&spi->dev);
 
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 4e2432dc49f9..c7f62ac544ad 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -671,7 +671,7 @@ err:
 	return ret;
 }
 
-static __devexit void wm8994_device_exit(struct wm8994 *wm8994)
+static void wm8994_device_exit(struct wm8994 *wm8994)
 {
 	pm_runtime_disable(wm8994->dev);
 	mfd_remove_devices(wm8994->dev);
@@ -715,7 +715,7 @@ static int wm8994_i2c_probe(struct i2c_client *i2c,
 	return wm8994_device_init(wm8994, i2c->irq);
 }
 
-static __devexit int wm8994_i2c_remove(struct i2c_client *i2c)
+static int wm8994_i2c_remove(struct i2c_client *i2c)
 {
 	struct wm8994 *wm8994 = i2c_get_clientdata(i2c);
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 83f0bdc530b7..1cb5698b4d76 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -293,7 +293,7 @@ struct ab8500_platform_data {
 
 extern int ab8500_init(struct ab8500 *ab8500,
 				 enum ab8500_version version);
-extern int __devexit ab8500_exit(struct ab8500 *ab8500);
+extern int ab8500_exit(struct ab8500 *ab8500);
 
 extern int ab8500_suspend(struct ab8500 *ab8500);
 
diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h
index ddaa7f5e9ed9..f83d6b43ecbb 100644
--- a/include/linux/mfd/pm8xxx/irq.h
+++ b/include/linux/mfd/pm8xxx/irq.h
@@ -39,7 +39,7 @@ struct pm_irq_chip;
 int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq);
 struct pm_irq_chip *pm8xxx_irq_init(struct device *dev,
 				const struct pm8xxx_irq_platform_data *pdata);
-int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip);
+int pm8xxx_irq_exit(struct pm_irq_chip *chip);
 #else
 static inline int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq)
 {
@@ -51,7 +51,7 @@ static inline struct pm_irq_chip *pm8xxx_irq_init(
 {
 	return ERR_PTR(-ENXIO);
 }
-static inline int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip)
+static inline int pm8xxx_irq_exit(struct pm_irq_chip *chip)
 {
 	return -ENXIO;
 }
-- 
cgit v1.2.3-55-g7522


From b40b97ae736cad3084b13d2969b10c474572de89 Mon Sep 17 00:00:00 2001
From: Bill Pemberton
Date: Wed, 21 Nov 2012 15:34:57 -0500
Subject: PCI: Remove CONFIG_HOTPLUG ifdefs

Remove conditional code based on CONFIG_HOTPLUG being false.  It's
always on now in preparation of it going away as an option.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-driver.c | 15 ---------------
 drivers/pci/pci-sysfs.c  |  7 -------
 drivers/pci/pci.h        |  4 ----
 drivers/pci/probe.c      |  2 --
 include/linux/pci.h      |  2 --
 5 files changed, 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 6c94fc9489e7..8919801cb27e 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -89,10 +89,6 @@ static void pci_free_dynids(struct pci_driver *drv)
 	spin_unlock(&drv->dynids.lock);
 }
 
-/*
- * Dynamic device ID manipulation via sysfs is disabled for !CONFIG_HOTPLUG
- */
-#ifdef CONFIG_HOTPLUG
 /**
  * store_new_id - sysfs frontend to pci_add_dynid()
  * @driver: target device driver
@@ -191,10 +187,6 @@ static struct driver_attribute pci_drv_attrs[] = {
 	__ATTR_NULL,
 };
 
-#else
-#define pci_drv_attrs	NULL
-#endif /* CONFIG_HOTPLUG */
-
 /**
  * pci_match_id - See if a pci device matches a given pci_id table
  * @ids: array of PCI device id structures to search in
@@ -1223,13 +1215,6 @@ void pci_dev_put(struct pci_dev *dev)
 		put_device(&dev->dev);
 }
 
-#ifndef CONFIG_HOTPLUG
-int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	return -ENODEV;
-}
-#endif
-
 struct bus_type pci_bus_type = {
 	.name		= "pci",
 	.match		= pci_bus_match,
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f39378d9da15..68d56f02e721 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -284,7 +284,6 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-#ifdef CONFIG_HOTPLUG
 static DEFINE_MUTEX(pci_remove_rescan_mutex);
 static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
 				size_t count)
@@ -377,8 +376,6 @@ dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-#endif
-
 #if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI)
 static ssize_t d3cold_allowed_store(struct device *dev,
 				    struct device_attribute *attr,
@@ -424,10 +421,8 @@ struct device_attribute pci_dev_attrs[] = {
 	__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
 		broken_parity_status_show,broken_parity_status_store),
 	__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
-#ifdef CONFIG_HOTPLUG
 	__ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
 	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
-#endif
 #if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI)
 	__ATTR(d3cold_allowed, 0644, d3cold_allowed_show, d3cold_allowed_store),
 #endif
@@ -435,9 +430,7 @@ struct device_attribute pci_dev_attrs[] = {
 };
 
 struct device_attribute pcibus_dev_attrs[] = {
-#ifdef CONFIG_HOTPLUG
 	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store),
-#endif
 	__ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL),
 	__ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL),
 	__ATTR_NULL,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fd92aab9904b..6e993af4d30b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -159,11 +159,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
 }
 extern struct device_attribute pci_dev_attrs[];
 extern struct device_attribute pcibus_dev_attrs[];
-#ifdef CONFIG_HOTPLUG
 extern struct bus_attribute pci_bus_attrs[];
-#else
-#define pci_bus_attrs	NULL
-#endif
 
 
 /**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ec909afa90b6..034cb1c73092 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1864,7 +1864,6 @@ struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
 }
 EXPORT_SYMBOL(pci_scan_bus);
 
-#ifdef CONFIG_HOTPLUG
 /**
  * pci_rescan_bus_bridge_resize - scan a PCI bus for devices.
  * @bridge: PCI bridge for the bus to scan
@@ -1894,7 +1893,6 @@ EXPORT_SYMBOL(pci_add_new_bus);
 EXPORT_SYMBOL(pci_scan_slot);
 EXPORT_SYMBOL(pci_scan_bridge);
 EXPORT_SYMBOL_GPL(pci_scan_child_bus);
-#endif
 
 static int __init pci_sort_bf_cmp(const struct device *d_a, const struct device *d_b)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee2179546c63..699e9a920eca 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -941,10 +941,8 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev);
 
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
-#ifdef CONFIG_HOTPLUG
 unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge);
 unsigned int pci_rescan_bus(struct pci_bus *bus);
-#endif
 
 /* Vital product data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
-- 
cgit v1.2.3-55-g7522


From 15856ad50bf5ea02a5ee22399c036d49e7e1124d Mon Sep 17 00:00:00 2001
From: Bill Pemberton
Date: Wed, 21 Nov 2012 15:35:00 -0500
Subject: PCI: Remove __dev* markings

CONFIG_HOTPLUG is going away as an option so __devexit_p, __devint,
__devinitdata, __devinitconst, and _devexit are no longer needed.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/hotplug/cpcihp_zt5550.c |   4 +-
 drivers/pci/ioapic.c                |   6 +-
 drivers/pci/pci.c                   |   4 +-
 drivers/pci/pcie/aer/aerdrv.c       |   4 +-
 drivers/pci/pcie/portdrv_pci.c      |   2 +-
 drivers/pci/probe.c                 |  18 ++---
 drivers/pci/quirks.c                | 132 ++++++++++++++++++------------------
 drivers/pci/xen-pcifront.c          |  10 +--
 include/linux/pci.h                 |   6 +-
 9 files changed, 93 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
index 6bf8d2ab164f..449b4bbc8301 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.c
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c
@@ -271,7 +271,7 @@ init_hc_error:
 
 }
 
-static void __devexit zt5550_hc_remove_one(struct pci_dev *pdev)
+static void zt5550_hc_remove_one(struct pci_dev *pdev)
 {
 	cpci_hp_stop();
 	cpci_hp_unregister_bus(bus0);
@@ -290,7 +290,7 @@ static struct pci_driver zt5550_hc_driver = {
 	.name		= "zt5550_hc",
 	.id_table	= zt5550_hc_pci_tbl,
 	.probe		= zt5550_hc_init_one,
-	.remove		= __devexit_p(zt5550_hc_remove_one),
+	.remove		= zt5550_hc_remove_one,
 };
 
 static int __init zt5550_init(void)
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
index 205af8dc83c2..2eca902a4283 100644
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -27,7 +27,7 @@ struct ioapic {
 	u32		gsi_base;
 };
 
-static int __devinit ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
+static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 {
 	acpi_handle handle;
 	acpi_status status;
@@ -88,7 +88,7 @@ exit_free:
 	return -ENODEV;
 }
 
-static void __devexit ioapic_remove(struct pci_dev *dev)
+static void ioapic_remove(struct pci_dev *dev)
 {
 	struct ioapic *ioapic = pci_get_drvdata(dev);
 
@@ -110,7 +110,7 @@ static struct pci_driver ioapic_driver = {
 	.name		= "ioapic",
 	.id_table	= ioapic_devices,
 	.probe		= ioapic_probe,
-	.remove		= __devexit_p(ioapic_remove),
+	.remove		= ioapic_remove,
 };
 
 static int __init ioapic_init(void)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aabf64798bda..bdf66b500f22 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -86,7 +86,7 @@ enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
  * the dfl or actual value as it sees fit.  Don't forget this is
  * measured in 32-bit words, not bytes.
  */
-u8 pci_dfl_cache_line_size __devinitdata = L1_CACHE_BYTES >> 2;
+u8 pci_dfl_cache_line_size = L1_CACHE_BYTES >> 2;
 u8 pci_cache_line_size;
 
 /*
@@ -3857,7 +3857,7 @@ static int __init pci_resource_alignment_sysfs_init(void)
 
 late_initcall(pci_resource_alignment_sysfs_init);
 
-static void __devinit pci_no_domains(void)
+static void pci_no_domains(void)
 {
 #ifdef CONFIG_PCI_DOMAINS
 	pci_domains_supported = 0;
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 030cf12d5468..76ef634caf6f 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -41,7 +41,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-static int __devinit aer_probe(struct pcie_device *dev);
+static int aer_probe(struct pcie_device *dev);
 static void aer_remove(struct pcie_device *dev);
 static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
 	enum pci_channel_state error);
@@ -300,7 +300,7 @@ static void aer_remove(struct pcie_device *dev)
  *
  * Invoked when PCI Express bus loads AER service driver.
  */
-static int __devinit aer_probe(struct pcie_device *dev)
+static int aer_probe(struct pcie_device *dev)
 {
 	int status;
 	struct aer_rpc *rpc;
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 0761d90ca279..d4824cb78b49 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -182,7 +182,7 @@ static const struct pci_device_id port_runtime_pm_black_list[] = {
  * this port device.
  *
  */
-static int __devinit pcie_portdrv_probe(struct pci_dev *dev,
+static int pcie_portdrv_probe(struct pci_dev *dev,
 					const struct pci_device_id *id)
 {
 	int status;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 034cb1c73092..3683f6094e3f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -305,7 +305,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
 	}
 }
 
-static void __devinit pci_read_bridge_io(struct pci_bus *child)
+static void pci_read_bridge_io(struct pci_bus *child)
 {
 	struct pci_dev *dev = child->self;
 	u8 io_base_lo, io_limit_lo;
@@ -345,7 +345,7 @@ static void __devinit pci_read_bridge_io(struct pci_bus *child)
 	}
 }
 
-static void __devinit pci_read_bridge_mmio(struct pci_bus *child)
+static void pci_read_bridge_mmio(struct pci_bus *child)
 {
 	struct pci_dev *dev = child->self;
 	u16 mem_base_lo, mem_limit_lo;
@@ -367,7 +367,7 @@ static void __devinit pci_read_bridge_mmio(struct pci_bus *child)
 	}
 }
 
-static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child)
+static void pci_read_bridge_mmio_pref(struct pci_bus *child)
 {
 	struct pci_dev *dev = child->self;
 	u16 mem_base_lo, mem_limit_lo;
@@ -417,7 +417,7 @@ static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child)
 	}
 }
 
-void __devinit pci_read_bridge_bases(struct pci_bus *child)
+void pci_read_bridge_bases(struct pci_bus *child)
 {
 	struct pci_dev *dev = child->self;
 	struct resource *res;
@@ -705,7 +705,7 @@ static void pci_fixup_parent_subordinate_busnr(struct pci_bus *child, int max)
  * them, we proceed to assigning numbers to the remaining buses in
  * order to avoid overlaps between old and new bus numbers.
  */
-int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
+int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
 {
 	struct pci_bus *child;
 	int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
@@ -1586,7 +1586,7 @@ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
 }
 EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
 
-unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
+unsigned int pci_scan_child_bus(struct pci_bus *bus)
 {
 	unsigned int devfn, pass, max = bus->busn_res.start;
 	struct pci_dev *dev;
@@ -1790,7 +1790,7 @@ void pci_bus_release_busn_res(struct pci_bus *b)
 			res, ret ? "can not be" : "is");
 }
 
-struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 		struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
 	struct pci_host_bridge_window *window;
@@ -1826,7 +1826,7 @@ struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
 EXPORT_SYMBOL(pci_scan_root_bus);
 
 /* Deprecated; use pci_scan_root_bus() instead */
-struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
+struct pci_bus *pci_scan_bus_parented(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
 {
 	LIST_HEAD(resources);
@@ -1844,7 +1844,7 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 }
 EXPORT_SYMBOL(pci_scan_bus_parented);
 
-struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
 					void *sysdata)
 {
 	LIST_HEAD(resources);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 7a451ff56ecc..22ad3ee0cf0b 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -37,7 +37,7 @@
  * key system devices. For devices that need to have mmio decoding always-on,
  * we need to set the dev->mmio_always_on bit.
  */
-static void __devinit quirk_mmio_always_on(struct pci_dev *dev)
+static void quirk_mmio_always_on(struct pci_dev *dev)
 {
 	dev->mmio_always_on = 1;
 }
@@ -48,7 +48,7 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID,
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
  */
-static void __devinit quirk_mellanox_tavor(struct pci_dev *dev)
+static void quirk_mellanox_tavor(struct pci_dev *dev)
 {
 	dev->broken_parity_status = 1;	/* This device gives false positives */
 }
@@ -83,7 +83,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_p
     This appears to be BIOS not version dependent. So presumably there is a 
     chipset level fix */
     
-static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev)
+static void quirk_isa_dma_hangs(struct pci_dev *dev)
 {
 	if (!isa_dma_bridge_buggy) {
 		isa_dma_bridge_buggy=1;
@@ -106,7 +106,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_3,	quirk_isa_d
  * Intel NM10 "TigerPoint" LPC PM1a_STS.BM_STS must be clear
  * for some HT machines to use C4 w/o hanging.
  */
-static void __devinit quirk_tigerpoint_bm_sts(struct pci_dev *dev)
+static void quirk_tigerpoint_bm_sts(struct pci_dev *dev)
 {
 	u32 pmbase;
 	u16 pm1a;
@@ -125,7 +125,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGP_LPC, quirk
 /*
  *	Chipsets where PCI->PCI transfers vanish or hang
  */
-static void __devinit quirk_nopcipci(struct pci_dev *dev)
+static void quirk_nopcipci(struct pci_dev *dev)
 {
 	if ((pci_pci_problems & PCIPCI_FAIL)==0) {
 		dev_info(&dev->dev, "Disabling direct PCI/PCI transfers\n");
@@ -135,7 +135,7 @@ static void __devinit quirk_nopcipci(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_5597,		quirk_nopcipci);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_496,		quirk_nopcipci);
 
-static void __devinit quirk_nopciamd(struct pci_dev *dev)
+static void quirk_nopciamd(struct pci_dev *dev)
 {
 	u8 rev;
 	pci_read_config_byte(dev, 0x08, &rev);
@@ -150,7 +150,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_8151_0,	quirk_nopci
 /*
  *	Triton requires workarounds to be used by the drivers
  */
-static void __devinit quirk_triton(struct pci_dev *dev)
+static void quirk_triton(struct pci_dev *dev)
 {
 	if ((pci_pci_problems&PCIPCI_TRITON)==0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
@@ -229,7 +229,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8361,		quirk_viala
 /*
  *	VIA Apollo VP3 needs ETBF on BT848/878
  */
-static void __devinit quirk_viaetbf(struct pci_dev *dev)
+static void quirk_viaetbf(struct pci_dev *dev)
 {
 	if ((pci_pci_problems&PCIPCI_VIAETBF)==0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
@@ -238,7 +238,7 @@ static void __devinit quirk_viaetbf(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C597_0,	quirk_viaetbf);
 
-static void __devinit quirk_vsfx(struct pci_dev *dev)
+static void quirk_vsfx(struct pci_dev *dev)
 {
 	if ((pci_pci_problems&PCIPCI_VSFX)==0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
@@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C576,	quirk_vsfx)
  *	workaround applied too
  *	[Info kindly provided by ALi]
  */	
-static void __devinit quirk_alimagik(struct pci_dev *dev)
+static void quirk_alimagik(struct pci_dev *dev)
 {
 	if ((pci_pci_problems&PCIPCI_ALIMAGIK)==0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
@@ -267,7 +267,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 	PCI_DEVICE_ID_AL_M1651, 	quirk_alimag
  *	Natoma has some interesting boundary conditions with Zoran stuff
  *	at least
  */
-static void __devinit quirk_natoma(struct pci_dev *dev)
+static void quirk_natoma(struct pci_dev *dev)
 {
 	if ((pci_pci_problems&PCIPCI_NATOMA)==0) {
 		dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
@@ -285,7 +285,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82443BX_2, 	qu
  *  This chip can cause PCI parity errors if config register 0xA0 is read
  *  while DMAs are occurring.
  */
-static void __devinit quirk_citrine(struct pci_dev *dev)
+static void quirk_citrine(struct pci_dev *dev)
 {
 	dev->cfg_size = 0xA0;
 }
@@ -295,7 +295,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM,	PCI_DEVICE_ID_IBM_CITRINE,	quirk_cit
  *  S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
  *  If it's needed, re-allocate the region.
  */
-static void __devinit quirk_s3_64M(struct pci_dev *dev)
+static void quirk_s3_64M(struct pci_dev *dev)
 {
 	struct resource *r = &dev->resource[0];
 
@@ -313,7 +313,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3,	PCI_DEVICE_ID_S3_968,		quirk_s3_64M);
  * BAR0 should be 8 bytes; instead, it may be set to something like 8k
  * (which conflicts w/ BAR1's memory range).
  */
-static void __devinit quirk_cs5536_vsa(struct pci_dev *dev)
+static void quirk_cs5536_vsa(struct pci_dev *dev)
 {
 	if (pci_resource_len(dev, 0) != 8) {
 		struct resource *res = &dev->resource[0];
@@ -324,7 +324,7 @@ static void __devinit quirk_cs5536_vsa(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, quirk_cs5536_vsa);
 
-static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region,
+static void quirk_io_region(struct pci_dev *dev, unsigned region,
 	unsigned size, int nr, const char *name)
 {
 	region &= ~(size-1);
@@ -352,7 +352,7 @@ static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region,
  *	ATI Northbridge setups MCE the processor if you even
  *	read somewhere between 0x3b0->0x3bb or read 0x3d3
  */
-static void __devinit quirk_ati_exploding_mce(struct pci_dev *dev)
+static void quirk_ati_exploding_mce(struct pci_dev *dev)
 {
 	dev_info(&dev->dev, "ATI Northbridge, reserving I/O ports 0x3b0 to 0x3bb\n");
 	/* Mae rhaid i ni beidio ag edrych ar y lleoliadiau I/O hyn */
@@ -372,7 +372,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI,	PCI_DEVICE_ID_ATI_RS100,   quirk_ati_
  *	0xE0 (64 bytes of ACPI registers)
  *	0xE2 (32 bytes of SMB registers)
  */
-static void __devinit quirk_ali7101_acpi(struct pci_dev *dev)
+static void quirk_ali7101_acpi(struct pci_dev *dev)
 {
 	u16 region;
 
@@ -440,7 +440,7 @@ static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int
  *	0x90 (16 bytes of SMB registers)
  * and a few strange programmable PIIX4 device resources.
  */
-static void __devinit quirk_piix4_acpi(struct pci_dev *dev)
+static void quirk_piix4_acpi(struct pci_dev *dev)
 {
 	u32 region, res_a;
 
@@ -489,7 +489,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443MX_3,	qui
  *	0x40 (128 bytes of ACPI, GPIO & TCO registers)
  *	0x58 (64 bytes of GPIO I/O space)
  */
-static void __devinit quirk_ich4_lpc_acpi(struct pci_dev *dev)
+static void quirk_ich4_lpc_acpi(struct pci_dev *dev)
 {
 	u32 region;
 	u8 enable;
@@ -531,7 +531,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801DB_12,
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801EB_0,		quirk_ich4_lpc_acpi);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_ESB_1,		quirk_ich4_lpc_acpi);
 
-static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev)
+static void ich6_lpc_acpi_gpio(struct pci_dev *dev)
 {
 	u32 region;
 	u8 enable;
@@ -555,7 +555,7 @@ static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev)
 	}
 }
 
-static void __devinit ich6_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name, int dynsize)
+static void ich6_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name, int dynsize)
 {
 	u32 val;
 	u32 size, base;
@@ -583,7 +583,7 @@ static void __devinit ich6_lpc_generic_decode(struct pci_dev *dev, unsigned reg,
 	dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base, base+size-1);
 }
 
-static void __devinit quirk_ich6_lpc(struct pci_dev *dev)
+static void quirk_ich6_lpc(struct pci_dev *dev)
 {
 	/* Shared ACPI/GPIO decode with all ICH6+ */
 	ich6_lpc_acpi_gpio(dev);
@@ -595,7 +595,7 @@ static void __devinit quirk_ich6_lpc(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_0, quirk_ich6_lpc);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1, quirk_ich6_lpc);
 
-static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name)
+static void ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name)
 {
 	u32 val;
 	u32 mask, base;
@@ -619,7 +619,7 @@ static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg,
 }
 
 /* ICH7-10 has the same common LPC generic IO decode registers */
-static void __devinit quirk_ich7_lpc(struct pci_dev *dev)
+static void quirk_ich7_lpc(struct pci_dev *dev)
 {
 	/* We share the common ACPI/GPIO decode with ICH6 */
 	ich6_lpc_acpi_gpio(dev);
@@ -648,7 +648,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_ICH10_1, qui
  * VIA ACPI: One IO region pointed to by longword at
  *	0x48 or 0x20 (256 bytes of ACPI registers)
  */
-static void __devinit quirk_vt82c586_acpi(struct pci_dev *dev)
+static void quirk_vt82c586_acpi(struct pci_dev *dev)
 {
 	u32 region;
 
@@ -666,7 +666,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C586_3,	quirk_vt
  *	0x70 (128 bytes of hardware monitoring register)
  *	0x90 (16 bytes of SMB registers)
  */
-static void __devinit quirk_vt82c686_acpi(struct pci_dev *dev)
+static void quirk_vt82c686_acpi(struct pci_dev *dev)
 {
 	u16 hm;
 	u32 smb;
@@ -688,7 +688,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686_4,	quirk_vt
  *	0x88 (128 bytes of power management registers)
  *	0xd0 (16 bytes of SMB registers)
  */
-static void __devinit quirk_vt8235_acpi(struct pci_dev *dev)
+static void quirk_vt8235_acpi(struct pci_dev *dev)
 {
 	u16 pm, smb;
 
@@ -706,7 +706,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8235,	quirk_vt8235
  * TI XIO2000a PCIe-PCI Bridge erroneously reports it supports fast back-to-back:
  *	Disable fast back-to-back on the secondary bus segment
  */
-static void __devinit quirk_xio2000a(struct pci_dev *dev)
+static void quirk_xio2000a(struct pci_dev *dev)
 {
 	struct pci_dev *pdev;
 	u16 command;
@@ -780,7 +780,7 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237,		quirk
  * noapic specified. For the moment we assume it's the erratum. We may be wrong
  * of course. However the advice is demonstrably good even if so..
  */
-static void __devinit quirk_amd_ioapic(struct pci_dev *dev)
+static void quirk_amd_ioapic(struct pci_dev *dev)
 {
 	if (dev->revision >= 0x02) {
 		dev_warn(&dev->dev, "I/O APIC: AMD Erratum #22 may be present. In the event of instability try\n");
@@ -789,7 +789,7 @@ static void __devinit quirk_amd_ioapic(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_VIPER_7410,	quirk_amd_ioapic);
 
-static void __devinit quirk_ioapic_rmw(struct pci_dev *dev)
+static void quirk_ioapic_rmw(struct pci_dev *dev)
 {
 	if (dev->devfn == 0 && dev->bus->number == 0)
 		sis_apic_bug = 1;
@@ -801,7 +801,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI,	PCI_ANY_ID,			quirk_ioapic_rmw);
  * Some settings of MMRBC can lead to data corruption so block changes.
  * See AMD 8131 HyperTransport PCI-X Tunnel Revision Guide
  */
-static void __devinit quirk_amd_8131_mmrbc(struct pci_dev *dev)
+static void quirk_amd_8131_mmrbc(struct pci_dev *dev)
 {
 	if (dev->subordinate && dev->revision <= 0x12) {
 		dev_info(&dev->dev, "AMD8131 rev %x detected; "
@@ -819,7 +819,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_
  * value of the ACPI SCI interrupt is only done for convenience.
  *	-jgarzik
  */
-static void __devinit quirk_via_acpi(struct pci_dev *d)
+static void quirk_via_acpi(struct pci_dev *d)
 {
 	/*
 	 * VIA ACPI device: SCI IRQ line in PCI config byte 0x42
@@ -926,7 +926,7 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_via_vlink);
  * We need to switch it off to be able to recognize the real
  * type of the chip.
  */
-static void __devinit quirk_vt82c598_id(struct pci_dev *dev)
+static void quirk_vt82c598_id(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, 0xfc, 0);
 	pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
@@ -978,7 +978,7 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_FE_GATE_700C
  *	assigned to it. We force a larger allocation to ensure that
  *	nothing gets put too close to it.
  */
-static void __devinit quirk_dunord ( struct pci_dev * dev )
+static void quirk_dunord(struct pci_dev *dev)
 {
 	struct resource *r = &dev->resource [1];
 	r->start = 0;
@@ -992,7 +992,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DUNORD,	PCI_DEVICE_ID_DUNORD_I3000,	quirk
  * in the ProgIf. Unfortunately, the ProgIf value is wrong - 0x80
  * instead of 0x01.
  */
-static void __devinit quirk_transparent_bridge(struct pci_dev *dev)
+static void quirk_transparent_bridge(struct pci_dev *dev)
 {
 	dev->transparent = 1;
 }
@@ -1066,7 +1066,7 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA
 /*
  *	Serverworks CSB5 IDE does not fully support native mode
  */
-static void __devinit quirk_svwks_csb5ide(struct pci_dev *pdev)
+static void quirk_svwks_csb5ide(struct pci_dev *pdev)
 {
 	u8 prog;
 	pci_read_config_byte(pdev, PCI_CLASS_PROG, &prog);
@@ -1082,7 +1082,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB
 /*
  *	Intel 82801CAM ICH3-M datasheet says IDE modes must be the same
  */
-static void __devinit quirk_ide_samemode(struct pci_dev *pdev)
+static void quirk_ide_samemode(struct pci_dev *pdev)
 {
 	u8 prog;
 
@@ -1101,7 +1101,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, qui
  * Some ATA devices break if put into D3
  */
 
-static void __devinit quirk_no_ata_d3(struct pci_dev *pdev)
+static void quirk_no_ata_d3(struct pci_dev *pdev)
 {
 	pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
 }
@@ -1121,7 +1121,7 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
 /* This was originally an Alpha specific thing, but it really fits here.
  * The i82375 PCI/EISA bridge appears as non-classified. Fix that.
  */
-static void __devinit quirk_eisa_bridge(struct pci_dev *dev)
+static void quirk_eisa_bridge(struct pci_dev *dev)
 {
 	dev->class = PCI_CLASS_BRIDGE_EISA << 8;
 }
@@ -1155,7 +1155,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82375,	quirk_e
  */
 static int asus_hides_smbus;
 
-static void __devinit asus_hides_smbus_hostbridge(struct pci_dev *dev)
+static void asus_hides_smbus_hostbridge(struct pci_dev *dev)
 {
 	if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK)) {
 		if (dev->device == PCI_DEVICE_ID_INTEL_82845_HB)
@@ -1538,7 +1538,7 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB3
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
-static void __devinit quirk_alder_ioapic(struct pci_dev *pdev)
+static void quirk_alder_ioapic(struct pci_dev *pdev)
 {
 	int i;
 
@@ -1561,7 +1561,7 @@ static void __devinit quirk_alder_ioapic(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic);
 #endif
 
-static void __devinit quirk_pcie_mch(struct pci_dev *pdev)
+static void quirk_pcie_mch(struct pci_dev *pdev)
 {
 	pci_msi_off(pdev);
 	pdev->no_msi = 1;
@@ -1575,7 +1575,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quir
  * It's possible for the MSI to get corrupted if shpc and acpi
  * are used together on certain PXH-based systems.
  */
-static void __devinit quirk_pcie_pxh(struct pci_dev *dev)
+static void quirk_pcie_pxh(struct pci_dev *dev)
 {
 	pci_msi_off(dev);
 	dev->no_msi = 1;
@@ -1777,7 +1777,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS, 	qui
  * but the PIO transfers won't work if BAR0 falls at the odd 8 bytes.
  * Re-allocate the region if needed...
  */
-static void __devinit quirk_tc86c001_ide(struct pci_dev *dev)
+static void quirk_tc86c001_ide(struct pci_dev *dev)
 {
 	struct resource *r = &dev->resource[0];
 
@@ -1790,7 +1790,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2,
 			 PCI_DEVICE_ID_TOSHIBA_TC86C001_IDE,
 			 quirk_tc86c001_ide);
 
-static void __devinit quirk_netmos(struct pci_dev *dev)
+static void quirk_netmos(struct pci_dev *dev)
 {
 	unsigned int num_parallel = (dev->subsystem_device & 0xf0) >> 4;
 	unsigned int num_serial = dev->subsystem_device & 0xf;
@@ -1828,7 +1828,7 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID,
 			 PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos);
 
-static void __devinit quirk_e100_interrupt(struct pci_dev *dev)
+static void quirk_e100_interrupt(struct pci_dev *dev)
 {
 	u16 command, pmcsr;
 	u8 __iomem *csr;
@@ -1901,7 +1901,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
  * The 82575 and 82598 may experience data corruption issues when transitioning
  * out of L0S.  To prevent this we need to disable L0S on the pci-e link
  */
-static void __devinit quirk_disable_aspm_l0s(struct pci_dev *dev)
+static void quirk_disable_aspm_l0s(struct pci_dev *dev)
 {
 	dev_info(&dev->dev, "Disabling L0s\n");
 	pci_disable_link_state(dev, PCIE_LINK_STATE_L0S);
@@ -1921,7 +1921,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s);
 
-static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
+static void fixup_rev1_53c810(struct pci_dev *dev)
 {
 	/* rev 1 ncr53c810 chips don't set the class at all which means
 	 * they don't get their resources remapped. Fix that here.
@@ -1935,7 +1935,7 @@ static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
 
 /* Enable 1k I/O space granularity on the Intel P64H2 */
-static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
+static void quirk_p64h2_1k_io(struct pci_dev *dev)
 {
 	u16 en1k;
 
@@ -1968,7 +1968,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
 			quirk_nvidia_ck804_pcie_aer_ext_cap);
 
-static void __devinit quirk_via_cx700_pci_parking_caching(struct pci_dev *dev)
+static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev)
 {
 	/*
 	 * Disable PCI Bus Parking and PCI Master read caching on CX700
@@ -2031,7 +2031,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_c
  * We believe that it is legal to read beyond the end tag and
  * therefore the solution is to limit the read/write length.
  */
-static void __devinit quirk_brcm_570x_limit_vpd(struct pci_dev *dev)
+static void quirk_brcm_570x_limit_vpd(struct pci_dev *dev)
 {
 	/*
 	 * Only disable the VPD capability for 5706, 5706S, 5708,
@@ -2091,7 +2091,7 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_BROADCOM,
  * the DRBs - this is where we expose device 6.
  * http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm
  */
-static void __devinit quirk_unhide_mch_dev6(struct pci_dev *dev)
+static void quirk_unhide_mch_dev6(struct pci_dev *dev)
 {
 	u8 reg;
 
@@ -2115,7 +2115,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
  * supports link speed auto negotiation, but falsely sets
  * the link speed to 5GT/s.
  */
-static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev)
+static void quirk_tile_plx_gen1(struct pci_dev *dev)
 {
 	if (tile_plx_gen1) {
 		pci_write_config_dword(dev, 0x98, 0x1);
@@ -2132,7 +2132,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
  * aware of it.  Instead of setting the flag on all busses in the
  * machine, simply disable MSI globally.
  */
-static void __devinit quirk_disable_all_msi(struct pci_dev *dev)
+static void quirk_disable_all_msi(struct pci_dev *dev)
 {
 	pci_no_msi();
 	dev_warn(&dev->dev, "MSI quirk detected; MSI disabled\n");
@@ -2146,7 +2146,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3364, quirk_disab
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8380_0, quirk_disable_all_msi);
 
 /* Disable MSI on chipsets that are known to not support it */
-static void __devinit quirk_disable_msi(struct pci_dev *dev)
+static void quirk_disable_msi(struct pci_dev *dev)
 {
 	if (dev->subordinate) {
 		dev_warn(&dev->dev, "MSI quirk detected; "
@@ -2164,7 +2164,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x5a3f, quirk_disable_msi);
  * we use the possible vendor/device IDs of the host bridge for the
  * declared quirk, and search for the APC bridge by slot number.
  */
-static void __devinit quirk_amd_780_apc_msi(struct pci_dev *host_bridge)
+static void quirk_amd_780_apc_msi(struct pci_dev *host_bridge)
 {
 	struct pci_dev *apc_bridge;
 
@@ -2272,7 +2272,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE,
  * for the MCP55 NIC. It is not yet determined whether the msi problem
  * also affects other devices. As for now, turn off msi for this device.
  */
-static void __devinit nvenet_msi_disable(struct pci_dev *dev)
+static void nvenet_msi_disable(struct pci_dev *dev)
 {
 	const char *board_name = dmi_get_system_info(DMI_BOARD_NAME);
 
@@ -2298,7 +2298,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
  * we have it set correctly.
  * Note this is an undocumented register.
  */
-static void __devinit nvbridge_check_legacy_irq_routing(struct pci_dev *dev)
+static void nvbridge_check_legacy_irq_routing(struct pci_dev *dev)
 {
 	u32 cfg;
 
@@ -2534,11 +2534,11 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_q
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
 
-static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev)
+static void quirk_msi_intx_disable_bug(struct pci_dev *dev)
 {
 	dev->dev_flags |= PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG;
 }
-static void __devinit quirk_msi_intx_disable_ati_bug(struct pci_dev *dev)
+static void quirk_msi_intx_disable_ati_bug(struct pci_dev *dev)
 {
 	struct pci_dev *p;
 
@@ -2612,7 +2612,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1083,
  * kernel fails to allocate resources when hotplug device is 
  * inserted and PCI bus is rescanned.
  */
-static void __devinit quirk_hotplug_bridge(struct pci_dev *dev)
+static void quirk_hotplug_bridge(struct pci_dev *dev)
 {
 	dev->is_hotplug_bridge = 1;
 }
@@ -2752,7 +2752,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
 #endif
 
-static void __devinit fixup_ti816x_class(struct pci_dev* dev)
+static void fixup_ti816x_class(struct pci_dev *dev)
 {
 	/* TI 816x devices do not have class code set when in PCIe boot mode */
 	dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n");
@@ -2764,7 +2764,7 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_TI, 0xb800,
 /* Some PCIe devices do not work reliably with the claimed maximum
  * payload size supported.
  */
-static void __devinit fixup_mpss_256(struct pci_dev *dev)
+static void fixup_mpss_256(struct pci_dev *dev)
 {
 	dev->pcie_mpss = 1; /* 256 bytes */
 }
@@ -2782,7 +2782,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE,
  * coalescing must be disabled.  Unfortunately, it cannot be re-enabled because
  * it is possible to hotplug a device with MPS of 256B.
  */
-static void __devinit quirk_intel_mc_errata(struct pci_dev *dev)
+static void quirk_intel_mc_errata(struct pci_dev *dev)
 {
 	int err;
 	u16 rcc;
@@ -2888,7 +2888,7 @@ static void fixup_debug_report(struct pci_dev *dev, ktime_t calltime,
  * This resolves crashes often seen on monitor unplug.
  */
 #define I915_DEIER_REG 0x4400c
-static void __devinit disable_igfx_irq(struct pci_dev *dev)
+static void disable_igfx_irq(struct pci_dev *dev)
 {
 	void __iomem *regs = pci_iomap(dev, 0, 0);
 	if (regs == NULL) {
@@ -2914,7 +2914,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
  * PCI_COMMAND_INTX_DISABLE works though they actually do not properly
  * support this feature.
  */
-static void __devinit quirk_broken_intx_masking(struct pci_dev *dev)
+static void quirk_broken_intx_masking(struct pci_dev *dev)
 {
 	dev->broken_intx_masking = 1;
 }
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 0aab85a51559..db542f4196a4 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -412,7 +412,7 @@ static int pcifront_claim_resource(struct pci_dev *dev, void *data)
 	return 0;
 }
 
-static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
+static int pcifront_scan_bus(struct pcifront_device *pdev,
 				unsigned int domain, unsigned int bus,
 				struct pci_bus *b)
 {
@@ -441,7 +441,7 @@ static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
 	return 0;
 }
 
-static int __devinit pcifront_scan_root(struct pcifront_device *pdev,
+static int pcifront_scan_root(struct pcifront_device *pdev,
 				 unsigned int domain, unsigned int bus)
 {
 	struct pci_bus *b;
@@ -503,7 +503,7 @@ err_out:
 	return err;
 }
 
-static int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
+static int pcifront_rescan_root(struct pcifront_device *pdev,
 				   unsigned int domain, unsigned int bus)
 {
 	int err;
@@ -834,7 +834,7 @@ out:
 	return err;
 }
 
-static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
+static int pcifront_try_connect(struct pcifront_device *pdev)
 {
 	int err = -EFAULT;
 	int i, num_roots, len;
@@ -924,7 +924,7 @@ out:
 	return err;
 }
 
-static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
+static int pcifront_attach_devices(struct pcifront_device *pdev)
 {
 	int err = -EFAULT;
 	int i, num_roots, len;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 699e9a920eca..d03d2463efac 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -588,7 +588,7 @@ struct pci_driver {
  * in a generic manner.
  */
 #define DEFINE_PCI_DEVICE_TABLE(_table) \
-	const struct pci_device_id _table[] __devinitconst
+	const struct pci_device_id _table[]
 
 /**
  * PCI_DEVICE - macro used to describe a specific pci device
@@ -686,7 +686,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
 int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
 void pci_bus_release_busn_res(struct pci_bus *b);
-struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 					     struct pci_ops *ops, void *sysdata,
 					     struct list_head *resources);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
@@ -1578,7 +1578,7 @@ extern int pci_pci_problems;
 
 extern unsigned long pci_cardbus_io_size;
 extern unsigned long pci_cardbus_mem_size;
-extern u8 __devinitdata pci_dfl_cache_line_size;
+extern u8 pci_dfl_cache_line_size;
 extern u8 pci_cache_line_size;
 
 extern unsigned long pci_hotplug_io_size;
-- 
cgit v1.2.3-55-g7522


From c4144670fd9b34d6eae22c9f83751745898e8243 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 2 Oct 2012 16:34:38 -0400
Subject: kill daemonize()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/gdm72xx/gdm_usb.c |  4 +-
 fs/file.c                         |  6 ---
 fs/fs_struct.c                    | 24 ----------
 include/linux/fdtable.h           |  1 -
 include/linux/fs_struct.h         |  1 -
 include/linux/sched.h             |  1 -
 kernel/exit.c                     | 92 ---------------------------------------
 7 files changed, 1 insertion(+), 128 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/gdm72xx/gdm_usb.c b/drivers/staging/gdm72xx/gdm_usb.c
index 0c9e8958009b..39db582ab1a6 100644
--- a/drivers/staging/gdm72xx/gdm_usb.c
+++ b/drivers/staging/gdm72xx/gdm_usb.c
@@ -701,8 +701,6 @@ static int k_mode_thread(void *arg)
 	unsigned long flags, flags2, expire;
 	int ret;
 
-	daemonize("k_mode_wimax");
-
 	while (!k_mode_stop) {
 
 		spin_lock_irqsave(&k_lock, flags2);
@@ -764,7 +762,7 @@ static struct usb_driver gdm_usb_driver = {
 static int __init usb_gdm_wimax_init(void)
 {
 #ifdef CONFIG_WIMAX_GDM72XX_K_MODE
-	kthread_run(k_mode_thread, NULL, "WiMax_thread");
+	kthread_run(k_mode_thread, NULL, "k_mode_wimax");
 #endif /* CONFIG_WIMAX_GDM72XX_K_MODE */
 	return usb_register(&gdm_usb_driver);
 }
diff --git a/fs/file.c b/fs/file.c
index 7cb71b992603..7272a1c5831d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -519,12 +519,6 @@ struct files_struct init_files = {
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
 };
 
-void daemonize_descriptors(void)
-{
-	atomic_inc(&init_files.count);
-	reset_files_struct(&init_files);
-}
-
 /*
  * allocate a file descriptor, mark it busy.
  */
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 5df4775fea03..fe6ca583bbc0 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -164,27 +164,3 @@ struct fs_struct init_fs = {
 	.seq		= SEQCNT_ZERO,
 	.umask		= 0022,
 };
-
-void daemonize_fs_struct(void)
-{
-	struct fs_struct *fs = current->fs;
-
-	if (fs) {
-		int kill;
-
-		task_lock(current);
-
-		spin_lock(&init_fs.lock);
-		init_fs.users++;
-		spin_unlock(&init_fs.lock);
-
-		spin_lock(&fs->lock);
-		current->fs = &init_fs;
-		kill = !--fs->users;
-		spin_unlock(&fs->lock);
-
-		task_unlock(current);
-		if (kill)
-			free_fs_struct(fs);
-	}
-}
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 45052aa814c8..fb7dacae0522 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -95,7 +95,6 @@ struct task_struct;
 struct files_struct *get_files_struct(struct task_struct *);
 void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct files_struct *);
-void daemonize_descriptors(void);
 int unshare_files(struct files_struct **);
 struct files_struct *dup_fd(struct files_struct *, int *);
 void do_close_on_exec(struct files_struct *);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 003dc0fd7347..d0ae3a84bcfb 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -21,7 +21,6 @@ extern void set_fs_root(struct fs_struct *, struct path *);
 extern void set_fs_pwd(struct fs_struct *, struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
 extern void free_fs_struct(struct fs_struct *);
-extern void daemonize_fs_struct(void);
 extern int unshare_fs_struct(void);
 
 static inline void get_fs_root(struct fs_struct *fs, struct path *root)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..a0166481eb20 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2283,7 +2283,6 @@ extern void flush_itimer_signals(void);
 
 extern void do_group_exit(int);
 
-extern void daemonize(const char *, ...);
 extern int allow_signal(int);
 extern int disallow_signal(int);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 346616c0092c..f9275e2c7c2c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -322,43 +322,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-/**
- * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
- *
- * If a kernel thread is launched as a result of a system call, or if
- * it ever exits, it should generally reparent itself to kthreadd so it
- * isn't in the way of other processes and is correctly cleaned up on exit.
- *
- * The various task state such as scheduling policy and priority may have
- * been inherited from a user process, so we reset them to sane values here.
- *
- * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
- */
-static void reparent_to_kthreadd(void)
-{
-	write_lock_irq(&tasklist_lock);
-
-	ptrace_unlink(current);
-	/* Reparent to init */
-	current->real_parent = current->parent = kthreadd_task;
-	list_move_tail(&current->sibling, &current->real_parent->children);
-
-	/* Set the exit signal to SIGCHLD so we signal init on exit */
-	current->exit_signal = SIGCHLD;
-
-	if (task_nice(current) < 0)
-		set_user_nice(current, 0);
-	/* cpus_allowed? */
-	/* rt_priority? */
-	/* signals? */
-	memcpy(current->signal->rlim, init_task.signal->rlim,
-	       sizeof(current->signal->rlim));
-
-	atomic_inc(&init_cred.usage);
-	commit_creds(&init_cred);
-	write_unlock_irq(&tasklist_lock);
-}
-
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
@@ -370,13 +333,6 @@ void __set_special_pids(struct pid *pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
 }
 
-static void set_special_pids(struct pid *pid)
-{
-	write_lock_irq(&tasklist_lock);
-	__set_special_pids(pid);
-	write_unlock_irq(&tasklist_lock);
-}
-
 /*
  * Let kernel threads use this to say that they allow a certain signal.
  * Must not be used if kthread was cloned with CLONE_SIGHAND.
@@ -416,54 +372,6 @@ int disallow_signal(int sig)
 
 EXPORT_SYMBOL(disallow_signal);
 
-/*
- *	Put all the gunge required to become a kernel thread without
- *	attached user resources in one place where it belongs.
- */
-
-void daemonize(const char *name, ...)
-{
-	va_list args;
-	sigset_t blocked;
-
-	va_start(args, name);
-	vsnprintf(current->comm, sizeof(current->comm), name, args);
-	va_end(args);
-
-	/*
-	 * If we were started as result of loading a module, close all of the
-	 * user space pages.  We don't need them, and if we didn't close them
-	 * they would be locked into memory.
-	 */
-	exit_mm(current);
-	/*
-	 * We don't want to get frozen, in case system-wide hibernation
-	 * or suspend transition begins right now.
-	 */
-	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
-
-	if (current->nsproxy != &init_nsproxy) {
-		get_nsproxy(&init_nsproxy);
-		switch_task_namespaces(current, &init_nsproxy);
-	}
-	set_special_pids(&init_struct_pid);
-	proc_clear_tty(current);
-
-	/* Block and flush all signals */
-	sigfillset(&blocked);
-	sigprocmask(SIG_BLOCK, &blocked, NULL);
-	flush_signals(current);
-
-	/* Become as one with the init task */
-
-	daemonize_fs_struct();
-	daemonize_descriptors();
-
-	reparent_to_kthreadd();
-}
-
-EXPORT_SYMBOL(daemonize);
-
 #ifdef CONFIG_MM_OWNER
 /*
  * A task is exiting.   If it owned this mm, find a new owner for the mm.
-- 
cgit v1.2.3-55-g7522


From 6b94631f9e8c45a46056cbc6a7a50ecebea4f8da Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 20 Oct 2012 13:32:30 -0400
Subject: consolidate sys_execve() prototype

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/hexagon/include/asm/syscall.h | 3 ---
 arch/tile/include/asm/syscalls.h   | 4 ----
 arch/x86/include/asm/syscalls.h    | 3 ---
 arch/xtensa/include/asm/syscall.h  | 1 -
 include/asm-generic/syscalls.h     | 7 -------
 include/linux/syscalls.h           | 3 +++
 6 files changed, 3 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index fb0e9d48faa6..ec2ce6792cd6 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -25,12 +25,9 @@ typedef long (*syscall_fn)(unsigned long, unsigned long,
 	unsigned long, unsigned long,
 	unsigned long, unsigned long);
 
-asmlinkage int sys_execve(char __user *ufilename, char __user * __user *argv,
-			  char __user * __user *envp);
 asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
 			 unsigned long parent_tidp, unsigned long child_tidp);
 
-#define sys_execve	sys_execve
 #define sys_clone	sys_clone
 
 #include <asm-generic/syscalls.h>
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index 369696d63e7b..394c76f2dc76 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -65,13 +65,9 @@ long sys_ftruncate64(unsigned int fd, loff_t length);
 /* Provide versions of standard syscalls that use current_pt_regs(). */
 long sys_clone(unsigned long clone_flags, unsigned long newsp,
 		void __user *parent_tid, void __user *child_tid);
-long sys_execve(const char __user *filename,
-		 const char __user *const __user *argv,
-		 const char __user *const __user *envp);
 long sys_rt_sigreturn(void);
 long sys_sigaltstack(const stack_t __user *, stack_t __user *);
 #define sys_clone sys_clone
-#define sys_execve sys_execve
 #define sys_rt_sigreturn sys_rt_sigreturn
 #define sys_sigaltstack sys_sigaltstack
 
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2be0b880417e..9e5aef3a2598 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -23,9 +23,6 @@ long sys_iopl(unsigned int, struct pt_regs *);
 /* kernel/process.c */
 int sys_fork(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
-long sys_execve(const char __user *,
-		const char __user *const __user *,
-		const char __user *const __user *);
 long sys_clone(unsigned long, unsigned long, void __user *,
 	       void __user *, struct pt_regs *);
 
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 124aeee0d381..4e27d76c9612 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -10,7 +10,6 @@
 
 struct pt_regs;
 struct sigaction;
-asmlinkage long sys_execve(char*, char**, char**, struct pt_regs*);
 asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*);
 asmlinkage long xtensa_ptrace(long, long, long, long);
 asmlinkage long xtensa_sigreturn(struct pt_regs*);
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 7e4fdb649951..77960333b1a1 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -21,13 +21,6 @@ asmlinkage long sys_fork(void);
 asmlinkage long sys_vfork(void);
 #endif
 
-#ifndef sys_execve
-asmlinkage long sys_execve(const char __user *filename,
-			   const char __user *const __user *argv,
-			   const char __user *const __user *envp,
-			   struct pt_regs *regs);
-#endif
-
 #ifndef sys_mmap2
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 727f0cd73921..2779009ceaa3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -837,6 +837,9 @@ int kernel_execve(const char *filename, const char *const argv[], const char *co
 		current_pt_regs())
 #endif
 
+asmlinkage long sys_execve(const char __user *filename,
+		const char __user *const __user *argv,
+		const char __user *const __user *envp);
 
 asmlinkage long sys_perf_event_open(
 		struct perf_event_attr __user *attr_uptr,
-- 
cgit v1.2.3-55-g7522


From d03d26e58fde2ec99478e26aab47b55755189b08 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 20 Oct 2012 21:46:25 -0400
Subject: make compat_do_execve() static, lose pt_regs argument

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c              | 10 ++++------
 include/linux/compat.h |  2 --
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 0039055b1fc6..f86b6cc2d6cc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1575,10 +1575,9 @@ int do_execve(const char *filename,
 }
 
 #ifdef CONFIG_COMPAT
-int compat_do_execve(const char *filename,
+static int compat_do_execve(const char *filename,
 	const compat_uptr_t __user *__argv,
-	const compat_uptr_t __user *__envp,
-	struct pt_regs *regs)
+	const compat_uptr_t __user *__envp)
 {
 	struct user_arg_ptr argv = {
 		.is_compat = true,
@@ -1588,7 +1587,7 @@ int compat_do_execve(const char *filename,
 		.is_compat = true,
 		.ptr.compat = __envp,
 	};
-	return do_execve_common(filename, argv, envp, regs);
+	return do_execve_common(filename, argv, envp, current_pt_regs());
 }
 #endif
 
@@ -1682,8 +1681,7 @@ asmlinkage long compat_sys_execve(const char __user * filename,
 	struct filename *path = getname(filename);
 	int error = PTR_ERR(path);
 	if (!IS_ERR(path)) {
-		error = compat_do_execve(path->name, argv, envp,
-							current_pt_regs());
+		error = compat_do_execve(path->name, argv, envp);
 		putname(path);
 	}
 	return error;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d2db71077d93..784ebfe63c48 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -284,8 +284,6 @@ asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, u32 pos_low, u32 pos_high);
 
-int compat_do_execve(const char *filename, const compat_uptr_t __user *argv,
-		     const compat_uptr_t __user *envp, struct pt_regs *regs);
 asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp);
 
-- 
cgit v1.2.3-55-g7522


From da3d4c5fa56236dd924d77ffc4f982356816b93b Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 20 Oct 2012 21:49:33 -0400
Subject: get rid of pt_regs argument of do_execve()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c                | 16 ++++++----------
 include/linux/sched.h    |  2 +-
 include/linux/syscalls.h |  3 +--
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index f86b6cc2d6cc..5797ed07efd3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1566,12 +1566,11 @@ out_ret:
 
 int do_execve(const char *filename,
 	const char __user *const __user *__argv,
-	const char __user *const __user *__envp,
-	struct pt_regs *regs)
+	const char __user *const __user *__envp)
 {
 	struct user_arg_ptr argv = { .ptr.native = __argv };
 	struct user_arg_ptr envp = { .ptr.native = __envp };
-	return do_execve_common(filename, argv, envp, regs);
+	return do_execve_common(filename, argv, envp, current_pt_regs());
 }
 
 #ifdef CONFIG_COMPAT
@@ -1668,7 +1667,7 @@ SYSCALL_DEFINE3(execve,
 	struct filename *path = getname(filename);
 	int error = PTR_ERR(path);
 	if (!IS_ERR(path)) {
-		error = do_execve(path->name, argv, envp, current_pt_regs());
+		error = do_execve(path->name, argv, envp);
 		putname(path);
 	}
 	return error;
@@ -1694,12 +1693,9 @@ int kernel_execve(const char *filename,
 		  const char *const argv[],
 		  const char *const envp[])
 {
-	struct pt_regs *p = current_pt_regs();
-	int ret;
-
-	ret = do_execve(filename,
+	int ret = do_execve(filename,
 			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp, p);
+			(const char __user *const __user *)envp);
 	if (ret < 0)
 		return ret;
 
@@ -1707,6 +1703,6 @@ int kernel_execve(const char *filename,
 	 * We were successful.  We won't be returning to our caller, but
 	 * instead to user space by manipulating the kernel stack.
 	 */
-	ret_from_kernel_execve(p);
+	ret_from_kernel_execve(current_pt_regs());
 }
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a0166481eb20..c57249782e48 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2288,7 +2288,7 @@ extern int disallow_signal(int);
 
 extern int do_execve(const char *,
 		     const char __user * const __user *,
-		     const char __user * const __user *, struct pt_regs *);
+		     const char __user * const __user *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 #ifdef CONFIG_GENERIC_KERNEL_THREAD
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2779009ceaa3..526deb333b91 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -833,8 +833,7 @@ int kernel_execve(const char *filename, const char *const argv[], const char *co
 #define kernel_execve(filename, argv, envp) \
 	do_execve(filename, \
 		(const char __user *const __user *)argv, \
-		(const char __user *const __user *)envp, \
-		current_pt_regs())
+		(const char __user *const __user *)envp)
 #endif
 
 asmlinkage long sys_execve(const char __user *filename,
-- 
cgit v1.2.3-55-g7522


From 3c456bfc4ba66e9cda210da7bc4fb0ba9fcc6972 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 20 Oct 2012 21:53:31 -0400
Subject: get rid of pt_regs argument of search_binary_handler()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/binfmt_loader.c | 2 +-
 fs/binfmt_em86.c                  | 2 +-
 fs/binfmt_misc.c                  | 2 +-
 fs/binfmt_script.c                | 2 +-
 fs/exec.c                         | 7 +++----
 include/linux/binfmts.h           | 2 +-
 6 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
index d1f474d1d44d..54abbef5d077 100644
--- a/arch/alpha/kernel/binfmt_loader.c
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -37,7 +37,7 @@ static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	retval = prepare_binprm(bprm);
 	if (retval < 0)
 		return retval;
-	return search_binary_handler(bprm,regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt loader_format = {
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 2790c7e1912e..7e125718a75e 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -90,7 +90,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
 	if (retval < 0)
 		return retval;
 
-	return search_binary_handler(bprm, regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt em86_format = {
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 790b3cddca67..226aeac22ac9 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -199,7 +199,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	bprm->recursion_depth++;
 
-	retval = search_binary_handler (bprm, regs);
+	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto _error;
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index d3b8c1f63155..798b729f01d5 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -95,7 +95,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
 	retval = prepare_binprm(bprm);
 	if (retval < 0)
 		return retval;
-	return search_binary_handler(bprm,regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt script_format = {
diff --git a/fs/exec.c b/fs/exec.c
index dc5e2830d353..2aee7ef10663 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1349,7 +1349,7 @@ EXPORT_SYMBOL(remove_arg_zero);
 /*
  * cycle the list of binary formats handler, until one recognizes the image
  */
-int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
+int search_binary_handler(struct linux_binprm *bprm)
 {
 	unsigned int depth = bprm->recursion_depth;
 	int try,retval;
@@ -1380,7 +1380,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 			if (!try_module_get(fmt->module))
 				continue;
 			read_unlock(&binfmt_lock);
-			retval = fn(bprm, regs);
+			retval = fn(bprm, current_pt_regs());
 			/*
 			 * Restore the depth counter to its starting value
 			 * in this call, so we don't have to rely on every
@@ -1447,7 +1447,6 @@ static int do_execve_common(const char *filename,
 	bool clear_in_exec;
 	int retval;
 	const struct cred *cred = current_cred();
-	struct pt_regs *regs = current_pt_regs();
 
 	/*
 	 * We move the actual failure in case of RLIMIT_NPROC excess from
@@ -1524,7 +1523,7 @@ static int do_execve_common(const char *filename,
 	if (retval < 0)
 		goto out;
 
-	retval = search_binary_handler(bprm,regs);
+	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto out;
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index cfcc6bfcaec0..1f6ce133b4c1 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -95,7 +95,7 @@ extern void unregister_binfmt(struct linux_binfmt *);
 
 extern int prepare_binprm(struct linux_binprm *);
 extern int __must_check remove_arg_zero(struct linux_binprm *);
-extern int search_binary_handler(struct linux_binprm *, struct pt_regs *);
+extern int search_binary_handler(struct linux_binprm *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 extern void setup_new_exec(struct linux_binprm * bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
-- 
cgit v1.2.3-55-g7522


From 71613c3b871c5a9f27cc48f124251bcd3aa23be1 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 20 Oct 2012 22:00:48 -0400
Subject: get rid of pt_regs argument of ->load_binary()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/binfmt_loader.c | 2 +-
 arch/x86/ia32/ia32_aout.c         | 5 +++--
 fs/binfmt_aout.c                  | 5 +++--
 fs/binfmt_elf.c                   | 5 +++--
 fs/binfmt_elf_fdpic.c             | 6 +++---
 fs/binfmt_em86.c                  | 2 +-
 fs/binfmt_flat.c                  | 5 +++--
 fs/binfmt_misc.c                  | 2 +-
 fs/binfmt_script.c                | 2 +-
 fs/binfmt_som.c                   | 5 +++--
 fs/exec.c                         | 4 ++--
 include/linux/binfmts.h           | 2 +-
 12 files changed, 25 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
index 54abbef5d077..9525660c93c0 100644
--- a/arch/alpha/kernel/binfmt_loader.c
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -5,7 +5,7 @@
 #include <linux/binfmts.h>
 #include <linux/a.out.h>
 
-static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_binary(struct linux_binprm *bprm)
 {
 	struct exec *eh = (struct exec *)bprm->buf;
 	unsigned long loader;
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 07b3a68d2d29..a703af19c281 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -35,7 +35,7 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* definitely broken */
 
-static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
+static int load_aout_binary(struct linux_binprm *);
 static int load_aout_library(struct file *);
 
 #ifdef CORE_DUMP
@@ -260,9 +260,10 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
  * These are the functions used to load a.out style executables and shared
  * libraries.  There is no binary dependent code anywhere else.
  */
-static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_aout_binary(struct linux_binprm *bprm)
 {
 	unsigned long error, fd_offset, rlim;
+	struct pt_regs *regs = current_pt_regs();
 	struct exec ex;
 	int retval;
 
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 0e7a6f81ae36..6043567b95c2 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -30,7 +30,7 @@
 #include <asm/cacheflush.h>
 #include <asm/a.out-core.h>
 
-static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_aout_binary(struct linux_binprm *);
 static int load_aout_library(struct file*);
 
 #ifdef CONFIG_COREDUMP
@@ -201,8 +201,9 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_aout_binary(struct linux_binprm * bprm)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct exec ex;
 	unsigned long error;
 	unsigned long fd_offset;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fbd9f60bd763..6d7d1647a68c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -44,7 +44,7 @@
 #define user_siginfo_t siginfo_t
 #endif
 
-static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
+static int load_elf_binary(struct linux_binprm *bprm);
 static int load_elf_library(struct file *);
 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
 				int, int, unsigned long);
@@ -558,7 +558,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
 #endif
 }
 
-static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_elf_binary(struct linux_binprm *bprm)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
  	unsigned long load_addr = 0, load_bias = 0;
@@ -575,6 +575,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	unsigned long reloc_func_desc __maybe_unused = 0;
 	int executable_stack = EXSTACK_DEFAULT;
 	unsigned long def_flags = 0;
+	struct pt_regs *regs = current_pt_regs();
 	struct {
 		struct elfhdr elf_ex;
 		struct elfhdr interp_elf_ex;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a46049154107..dc84732e554f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -56,7 +56,7 @@ typedef char *elf_caddr_t;
 
 MODULE_LICENSE("GPL");
 
-static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
+static int load_elf_fdpic_binary(struct linux_binprm *);
 static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
 static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
 			      struct mm_struct *, const char *);
@@ -164,10 +164,10 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
 /*
  * load an fdpic binary into various bits of memory
  */
-static int load_elf_fdpic_binary(struct linux_binprm *bprm,
-				 struct pt_regs *regs)
+static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 {
 	struct elf_fdpic_params exec_params, interp_params;
+	struct pt_regs *regs = current_pt_regs();
 	struct elf_phdr *phdr;
 	unsigned long stack_size, entryaddr;
 #ifdef ELF_FDPIC_PLAT_INIT
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 7e125718a75e..4e6cce57d113 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -22,7 +22,7 @@
 #define EM86_INTERP	"/usr/bin/em86"
 #define EM86_I_NAME	"em86"
 
-static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
+static int load_em86(struct linux_binprm *bprm)
 {
 	char *interp, *i_name, *i_arg;
 	struct file * file;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e280352b28f9..b56371981d16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -88,7 +88,7 @@ struct lib_info {
 static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
-static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_flat_binary(struct linux_binprm *);
 static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
@@ -858,9 +858,10 @@ out:
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_flat_binary(struct linux_binprm * bprm)
 {
 	struct lib_info libinfo;
+	struct pt_regs *regs = current_pt_regs();
 	unsigned long p = bprm->p;
 	unsigned long stack_len;
 	unsigned long start_addr;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 226aeac22ac9..b0b70fbea06c 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -104,7 +104,7 @@ static Node *check_file(struct linux_binprm *bprm)
 /*
  * the loader itself
  */
-static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_misc_binary(struct linux_binprm *bprm)
 {
 	Node *fmt;
 	struct file * interp_file = NULL;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 798b729f01d5..8c954997e7f7 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -14,7 +14,7 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 
-static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
+static int load_script(struct linux_binprm *bprm)
 {
 	const char *i_arg, *i_name;
 	char *cp;
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 4517aaff61b4..4e00ed68d4a6 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -35,7 +35,7 @@
 
 #include <linux/elf.h>
 
-static int load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs);
+static int load_som_binary(struct linux_binprm * bprm);
 static int load_som_library(struct file *);
 
 /*
@@ -180,13 +180,14 @@ out:
  */
 
 static int
-load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+load_som_binary(struct linux_binprm * bprm)
 {
 	int retval;
 	unsigned int size;
 	unsigned long som_entry;
 	struct som_hdr *som_ex;
 	struct som_exec_auxhdr *hpuxhdr;
+	struct pt_regs *regs = current_pt_regs();
 
 	/* Get the exec-header */
 	som_ex = (struct som_hdr *) bprm->buf;
diff --git a/fs/exec.c b/fs/exec.c
index 2aee7ef10663..721a29929511 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1374,13 +1374,13 @@ int search_binary_handler(struct linux_binprm *bprm)
 	for (try=0; try<2; try++) {
 		read_lock(&binfmt_lock);
 		list_for_each_entry(fmt, &formats, lh) {
-			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
+			int (*fn)(struct linux_binprm *) = fmt->load_binary;
 			if (!fn)
 				continue;
 			if (!try_module_get(fmt->module))
 				continue;
 			read_unlock(&binfmt_lock);
-			retval = fn(bprm, current_pt_regs());
+			retval = fn(bprm);
 			/*
 			 * Restore the depth counter to its starting value
 			 * in this call, so we don't have to rely on every
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 1f6ce133b4c1..2630c9b41a86 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -72,7 +72,7 @@ struct coredump_params {
 struct linux_binfmt {
 	struct list_head lh;
 	struct module *module;
-	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
+	int (*load_binary)(struct linux_binprm *);
 	int (*load_shlib)(struct file *);
 	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
-- 
cgit v1.2.3-55-g7522


From 24465a40ba452bd81fdc9eecb2d75bb903aafdf6 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 28 Nov 2012 23:04:26 -0500
Subject: take sys_fork/sys_vfork/sys_clone prototypes to linux/syscalls.h

now it can be done...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/syscalls.h      |  8 --------
 arch/hexagon/include/asm/syscall.h     |  5 -----
 arch/microblaze/include/asm/Kbuild     |  1 +
 arch/microblaze/include/asm/syscalls.h | 16 ----------------
 arch/s390/kernel/entry.h               |  4 ----
 arch/tile/include/asm/syscalls.h       |  3 ---
 arch/tile/kernel/compat.c              |  1 -
 arch/tile/kernel/sys.c                 |  1 -
 arch/x86/include/asm/syscalls.h        | 11 -----------
 arch/x86/um/shared/sysdep/syscalls.h   |  2 --
 include/asm-generic/syscalls.h         | 12 ------------
 include/linux/syscalls.h               | 10 ++++++++++
 12 files changed, 11 insertions(+), 63 deletions(-)
 delete mode 100644 arch/microblaze/include/asm/syscalls.h

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h
index 010ec127dc5b..20d63b290665 100644
--- a/arch/arm64/include/asm/syscalls.h
+++ b/arch/arm64/include/asm/syscalls.h
@@ -27,14 +27,6 @@ asmlinkage long sys_rt_sigreturn_wrapper(void);
 asmlinkage long sys_sigaltstack_wrapper(const stack_t __user *uss,
 					stack_t __user *uoss);
 
-/*
- * AArch64 sys_clone implementation has a different prototype than the generic
- * one (additional TLS value argument).
- */
-asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, int,
-	       void __user *);
-#define sys_clone	sys_clone
-
 #include <asm-generic/syscalls.h>
 
 #endif	/* __ASM_SYSCALLS_H */
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index ec2ce6792cd6..4af9c7b6f13a 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -25,11 +25,6 @@ typedef long (*syscall_fn)(unsigned long, unsigned long,
 	unsigned long, unsigned long,
 	unsigned long, unsigned long);
 
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long parent_tidp, unsigned long child_tidp);
-
-#define sys_clone	sys_clone
-
 #include <asm-generic/syscalls.h>
 
 extern void *sys_call_table[];
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 8653072d7e9f..88a758a67922 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
 header-y  += elf.h
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += syscalls.h
diff --git a/arch/microblaze/include/asm/syscalls.h b/arch/microblaze/include/asm/syscalls.h
deleted file mode 100644
index 27f2f4c0f39f..000000000000
--- a/arch/microblaze/include/asm/syscalls.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_MICROBLAZE_SYSCALLS_H
-
-asmlinkage long microblaze_vfork(struct pt_regs *regs);
-asmlinkage long microblaze_clone(int flags, unsigned long stack,
-							struct pt_regs *regs);
-asmlinkage long microblaze_execve(const char __user *filenamei,
-				  const char __user *const __user *argv,
-				  const char __user *const __user *envp,
-				  struct pt_regs *regs);
-
-asmlinkage long sys_clone(int flags, unsigned long stack, struct pt_regs *regs);
-#define sys_clone sys_clone
-
-#include <asm-generic/syscalls.h>
-
-#endif /* __ASM_MICROBLAZE_SYSCALLS_H */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index d0d3f69a7346..d8251b98f17a 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -54,10 +54,6 @@ long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
 long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
 long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high,
 			u32 len_low);
-long sys_fork(void);
-long sys_clone(unsigned long newsp, unsigned long clone_flags,
-	       int __user *parent_tidptr, int __user *child_tidptr);
-long sys_vfork(void);
 long sys_sigsuspend(int history0, int history1, old_sigset_t mask);
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 		   struct old_sigaction __user *oact);
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index 394c76f2dc76..4c8462a62cb6 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -63,11 +63,8 @@ long sys_ftruncate64(unsigned int fd, loff_t length);
 #endif
 
 /* Provide versions of standard syscalls that use current_pt_regs(). */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-		void __user *parent_tid, void __user *child_tid);
 long sys_rt_sigreturn(void);
 long sys_sigaltstack(const stack_t __user *, stack_t __user *);
-#define sys_clone sys_clone
 #define sys_rt_sigreturn sys_rt_sigreturn
 #define sys_sigaltstack sys_sigaltstack
 
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index a2e805569d5d..9cd7cb6041c0 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -104,7 +104,6 @@ long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 
 /* Call the assembly trampolines where necessary. */
 #define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn
-#undef sys_clone
 #define sys_clone _sys_clone
 
 /*
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index 02ff5c0ef775..b881a7be24bd 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -109,7 +109,6 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 /* Call the assembly trampolines where necessary. */
 #undef sys_rt_sigreturn
 #define sys_rt_sigreturn _sys_rt_sigreturn
-#undef sys_clone
 #define sys_clone _sys_clone
 
 /*
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index f7252d11416b..2f8374718aa3 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -20,17 +20,6 @@
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 long sys_iopl(unsigned int, struct pt_regs *);
 
-/* kernel/process.c */
-asmlinkage long sys_fork(void);
-asmlinkage long sys_vfork(void);
-#ifdef CONFIG_CLONE_BACKWARDS
-asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, int,
-	       void __user *);
-#else
-asmlinkage long sys_clone(unsigned long, unsigned long, void __user *,
-	       void __user *, int);
-#endif
-
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index ca255a805ed9..bd9a89b67e41 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,5 +1,3 @@
-extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid);
 #ifdef __i386__
 #include "syscalls_32.h"
 #else
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 77960333b1a1..58f466ff00d3 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -8,18 +8,6 @@
  * Calling conventions for these system calls can differ, so
  * it's possible to override them.
  */
-#ifndef sys_clone
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
-			void __user *parent_tid, void __user *child_tid);
-#endif
-
-#ifndef sys_fork
-asmlinkage long sys_fork(void);
-#endif
-
-#ifndef sys_vfork
-asmlinkage long sys_vfork(void);
-#endif
 
 #ifndef sys_mmap2
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 526deb333b91..91835e7f364d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -836,6 +836,16 @@ int kernel_execve(const char *filename, const char *const argv[], const char *co
 		(const char __user *const __user *)envp)
 #endif
 
+asmlinkage long sys_fork(void);
+asmlinkage long sys_vfork(void);
+#ifdef CONFIG_CLONE_BACKWARDS
+asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
+	       int __user *);
+#else
+asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
+	       int __user *, int);
+#endif
+
 asmlinkage long sys_execve(const char __user *filename,
 		const char __user *const __user *argv,
 		const char __user *const __user *envp);
-- 
cgit v1.2.3-55-g7522


From afa86fc426ff7e7f5477f15da9c405d08d5cf790 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 22 Oct 2012 22:51:14 -0400
Subject: flagday: don't pass regs to copy_thread()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/process.c         | 2 +-
 arch/arm/kernel/process.c           | 2 +-
 arch/arm64/kernel/process.c         | 3 +--
 arch/avr32/kernel/process.c         | 2 +-
 arch/blackfin/kernel/process.c      | 6 +++---
 arch/c6x/kernel/process.c           | 2 +-
 arch/cris/arch-v10/kernel/process.c | 3 +--
 arch/cris/arch-v32/kernel/process.c | 3 +--
 arch/frv/kernel/process.c           | 2 +-
 arch/h8300/kernel/process.c         | 2 +-
 arch/hexagon/kernel/process.c       | 3 +--
 arch/ia64/kernel/process.c          | 3 ++-
 arch/m32r/kernel/process.c          | 2 +-
 arch/m68k/kernel/process.c          | 3 +--
 arch/microblaze/kernel/process.c    | 3 +--
 arch/mips/kernel/process.c          | 4 ++--
 arch/mn10300/kernel/process.c       | 2 +-
 arch/openrisc/kernel/process.c      | 2 +-
 arch/parisc/kernel/process.c        | 5 ++---
 arch/powerpc/kernel/process.c       | 4 ++--
 arch/s390/kernel/process.c          | 3 +--
 arch/score/kernel/process.c         | 4 ++--
 arch/sh/kernel/process_32.c         | 3 +--
 arch/sh/kernel/process_64.c         | 5 ++---
 arch/sparc/kernel/process_32.c      | 5 ++---
 arch/sparc/kernel/process_64.c      | 4 ++--
 arch/tile/kernel/process.c          | 5 ++---
 arch/um/kernel/process.c            | 3 +--
 arch/unicore32/kernel/process.c     | 2 +-
 arch/x86/kernel/process_32.c        | 3 +--
 arch/x86/kernel/process_64.c        | 3 +--
 arch/xtensa/kernel/process.c        | 3 +--
 include/linux/sched.h               | 2 +-
 kernel/fork.c                       | 2 +-
 34 files changed, 45 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index e9705bcc96f9..b5d0d0923699 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -241,7 +241,7 @@ release_thread(struct task_struct *dead_task)
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
 	    unsigned long arg,
-	    struct task_struct *p, struct pt_regs *wontuse)
+	    struct task_struct *p)
 {
 	extern void ret_from_fork(void);
 	extern void ret_from_kernel_thread(void);
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 4ab80bbb6d95..9800338c5d1b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -376,7 +376,7 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 int
 copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p, struct pt_regs *unused)
+	    unsigned long stk_sz, struct task_struct *p)
 {
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 5a1335caf6f1..cb0956bc96ed 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -234,8 +234,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 asmlinkage void ret_from_fork(void) asm("ret_from_fork");
 
 int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-		unsigned long stk_sz, struct task_struct *p,
-		struct pt_regs *unused)
+		unsigned long stk_sz, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	unsigned long tls = p->thread.tp_value;
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 03d7aa4a4bc9..fd78f58ea79a 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -299,7 +299,7 @@ asmlinkage void syscall_return(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index e5ae8fcab438..582276efaaa4 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -141,14 +141,14 @@ asmlinkage int bfin_clone(unsigned long clone_flags, unsigned long newsp)
 int
 copy_thread(unsigned long clone_flags,
 	    unsigned long usp, unsigned long topstk,
-	    struct task_struct *p, struct pt_regs *regs)
+	    struct task_struct *p)
 {
 	struct pt_regs *childregs;
 	unsigned long *v;
 
 	childregs = (struct pt_regs *) (task_stack_page(p) + THREAD_SIZE) - 1;
 	v = ((unsigned long *)childregs) - 2;
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		v[0] = usp;
 		v[1] = topstk;
@@ -157,7 +157,7 @@ copy_thread(unsigned long clone_flags,
 		__asm__ __volatile__("%0 = syscfg;":"=da"(childregs->syscfg):);
 		p->thread.usp = 0;
 	} else {
-		*childregs = *regs;
+		*childregs = *current_pt_regs();
 		childregs->r0 = 0;
 		p->thread.usp = usp ? : rdusp();
 		v[0] = v[1] = 0;
diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c
index a3f91895e8b4..6434df476f77 100644
--- a/arch/c6x/kernel/process.c
+++ b/arch/c6x/kernel/process.c
@@ -139,7 +139,7 @@ void start_thread(struct pt_regs *regs, unsigned int pc, unsigned long usp)
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long ustk_size,
-		struct task_struct *p, struct pt_regs *unused)
+		struct task_struct *p)
 {
 	struct pt_regs *childregs;
 
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index 520547c8b196..b1018750cffb 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -94,8 +94,7 @@ asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct switch_stack *swstack = ((struct switch_stack *)childregs) - 1;
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index 331e70252df0..2b23ef0e4452 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -109,8 +109,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
 
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long arg,
-	struct task_struct *p, struct pt_regs *unused)
+	unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct switch_stack *swstack = ((struct switch_stack *) childregs) - 1;
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 0039bf77b192..23916b2a12a2 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -144,7 +144,7 @@ inline unsigned long user_stack(const struct pt_regs *regs)
  */
 int copy_thread(unsigned long clone_flags,
 		unsigned long usp, unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		struct task_struct *p)
 {
 	struct pt_regs *childregs;
 
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index b0fb4054aee5..b609f63f1590 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,7 +129,7 @@ void flush_thread(void)
 
 int copy_thread(unsigned long clone_flags,
                 unsigned long usp, unsigned long topstk,
-		 struct task_struct * p, struct pt_regs *unused)
+		 struct task_struct * p)
 {
 	struct pt_regs * childregs;
 
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 36dce17ed25c..06ae9ffcabd5 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -87,8 +87,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  * Copy architecture-specific thread state
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg, struct task_struct *p,
-		struct pt_regs *unused)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct hexagon_switch_stack *ss;
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 25543a295ad9..31360cbbd5f8 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -393,12 +393,13 @@ ia64_load_extra (struct task_struct *task)
 int
 copy_thread(unsigned long clone_flags,
 	     unsigned long user_stack_base, unsigned long user_stack_size,
-	     struct task_struct *p, struct pt_regs *regs)
+	     struct task_struct *p)
 {
 	extern char ia64_ret_from_clone;
 	struct switch_stack *child_stack, *stack;
 	unsigned long rbs, child_rbs, rbs_size;
 	struct pt_regs *child_ptregs;
+	struct pt_regs *regs = current_pt_regs();
 	int retval = 0;
 
 	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index c37e9a9a8f27..765d0f57c787 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -192,7 +192,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long spu,
-	unsigned long arg, struct task_struct *tsk, struct pt_regs *unused)
+	unsigned long arg, struct task_struct *tsk)
 {
 	struct pt_regs *childregs = task_pt_regs(tsk);
 	extern void ret_from_fork(void);
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index aa9b11000273..9a3df4df73cc 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -154,8 +154,7 @@ asmlinkage int m68k_clone(struct pt_regs *regs)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		 unsigned long arg,
-		 struct task_struct * p, struct pt_regs * unused)
+		 unsigned long arg, struct task_struct *p)
 {
 	struct fork_frame {
 		struct switch_stack sw;
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index a5fed8db7263..40823fd1db0b 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -120,8 +120,7 @@ void flush_thread(void)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct thread_info *ti = task_thread_info(p);
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index d13720ac656f..38097652d62d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -114,10 +114,10 @@ void flush_thread(void)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long arg, struct task_struct *p, struct pt_regs *regs)
+	unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
-	struct pt_regs *childregs;
+	struct pt_regs *childregs, *regs = current_pt_regs();
 	unsigned long childksp;
 	p->set_child_tid = p->clear_child_tid = NULL;
 
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 5e0ef396458d..eb09f5a552ff 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -206,7 +206,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
  */
 int copy_thread(unsigned long clone_flags,
 		unsigned long c_usp, unsigned long ustk_size,
-		struct task_struct *p, struct pt_regs *unused)
+		struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *c_regs;
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 6b853668369b..00c233bf0d06 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -142,7 +142,7 @@ extern asmlinkage void ret_from_fork(void);
 
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long arg, struct task_struct *p, struct pt_regs *regs)
+	    unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *userregs;
 	struct pt_regs *kregs;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 9753ecf49a06..d13507246c5d 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -204,10 +204,9 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
 
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long arg,
-	    struct task_struct *p, struct pt_regs *unused)
+	    unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs * cregs = &(p->thread.regs);
+	struct pt_regs *cregs = &(p->thread.regs);
 	void *stack = task_stack_page(p);
 	
 	/* We have to use void * instead of a function pointer, because
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a31437567631..81430674e71c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -733,8 +733,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg, struct task_struct *p,
-		struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs, *kregs;
 	extern void ret_from_fork(void);
@@ -759,6 +758,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		ti->flags |= _TIF_RESTOREALL;
 		f = ret_from_kernel_thread;
 	} else {
+		struct pt_regs *regs = current_pt_regs();
 		CHECK_FULL_REGS(regs);
 		*childregs = *regs;
 		if (usp)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e37677796a09..536d64579d9a 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -117,8 +117,7 @@ void release_thread(struct task_struct *dead_task)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti;
 	struct fake_frame
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index f96379a5aee0..79568466b578 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -87,11 +87,11 @@ void flush_thread(void) {}
  * set up the kernel stack and exception frames for a new process
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
+	struct pt_regs *regs = current_pt_regs();
 
 	p->thread.reg0 = (unsigned long) childregs;
 	if (unlikely(p->flags & PF_KTHREAD)) {
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 1786d16b6c64..73eb66fc6253 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -128,8 +128,7 @@ asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index d5c86a8a3849..e611c85144b1 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -371,10 +371,9 @@ asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs, *regs = current_pt_regs();
 
 #ifdef CONFIG_SH_FPU
 	/* can't happen for a kernel thread */
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index bf4c6addce7b..ecde946ef834 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -319,11 +319,10 @@ extern void ret_from_fork(void);
 extern void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
-	struct pt_regs *childregs;
+	struct pt_regs *childregs, *regs = current_pt_regs();
 	char *new_stack;
 
 #ifndef CONFIG_SMP
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index dff54f46728d..58ef19e7e82f 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -622,10 +622,10 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags,
  * Child  -->  %o0 == parents pid, %o1 == 1
  */
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *t = task_thread_info(p);
+	struct pt_regs *regs = current_pt_regs();
 	struct sparc_stackf *parent_sf;
 	unsigned long child_stack_sz;
 	char *child_trap_frame;
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 267936b51b59..0e5661e7d00d 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -157,10 +157,9 @@ void arch_release_thread_info(struct thread_info *info)
 static void save_arch_state(struct thread_struct *t);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs = task_pt_regs(p);
+	struct pt_regs *childregs = task_pt_regs(p), *regs = current_pt_regs();
 	unsigned long ksp;
 	unsigned long *callee_regs;
 
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c502c804e8bb..b462b13c5bae 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -161,8 +161,7 @@ void fork_handler(void)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg, struct task_struct * p,
-		struct pt_regs *regs)
+		unsigned long arg, struct task_struct * p)
 {
 	void (*handler)(void);
 	int kthread = current->flags & PF_KTHREAD;
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index 79e44e8ae31c..62bad9fed03e 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -262,7 +262,7 @@ asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
 
 int
 copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p, struct pt_regs *unused)
+	    unsigned long stk_sz, struct task_struct *p)
 {
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 16efa974532b..b5a8905785e6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -128,8 +128,7 @@ void release_thread(struct task_struct *dead_task)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-	unsigned long arg,
-	struct task_struct *p, struct pt_regs *unused)
+	unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct task_struct *tsk;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 74aac76c6e34..6e68a6194965 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,8 +146,7 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg,
-	struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	int err;
 	struct pt_regs *childregs;
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 0036c14739f8..1accf28da5f5 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -199,8 +199,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
  */
 
 int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
-		unsigned long thread_fn_arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long thread_fn_arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c57249782e48..78a2ae3470df 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2271,7 +2271,7 @@ extern void mm_release(struct task_struct *, struct mm_struct *);
 extern struct mm_struct *dup_mm(struct task_struct *tsk);
 
 extern int copy_thread(unsigned long, unsigned long, unsigned long,
-			struct task_struct *, struct pt_regs *);
+			struct task_struct *);
 extern void flush_thread(void);
 extern void exit_thread(void);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 27a337549dab..d96a562b1311 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1320,7 +1320,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	retval = copy_io(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_namespaces;
-	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
+	retval = copy_thread(clone_flags, stack_start, stack_size, p);
 	if (retval)
 		goto bad_fork_cleanup_io;
 
-- 
cgit v1.2.3-55-g7522


From e80d6661c3a5caa0cebec0853c6cb0db090fb506 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 22 Oct 2012 23:10:08 -0400
Subject: flagday: kill pt_regs argument of do_fork()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/blackfin/kernel/process.c |  2 +-
 arch/ia64/kernel/entry.S       | 14 ++++++--------
 arch/m68k/kernel/process.c     |  2 +-
 arch/mips/kernel/linux32.c     |  2 +-
 arch/mips/kernel/syscall.c     |  4 ++--
 arch/sparc/kernel/process_32.c |  3 +--
 arch/sparc/kernel/process_64.c |  3 +--
 include/linux/sched.h          |  2 +-
 kernel/fork.c                  | 13 +++++--------
 9 files changed, 19 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 582276efaaa4..3e16ad9b0a99 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -135,7 +135,7 @@ asmlinkage int bfin_clone(unsigned long clone_flags, unsigned long newsp)
 #endif
 	if (newsp)
 		newsp -= 12;
-	return do_fork(clone_flags, newsp, regs, 0, NULL, NULL);
+	return do_fork(clone_flags, newsp, 0, NULL, NULL);
 }
 
 int
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 940a67263629..e25b784a2b72 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -116,13 +116,12 @@ GLOBAL_ENTRY(sys_clone2)
 	mov loc1=r16				// save ar.pfs across do_fork
 	.body
 	mov out1=in1
-	mov out3=in2
+	mov out2=in2
 	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 	;;
 (p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
-	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 	mov out0=in0				// out0 = clone_flags
 	br.call.sptk.many rp=do_fork
 .ret1:	.restore sp
@@ -148,13 +147,12 @@ GLOBAL_ENTRY(sys_clone)
 	mov loc1=r16				// save ar.pfs across do_fork
 	.body
 	mov out1=in1
-	mov out3=16				// stacksize (compensates for 16-byte scratch area)
+	mov out2=16				// stacksize (compensates for 16-byte scratch area)
 	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 	;;
 (p6)	st8 [r2]=in4				// store TLS in r13 (tp)
-	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 	mov out0=in0				// out0 = clone_flags
 	br.call.sptk.many rp=do_fork
 .ret2:	.restore sp
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 9a3df4df73cc..d538694ad208 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -149,7 +149,7 @@ void flush_thread(void)
 asmlinkage int m68k_clone(struct pt_regs *regs)
 {
 	/* regs will be equal to current_pt_regs() */
-	return do_fork(regs->d1, regs->d2, regs, 0,
+	return do_fork(regs->d1, regs->d2, 0,
 		       (int __user *)regs->d3, (int __user *)regs->d4);
 }
 
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 8796dbc7e358..7adab86c632c 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -312,7 +312,7 @@ _sys32_clone(nabi_no_regargs struct pt_regs regs)
 	/* Use __dummy4 instead of getting it off the stack, so that
 	   syscall() works.  */
 	child_tidptr = (int __user *) __dummy4;
-	return do_fork(clone_flags, newsp, &regs, 0,
+	return do_fork(clone_flags, newsp, 0,
 	               parent_tidptr, child_tidptr);
 }
 
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index c611e2df7767..201cb76b4df9 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -92,7 +92,7 @@ save_static_function(sys_fork);
 static int __used noinline
 _sys_fork(nabi_no_regargs struct pt_regs regs)
 {
-	return do_fork(SIGCHLD, regs.regs[29], &regs, 0, NULL, NULL);
+	return do_fork(SIGCHLD, regs.regs[29], 0, NULL, NULL);
 }
 
 save_static_function(sys_clone);
@@ -123,7 +123,7 @@ _sys_clone(nabi_no_regargs struct pt_regs regs)
 #else
 	child_tidptr = (int __user *) regs.regs[8];
 #endif
-	return do_fork(clone_flags, newsp, &regs, 0,
+	return do_fork(clone_flags, newsp, 0,
 	               parent_tidptr, child_tidptr);
 }
 
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index ecde946ef834..be8e862badaf 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -286,8 +286,7 @@ asmlinkage int sparc_do_fork(unsigned long clone_flags,
 	parent_tid_ptr = regs->u_regs[UREG_I2];
 	child_tid_ptr = regs->u_regs[UREG_I4];
 
-	ret = do_fork(clone_flags, stack_start,
-		      regs, stack_size,
+	ret = do_fork(clone_flags, stack_start, stack_size,
 		      (int __user *) parent_tid_ptr,
 		      (int __user *) child_tid_ptr);
 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 58ef19e7e82f..cdb80b2adbe0 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -601,8 +601,7 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags,
 		child_tid_ptr = (int __user *) regs->u_regs[UREG_I4];
 	}
 
-	ret = do_fork(clone_flags, stack_start,
-		      regs, stack_size,
+	ret = do_fork(clone_flags, stack_start, stack_size,
 		      parent_tid_ptr, child_tid_ptr);
 
 	/* If we get an error and potentially restart the system
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78a2ae3470df..1162258bcaf0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2289,7 +2289,7 @@ extern int disallow_signal(int);
 extern int do_execve(const char *,
 		     const char __user * const __user *,
 		     const char __user * const __user *);
-extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 #ifdef CONFIG_GENERIC_KERNEL_THREAD
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/kernel/fork.c b/kernel/fork.c
index 0e68b6686acb..540730783433 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1527,8 +1527,6 @@ static inline void init_idle_pids(struct pid_link *links)
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
 	struct task_struct *task;
-	struct pt_regs regs;
-
 	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
 	if (!IS_ERR(task)) {
 		init_idle_pids(task->pids);
@@ -1546,7 +1544,6 @@ struct task_struct * __cpuinit fork_idle(int cpu)
  */
 long do_fork(unsigned long clone_flags,
 	      unsigned long stack_start,
-	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
 	      int __user *child_tidptr)
@@ -1576,7 +1573,7 @@ long do_fork(unsigned long clone_flags,
 	 * requested, no event is reported; otherwise, report if the event
 	 * for the type of forking is enabled.
 	 */
-	if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
+	if (!(clone_flags & CLONE_UNTRACED)) {
 		if (clone_flags & CLONE_VFORK)
 			trace = PTRACE_EVENT_VFORK;
 		else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1632,7 +1629,7 @@ long do_fork(unsigned long clone_flags,
  */
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
-	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
+	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
 #endif
@@ -1641,7 +1638,7 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 SYSCALL_DEFINE0(fork)
 {
 #ifdef CONFIG_MMU
-	return do_fork(SIGCHLD, 0, current_pt_regs(), 0, NULL, NULL);
+	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
 #else
 	/* can not support in nommu mode */
 	return(-EINVAL);
@@ -1652,7 +1649,7 @@ SYSCALL_DEFINE0(fork)
 #ifdef __ARCH_WANT_SYS_VFORK
 SYSCALL_DEFINE0(vfork)
 {
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, current_pt_regs(),
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 
 			0, NULL, NULL);
 }
 #endif
@@ -1675,7 +1672,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 int, tls_val)
 #endif
 {
-	return do_fork(clone_flags, newsp, current_pt_regs(), 0,
+	return do_fork(clone_flags, newsp, 0,
 		parent_tidptr, child_tidptr);
 }
 #endif
-- 
cgit v1.2.3-55-g7522


From 4f4202fe5ae9a43e59303f20d700571f695d7b1b Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 5 Nov 2012 12:59:15 -0500
Subject: unify default ptrace_signal_deliver

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/signal.h   | 3 ---
 arch/arm/include/asm/signal.h     | 1 -
 arch/avr32/include/asm/signal.h   | 2 --
 arch/cris/include/asm/signal.h    | 6 ------
 arch/h8300/include/asm/signal.h   | 2 --
 arch/ia64/include/asm/signal.h    | 2 --
 arch/m32r/include/asm/signal.h    | 4 ----
 arch/m68k/include/asm/signal.h    | 5 ++---
 arch/mips/include/asm/signal.h    | 2 --
 arch/mn10300/include/asm/signal.h | 4 ----
 arch/parisc/include/asm/signal.h  | 2 --
 arch/powerpc/include/asm/signal.h | 2 --
 arch/s390/include/asm/signal.h    | 2 --
 arch/sparc/include/asm/signal.h   | 2 --
 arch/x86/include/asm/signal.h     | 2 --
 arch/xtensa/include/asm/signal.h  | 1 -
 include/asm-generic/signal.h      | 2 --
 include/linux/ptrace.h            | 4 ++++
 18 files changed, 6 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index a9388300abb1..45552862cc10 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -164,9 +164,6 @@ struct sigstack {
 
 #ifdef __KERNEL__
 #include <asm/sigcontext.h>
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 5a7963dbd3fb..9a0ea6ab988f 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -35,5 +35,4 @@ struct k_sigaction {
 };
 
 #include <asm/sigcontext.h>
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
 #endif
diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h
index 4d502fd6bad3..9326d182e9e5 100644
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -37,6 +37,4 @@ struct k_sigaction {
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index ea6af9aad76c..72dbbf59dfae 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -152,12 +152,6 @@ typedef struct sigaltstack {
 
 #ifdef __KERNEL__
 #include <asm/sigcontext.h>
-
-/* here we could define asm-optimized sigaddset, sigdelset etc. operations. 
- * if we don't, generic ones are used from linux/signal.h
- */
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index fd8b66e40dca..c43c0a7d2c2e 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -154,8 +154,6 @@ typedef struct sigaltstack {
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __KERNEL__ */
 
 #endif /* _H8300_SIGNAL_H */
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
index aecda5b9eb4e..3a1b20e74c5c 100644
--- a/arch/ia64/include/asm/signal.h
+++ b/arch/ia64/include/asm/signal.h
@@ -38,7 +38,5 @@ struct k_sigaction {
 
 #  include <asm/sigcontext.h>
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 # endif /* !__ASSEMBLY__ */
 #endif /* _ASM_IA64_SIGNAL_H */
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index ea5f95e4079e..e4d2e2ad5f1e 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -149,10 +149,6 @@ typedef struct sigaltstack {
 
 #undef __HAVE_ARCH_SIG_BITOPS
 
-struct pt_regs;
-
-#define ptrace_signal_deliver(regs, cookie)	do { } while (0)
-
 #endif /* __KERNEL__ */
 
 #endif  /* _ASM_M32R_SIGNAL_H */
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 2df26b57c26a..eb51a5241187 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -86,11 +86,10 @@ static inline int sigfindinword(unsigned long word)
 
 #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
 
-#ifdef __uClinux__
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-#else
+#ifndef __uClinux__
 struct pt_regs;
 extern void ptrace_signal_deliver(struct pt_regs *regs, void *cookie);
+#define ptrace_signal_deliver ptrace_signal_deliver
 #endif /* __uClinux__ */
 
 #endif /* _M68K_SIGNAL_H */
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index 880240dff8b7..cf4a08062d1d 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -21,6 +21,4 @@
 #include <asm/sigcontext.h>
 #include <asm/siginfo.h>
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* _ASM_SIGNAL_H */
diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h
index f9668ec3040c..d280e9780793 100644
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -45,8 +45,4 @@ struct k_sigaction {
 };
 #include <asm/sigcontext.h>
 
-
-struct pt_regs;
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* _ASM_SIGNAL_H */
diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h
index 21abf4fc169a..0fdb3c835952 100644
--- a/arch/parisc/include/asm/signal.h
+++ b/arch/parisc/include/asm/signal.h
@@ -34,8 +34,6 @@ struct k_sigaction {
 	struct sigaction sa;
 };
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #include <asm/sigcontext.h>
 
 #endif /* !__ASSEMBLY */
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 189998bb61c4..a101637725a2 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -3,6 +3,4 @@
 
 #include <uapi/asm/signal.h>
 
-struct pt_regs;
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
 #endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index bffdbdd5b3d7..db7ddfaf5b79 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -39,6 +39,4 @@ struct k_sigaction {
         struct sigaction sa;
 };
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index d243c2ae02d2..77b85850d543 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -26,7 +26,5 @@ struct k_sigaction {
 	void			__user *ka_restorer;
 };
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* !(__ASSEMBLY__) */
 #endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 323973f4abf1..0dba8b7a6ac7 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -260,8 +260,6 @@ struct pt_regs;
 
 #endif /* !__i386__ */
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/xtensa/include/asm/signal.h b/arch/xtensa/include/asm/signal.h
index 72fd44c85b70..6f586bd90e18 100644
--- a/arch/xtensa/include/asm/signal.h
+++ b/arch/xtensa/include/asm/signal.h
@@ -27,7 +27,6 @@ struct k_sigaction {
 };
 
 #include <asm/sigcontext.h>
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* _XTENSA_SIGNAL_H */
diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h
index 98caa306122a..d840c90a157a 100644
--- a/include/asm-generic/signal.h
+++ b/include/asm-generic/signal.h
@@ -10,7 +10,5 @@
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_GENERIC_SIGNAL_H */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index e0ff4689d35a..7aefbae2452e 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -329,6 +329,10 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define current_pt_regs() task_pt_regs(current)
 #endif
 
+#ifndef ptrace_signal_deliver
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
-- 
cgit v1.2.3-55-g7522


From 22062a96300dabfef93368a28c34bdf35c9b8308 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 5 Nov 2012 13:00:27 -0500
Subject: new helper: signal_pt_regs()

Always equal to task_pt_regs(current); defined only when we are in
signal delivery.  It may be different from current_pt_regs() - e.g.
architectures like m68k may have pt_regs location on exception
different from that on a syscall and signals (just as ptrace handling)
may happen on exceptions as well as on syscalls.

When they are equal, it's often better to have signal_pt_regs
defined (in asm/ptrace.h) as current_pt_regs - that tends to be
optimized better than default would be.  However, optimisation is
the only reason why we might want an arch-specific definition;
if current_pt_regs() and task_pt_regs(current) have different
values, the latter one is right.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/ptrace.h | 1 +
 arch/h8300/include/asm/ptrace.h | 1 +
 include/linux/ptrace.h          | 9 +++++++++
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index b87755a19554..b4c5b2fbb647 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -78,6 +78,7 @@ struct switch_stack {
 
 #define current_pt_regs() \
   ((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1)
+#define signal_pt_regs current_pt_regs
 
 #define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
 
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 00502a61bf0a..7468589a128b 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -62,6 +62,7 @@ struct pt_regs {
 #define profile_pc(regs) instruction_pointer(regs)
 #define current_pt_regs() ((struct pt_regs *) \
 	(THREAD_SIZE + (unsigned long)current_thread_info()) - 1)
+#define signal_pt_regs() ((struct pt_regs *)current->thread.esp0)
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _H8300_PTRACE_H */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 7aefbae2452e..b8e6dcec78ae 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -333,6 +333,15 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define ptrace_signal_deliver(regs, cookie) do { } while (0)
 #endif
 
+/*
+ * unlike current_pt_regs(), this one is equal to task_pt_regs(current)
+ * on *all* architectures; the only reason to have a per-arch definition
+ * is optimisation.
+ */
+#ifndef signal_pt_regs
+#define signal_pt_regs() task_pt_regs(current)
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
-- 
cgit v1.2.3-55-g7522


From b7f9591c44505ee16ed4561cfeb3642798bdd132 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 5 Nov 2012 13:06:22 -0500
Subject: get rid of ptrace_signal_deliver() arguments

the first one is equal to signal_pt_regs(), the second is never used
(and always NULL, while we are at it).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/m68k/include/asm/signal.h | 3 +--
 arch/m68k/kernel/signal.c      | 3 ++-
 include/linux/ptrace.h         | 2 +-
 kernel/signal.c                | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index eb51a5241187..9c8c46b06b0c 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -87,8 +87,7 @@ static inline int sigfindinword(unsigned long word)
 #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
 
 #ifndef __uClinux__
-struct pt_regs;
-extern void ptrace_signal_deliver(struct pt_regs *regs, void *cookie);
+extern void ptrace_signal_deliver(void);
 #define ptrace_signal_deliver ptrace_signal_deliver
 #endif /* __uClinux__ */
 
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 710a528b928b..9a396cda3147 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -108,8 +108,9 @@ int handle_kernel_fault(struct pt_regs *regs)
 	return 1;
 }
 
-void ptrace_signal_deliver(struct pt_regs *regs, void *cookie)
+void ptrace_signal_deliver(void)
 {
+	struct pt_regs *regs = signal_pt_regs();
 	if (regs->orig_d0 < 0)
 		return;
 	switch (regs->d0) {
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index b8e6dcec78ae..a89ff04bddd9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -330,7 +330,7 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #endif
 
 #ifndef ptrace_signal_deliver
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+#define ptrace_signal_deliver() ((void)0)
 #endif
 
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 0af8868525d6..17d4e17fd614 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2141,7 +2141,7 @@ static void do_jobctl_trap(void)
 static int ptrace_signal(int signr, siginfo_t *info,
 			 struct pt_regs *regs, void *cookie)
 {
-	ptrace_signal_deliver(regs, cookie);
+	ptrace_signal_deliver();
 	/*
 	 * We do not check sig_kernel_stop(signr) but set this marker
 	 * unconditionally because we do not know whether debugger will
-- 
cgit v1.2.3-55-g7522


From 541880d9a2c7871f6370071d55aa6662d329c51e Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 5 Nov 2012 13:11:26 -0500
Subject: do_coredump(): get rid of pt_regs argument

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coredump.c            | 4 ++--
 include/linux/coredump.h | 4 ++--
 kernel/signal.c          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/coredump.c b/fs/coredump.c
index ce47379bfa61..177493272a61 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -458,7 +458,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 	return err;
 }
 
-void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
+void do_coredump(siginfo_t *siginfo)
 {
 	struct core_state core_state;
 	struct core_name cn;
@@ -474,7 +474,7 @@ void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.siginfo = siginfo,
-		.regs = regs,
+		.regs = signal_pt_regs(),
 		.limit = rlimit(RLIMIT_CORE),
 		/*
 		 * We must use the same mm->flags while dumping core to avoid
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 1d7399314a89..a98f1ca60407 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -13,9 +13,9 @@
 extern int dump_write(struct file *file, const void *addr, int nr);
 extern int dump_seek(struct file *file, loff_t off);
 #ifdef CONFIG_COREDUMP
-extern void do_coredump(siginfo_t *siginfo, struct pt_regs *regs);
+extern void do_coredump(siginfo_t *siginfo);
 #else
-static inline void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) {}
+static inline void do_coredump(siginfo_t *siginfo) {}
 #endif
 
 #endif /* _LINUX_COREDUMP_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 2e3b5d572808..e75e4bd2839b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2359,7 +2359,7 @@ relock:
 			 * first and our do_group_exit call below will use
 			 * that value and ignore the one we pass it.
 			 */
-			do_coredump(info, regs);
+			do_coredump(info);
 		}
 
 		/*
-- 
cgit v1.2.3-55-g7522


From 4b9347dcbe3b426d19bda99b2061a2d1fe5a2dce Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Mon, 15 Oct 2012 16:03:51 +0200
Subject: common: DMA-mapping: add DMA_ATTR_FORCE_CONTIGUOUS attribute

This patch adds DMA_ATTR_FORCE_CONTIGUOUS attribute to the DMA-mapping
subsystem.

By default DMA-mapping subsystem is allowed to assemble the buffer
allocated by dma_alloc_attrs() function from individual pages if it can
be mapped as contiguous chunk into device dma address space. By
specifing this attribute the allocated buffer is forced to be contiguous
also in physical memory.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 Documentation/DMA-attributes.txt | 9 +++++++++
 include/linux/dma-attrs.h        | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index f50309081ac7..e59480db9ee0 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -91,3 +91,12 @@ transferred to 'device' domain. This attribute can be also used for
 dma_unmap_{single,page,sg} functions family to force buffer to stay in
 device domain after releasing a mapping for it. Use this attribute with
 care!
+
+DMA_ATTR_FORCE_CONTIGUOUS
+-------------------------
+
+By default DMA-mapping subsystem is allowed to assemble the buffer
+allocated by dma_alloc_attrs() function from individual pages if it can
+be mapped as contiguous chunk into device dma address space. By
+specifing this attribute the allocated buffer is forced to be contiguous
+also in physical memory.
diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h
index f83f793223ff..c8e1831d7572 100644
--- a/include/linux/dma-attrs.h
+++ b/include/linux/dma-attrs.h
@@ -17,6 +17,7 @@ enum dma_attr {
 	DMA_ATTR_NON_CONSISTENT,
 	DMA_ATTR_NO_KERNEL_MAPPING,
 	DMA_ATTR_SKIP_CPU_SYNC,
+	DMA_ATTR_FORCE_CONTIGUOUS,
 	DMA_ATTR_MAX,
 };
 
-- 
cgit v1.2.3-55-g7522


From b8fbdc42c5c5df8ab1f358fe90e3a8a1bdc9ae48 Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar
Date: Thu, 22 Nov 2012 12:16:43 +0100
Subject: of: add 'const' for of_parse_phandle parameter *np

The existing function does not change the passed device_node pointer. It is
only handed to of_get_property which itself takes a const struct device_node.

of_parse_phandle() can therefore take a const pointer as well.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
[grant.likely: drop extraneous whitespace change]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/base.c  | 4 ++--
 include/linux/of.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 0ceb26a16050..c3724110a288 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -978,8 +978,8 @@ EXPORT_SYMBOL_GPL(of_property_count_strings);
  * Returns the device_node pointer with refcount incremented.  Use
  * of_node_put() on it when done.
  */
-struct device_node *
-of_parse_phandle(struct device_node *np, const char *phandle_name, int index)
+struct device_node *of_parse_phandle(const struct device_node *np,
+				     const char *phandle_name, int index)
 {
 	const __be32 *phandle;
 	int size;
diff --git a/include/linux/of.h b/include/linux/of.h
index 13e0aacb4d9f..7337dc109c89 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -270,7 +270,7 @@ extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
 extern int of_modalias_node(struct device_node *node, char *modalias, int len);
-extern struct device_node *of_parse_phandle(struct device_node *np,
+extern struct device_node *of_parse_phandle(const struct device_node *np,
 					    const char *phandle_name,
 					    int index);
 extern int of_parse_phandle_with_args(struct device_node *np,
@@ -438,7 +438,7 @@ static inline int of_property_match_string(struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline struct device_node *of_parse_phandle(struct device_node *np,
+static inline struct device_node *of_parse_phandle(const struct device_node *np,
 						   const char *phandle_name,
 						   int index)
 {
-- 
cgit v1.2.3-55-g7522


From bb728820fe7c42fdb838ab2745fb5fe6b18b5ffa Mon Sep 17 00:00:00 2001
From: Rami Rosen
Date: Wed, 28 Nov 2012 21:55:25 +0000
Subject: core: make GRO methods static.

This patch changes three methods to be static and removes their
EXPORT_SYMBOLs in core/dev.c and their external declaration in
netdevice.h. The methods, dev_gro_receive(), napi_frags_finish() and
napi_skb_finish(), which are in the GRO rx path, are not used
outside core/dev.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ------
 net/core/dev.c            | 9 +++------
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e9929abeb932..18c5dc98f6dc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2153,16 +2153,10 @@ extern void dev_kfree_skb_any(struct sk_buff *skb);
 extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 extern int		netif_receive_skb(struct sk_buff *skb);
-extern gro_result_t	dev_gro_receive(struct napi_struct *napi,
-					struct sk_buff *skb);
-extern gro_result_t	napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
 extern gro_result_t	napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi, bool flush_old);
 extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
-extern gro_result_t	napi_frags_finish(struct napi_struct *napi,
-					  struct sk_buff *skb,
-					  gro_result_t ret);
 extern gro_result_t	napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a5f55866429..2f94df257e5a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3592,7 +3592,7 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_offload *ptype;
@@ -3683,7 +3683,6 @@ normal:
 	ret = GRO_NORMAL;
 	goto pull;
 }
-EXPORT_SYMBOL(dev_gro_receive);
 
 static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
@@ -3710,7 +3709,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	return dev_gro_receive(napi, skb);
 }
 
-gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
 	case GRO_NORMAL:
@@ -3736,7 +3735,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_skb_finish);
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
 {
@@ -3788,7 +3786,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 			       gro_result_t ret)
 {
 	switch (ret) {
@@ -3813,7 +3811,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_frags_finish);
 
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
-- 
cgit v1.2.3-55-g7522


From 465aac6d496aa3e99caaa6868865fb3830f73d80 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Fri, 30 Nov 2012 10:01:51 +0000
Subject: Fix build when CONFIG_W1_MASTER_GPIO=m b exporting "allnodes"

ERROR: "allnodes" [drivers/w1/masters/w1-gpio.ko] undefined!

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
[grant.likely: allnodes is too generic; rename to of_allnodes]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Ville Syrjala <syrjala@sci.fi>
---
 arch/arm/mach-vexpress/v2m.c |  2 +-
 drivers/of/base.c            | 29 +++++++++++++++--------------
 drivers/of/fdt.c             |  2 +-
 drivers/of/pdt.c             | 12 ++++++------
 include/linux/of.h           |  4 ++--
 5 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 560e0df728f8..359f782c747d 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -589,7 +589,7 @@ void __init v2m_dt_init_early(void)
 		return;
 
 	/* Confirm board type against DT property, if available */
-	if (of_property_read_u32(allnodes, "arm,hbi", &dt_hbi) == 0) {
+	if (of_property_read_u32(of_allnodes, "arm,hbi", &dt_hbi) == 0) {
 		int site = v2m_get_master_site();
 		u32 id = readl(v2m_sysreg_base + (site == SYS_CFG_SITE_DB2 ?
 				V2M_SYS_PROCID1 : V2M_SYS_PROCID0));
diff --git a/drivers/of/base.c b/drivers/of/base.c
index c3724110a288..538e3cfad23e 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -45,7 +45,8 @@ struct alias_prop {
 
 static LIST_HEAD(aliases_lookup);
 
-struct device_node *allnodes;
+struct device_node *of_allnodes;
+EXPORT_SYMBOL(of_allnodes);
 struct device_node *of_chosen;
 struct device_node *of_aliases;
 
@@ -199,7 +200,7 @@ struct device_node *of_find_all_nodes(struct device_node *prev)
 	struct device_node *np;
 
 	read_lock(&devtree_lock);
-	np = prev ? prev->allnext : allnodes;
+	np = prev ? prev->allnext : of_allnodes;
 	for (; np != NULL; np = np->allnext)
 		if (of_node_get(np))
 			break;
@@ -422,7 +423,7 @@ EXPORT_SYMBOL(of_get_child_by_name);
  */
 struct device_node *of_find_node_by_path(const char *path)
 {
-	struct device_node *np = allnodes;
+	struct device_node *np = of_allnodes;
 
 	read_lock(&devtree_lock);
 	for (; np; np = np->allnext) {
@@ -452,7 +453,7 @@ struct device_node *of_find_node_by_name(struct device_node *from,
 	struct device_node *np;
 
 	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
+	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext)
 		if (np->name && (of_node_cmp(np->name, name) == 0)
 		    && of_node_get(np))
@@ -481,7 +482,7 @@ struct device_node *of_find_node_by_type(struct device_node *from,
 	struct device_node *np;
 
 	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
+	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext)
 		if (np->type && (of_node_cmp(np->type, type) == 0)
 		    && of_node_get(np))
@@ -512,7 +513,7 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 	struct device_node *np;
 
 	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
+	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		if (type
 		    && !(np->type && (of_node_cmp(np->type, type) == 0)))
@@ -545,7 +546,7 @@ struct device_node *of_find_node_with_property(struct device_node *from,
 	struct property *pp;
 
 	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
+	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		for (pp = np->properties; pp; pp = pp->next) {
 			if (of_prop_cmp(pp->name, prop_name) == 0) {
@@ -616,7 +617,7 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
 		*match = NULL;
 
 	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
+	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		if (of_match_node(matches, np) && of_node_get(np)) {
 			if (match)
@@ -669,7 +670,7 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 	struct device_node *np;
 
 	read_lock(&devtree_lock);
-	for (np = allnodes; np; np = np->allnext)
+	for (np = of_allnodes; np; np = np->allnext)
 		if (np->phandle == handle)
 			break;
 	of_node_get(np);
@@ -1254,9 +1255,9 @@ void of_attach_node(struct device_node *np)
 
 	write_lock_irqsave(&devtree_lock, flags);
 	np->sibling = np->parent->child;
-	np->allnext = allnodes;
+	np->allnext = of_allnodes;
 	np->parent->child = np;
-	allnodes = np;
+	of_allnodes = np;
 	write_unlock_irqrestore(&devtree_lock, flags);
 }
 
@@ -1277,11 +1278,11 @@ void of_detach_node(struct device_node *np)
 	if (!parent)
 		goto out_unlock;
 
-	if (allnodes == np)
-		allnodes = np->allnext;
+	if (of_allnodes == np)
+		of_allnodes = np->allnext;
 	else {
 		struct device_node *prev;
-		for (prev = allnodes;
+		for (prev = of_allnodes;
 		     prev->allnext != np;
 		     prev = prev->allnext)
 			;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index e36ff40011f4..a65c39c473bf 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -712,7 +712,7 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
  */
 void __init unflatten_device_tree(void)
 {
-	__unflatten_device_tree(initial_boot_params, &allnodes,
+	__unflatten_device_tree(initial_boot_params, &of_allnodes,
 				early_init_dt_alloc_memory_arch);
 
 	/* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */
diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c
index 07cc1d678e4d..37b56fd716e6 100644
--- a/drivers/of/pdt.c
+++ b/drivers/of/pdt.c
@@ -241,15 +241,15 @@ void __init of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops)
 	BUG_ON(!ops);
 	of_pdt_prom_ops = ops;
 
-	allnodes = of_pdt_create_node(root_node, NULL);
+	of_allnodes = of_pdt_create_node(root_node, NULL);
 #if defined(CONFIG_SPARC)
-	allnodes->path_component_name = "";
+	of_allnodes->path_component_name = "";
 #endif
-	allnodes->full_name = "/";
+	of_allnodes->full_name = "/";
 
-	nextp = &allnodes->allnext;
-	allnodes->child = of_pdt_build_tree(allnodes,
-			of_pdt_prom_ops->getchild(allnodes->phandle), &nextp);
+	nextp = &of_allnodes->allnext;
+	of_allnodes->child = of_pdt_build_tree(of_allnodes,
+			of_pdt_prom_ops->getchild(of_allnodes->phandle), &nextp);
 
 	/* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */
 	of_alias_scan(kernel_tree_alloc);
diff --git a/include/linux/of.h b/include/linux/of.h
index 7337dc109c89..60053bd7e79a 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -88,14 +88,14 @@ static inline void of_node_put(struct device_node *node) { }
 #ifdef CONFIG_OF
 
 /* Pointer for first entry in chain of all nodes. */
-extern struct device_node *allnodes;
+extern struct device_node *of_allnodes;
 extern struct device_node *of_chosen;
 extern struct device_node *of_aliases;
 extern rwlock_t devtree_lock;
 
 static inline bool of_have_populated_dt(void)
 {
-	return allnodes != NULL;
+	return of_allnodes != NULL;
 }
 
 static inline bool of_node_is_root(const struct device_node *node)
-- 
cgit v1.2.3-55-g7522


From eed8c02e680c04cd737e0a9cef74e68d8eb0cefa Mon Sep 17 00:00:00 2001
From: Lukas Czerner
Date: Fri, 30 Nov 2012 11:42:40 +0100
Subject: wait: add wait_event_lock_irq() interface

New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
moves the private wait_event_lock_irq() macro from MD to regular wait
includes, introduces new macro wait_event_lock_irq_cmd() instead of using
the old method with omitting cmd parameter which is ugly and makes a use
of new macros in the MD. It also introduces the _interruptible_ variant.

The use of new interface is when one have a special lock to protect data
structures used in the condition, or one also needs to invoke "cmd"
before putting it to sleep.

All new macros are expected to be called with the lock taken. The lock
is released before sleep and is reacquired afterwards. We will leave the
macro with the lock held.

Note to DM: IMO this should also fix theoretical race on waitqueue while
using simultaneously wait_event_lock_irq() and wait_event() because of
lack of locking around current state setting and wait queue removal.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Neil Brown <neilb@suse.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/md.c      |   2 +-
 drivers/md/md.h      |  26 --------
 drivers/md/raid1.c   |  15 +++--
 drivers/md/raid10.c  |  15 +++--
 drivers/md/raid5.c   |  12 ++--
 include/linux/wait.h | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 184 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ab768acfb62..7e513a38cec7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 	spin_lock_irq(&mddev->write_lock);
 	wait_event_lock_irq(mddev->sb_wait,
 			    !mddev->flush_bio,
-			    mddev->write_lock, /*nothing*/);
+			    mddev->write_lock);
 	mddev->flush_bio = bio;
 	spin_unlock_irq(&mddev->write_lock);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index af443ab868db..1e2fc3d9c74c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -551,32 +551,6 @@ struct md_thread {
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
-do {									\
-	wait_queue_t __wait;						\
-	init_waitqueue_entry(&__wait, current);				\
-									\
-	add_wait_queue(&wq, &__wait);					\
-	for (;;) {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	current->state = TASK_RUNNING;					\
-	remove_wait_queue(&wq, &__wait);				\
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
-do {									\
-	if (condition)	 						\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
-} while (0)
-
 static inline void safe_put_page(struct page *p)
 {
 	if (p) put_page(p);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8034fbd6190c..534dd74a2da0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 906ccbd0f7dc..9a08f621b27d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 
 	spin_unlock_irq(&conf->resync_lock);
 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c5439dce0295..2bf617d6f4fd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0 || noquiesce,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
@@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 						    (atomic_read(&conf->active_stripes)
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock,
-						    );
+						    conf->device_lock);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
-				    conf->device_lock,
-				    );
+				    conf->device_lock);
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -4000,7 +3998,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
@@ -6088,7 +6086,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		conf->quiesce = 1;
 		spin_unlock_irq(&conf->device_lock);
 		/* allow reshape to continue */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 168dfe122dd3..7cb64d4b499d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -550,6 +550,170 @@ do {									\
 	__ret;								\
 })
 
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition gets true. The
+ *			     condition is checked under the lock. This
+ *			     is expected to be called with the lock
+ *			     taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *	  and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition gets true. The
+ *			 condition is checked under the lock. This
+ *			 is expected to be called with the lock
+ *			 taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, );			\
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition,		\
+					    lock, ret, cmd)		\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected to
+ *		be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *	  schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, cmd);	\
+	__ret;								\
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected
+ *		to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, );	\
+	__ret;								\
+})
+
+
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.
-- 
cgit v1.2.3-55-g7522


From 7b5a35225b0d4fd779cf79d7624e63d1957f6c4d Mon Sep 17 00:00:00 2001
From: Lukas Czerner
Date: Fri, 30 Nov 2012 11:42:41 +0100
Subject: loop: Limit the number of requests in the bio list

Currently there is not limitation of number of requests in the loop bio
list. This can lead into some nasty situations when the caller spawns
tons of bio requests taking huge amount of memory. This is even more
obvious with discard where blkdev_issue_discard() will submit all bios
for the range and wait for them to finish afterwards. On really big loop
devices and slow backing file system this can lead to OOM situation as
reported by Dave Chinner.

With this patch we will wait in loop_make_request() if the number of
bios in the loop bio list would exceed 'nr_congestion_on'.
We'll wake up the process as we process the bios form the list. Some
threshold hysteresis is in place to avoid high frequency oscillation.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reported-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 10 ++++++++++
 include/linux/loop.h |  3 +++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e9d594fd12cb..800aec7927de 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -463,6 +463,7 @@ out:
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
+	lo->lo_bio_count++;
 	bio_list_add(&lo->lo_bio_list, bio);
 }
 
@@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
+	lo->lo_bio_count--;
 	return bio_list_pop(&lo->lo_bio_list);
 }
 
@@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
 		goto out;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
+	if (lo->lo_bio_count >= q->nr_congestion_on)
+		wait_event_lock_irq(lo->lo_req_wait,
+				    lo->lo_bio_count < q->nr_congestion_off,
+				    lo->lo_lock);
 	loop_add_bio(lo, old_bio);
 	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
@@ -546,6 +552,8 @@ static int loop_thread(void *data)
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
+		if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
+			wake_up(&lo->lo_req_wait);
 		spin_unlock_irq(&lo->lo_lock);
 
 		BUG_ON(!bio);
@@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->transfer = transfer_none;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
+	lo->lo_bio_count = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -1660,6 +1669,7 @@ static int loop_add(struct loop_device **l, int i)
 	lo->lo_number		= i;
 	lo->lo_thread		= NULL;
 	init_waitqueue_head(&lo->lo_event);
+	init_waitqueue_head(&lo->lo_req_wait);
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 6492181bcb1d..460b60fa7adf 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -53,10 +53,13 @@ struct loop_device {
 
 	spinlock_t		lo_lock;
 	struct bio_list		lo_bio_list;
+	unsigned int		lo_bio_count;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
 	wait_queue_head_t	lo_event;
+	/* wait queue for incoming requests */
+	wait_queue_head_t	lo_req_wait;
 
 	struct request_queue	*lo_queue;
 	struct gendisk		*lo_disk;
-- 
cgit v1.2.3-55-g7522


From 12a5105e04143569b3e9e5ef03cf9cad8862473a Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Thu, 29 Nov 2012 00:17:15 +0530
Subject: mfd: stmpe: Get rid of irq_invert_polarity

Since the very first patch, stmpe core driver is using irq_invert_polarity as
part of platform data. But, nobody is actually using it in kernel till now.

Also, this is not something part of hardware specs, but is included to cater
some board mistakes or quirks.

So, better get rid of it. This is earlier discussed here:

https://lkml.org/lkml/2012/11/27/636

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe.c       | 7 -------
 include/linux/mfd/stmpe.h | 2 --
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index f9f7de796029..90c6151bc52e 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -953,13 +953,6 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe)
 			else
 				icr |= STMPE_ICR_LSB_HIGH;
 		}
-
-		if (stmpe->pdata->irq_invert_polarity) {
-			if (id == STMPE801_ID)
-				icr ^= STMPE801_REG_SYS_CTRL_INT_HI;
-			else
-				icr ^= STMPE_ICR_LSB_HIGH;
-		}
 	}
 
 	if (stmpe->pdata->autosleep) {
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 15dac790365f..383ac1512a39 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -190,7 +190,6 @@ struct stmpe_ts_platform_data {
  * @id: device id to distinguish between multiple STMPEs on the same board
  * @blocks: bitmask of blocks to enable (use STMPE_BLOCK_*)
  * @irq_trigger: IRQ trigger to use for the interrupt to the host
- * @irq_invert_polarity: IRQ line is connected with reversed polarity
  * @autosleep: bool to enable/disable stmpe autosleep
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
  * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
@@ -207,7 +206,6 @@ struct stmpe_platform_data {
 	unsigned int blocks;
 	int irq_base;
 	unsigned int irq_trigger;
-	bool irq_invert_polarity;
 	bool autosleep;
 	bool irq_over_gpio;
 	int irq_gpio;
-- 
cgit v1.2.3-55-g7522


From 0e5fca8106199f5c680bb93e75c16381c4c256ce Mon Sep 17 00:00:00 2001
From: Kim, Milo
Date: Thu, 29 Nov 2012 08:48:26 +0000
Subject: mfd: tps65910: Remove unused data

The 'io_mutex' is not used anywhere.
The regmap API supports the mutex internally, so no additional mutex
is required.

And 'domain' private data is unnecessary because the irq domain is
already registered by using regmap_add_irq_chip().

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65910.c       | 1 -
 include/linux/mfd/tps65910.h | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index ca3783350ccf..c160c2d76f79 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -486,7 +486,6 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 	tps65910->dev = &i2c->dev;
 	tps65910->i2c_client = i2c;
 	tps65910->id = chip_id;
-	mutex_init(&tps65910->io_mutex);
 
 	tps65910->regmap = devm_regmap_init_i2c(i2c, &tps65910_regmap_config);
 	if (IS_ERR(tps65910->regmap)) {
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 0b16903f7e88..20e433e551e3 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -893,7 +893,6 @@ struct tps65910 {
 	struct device *dev;
 	struct i2c_client *i2c_client;
 	struct regmap *regmap;
-	struct mutex io_mutex;
 	unsigned int id;
 
 	/* Client devices */
@@ -907,7 +906,6 @@ struct tps65910 {
 	/* IRQ Handling */
 	int chip_irq;
 	struct regmap_irq_chip_data *irq_data;
-	struct irq_domain *domain;
 };
 
 struct tps65910_platform_data {
-- 
cgit v1.2.3-55-g7522


From e29482e8487954c87dc7b4fdbc53574bf1d4cce2 Mon Sep 17 00:00:00 2001
From: Mathias Nyman
Date: Fri, 30 Nov 2012 12:37:36 +0100
Subject: gpio / ACPI: add ACPI support

Add support for translating ACPI GPIO pin numbers to Linux GPIO API pins.
Needs a gpio controller driver with the acpi handler hook set.

Drivers can use acpi_get_gpio() to translate ACPI5 GpioIO and GpioInt
resources to Linux GPIO's.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/gpio/Kconfig        |  4 ++++
 drivers/gpio/Makefile       |  1 +
 drivers/gpio/gpiolib-acpi.c | 54 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi_gpio.h   | 19 ++++++++++++++++
 4 files changed, 78 insertions(+)
 create mode 100644 drivers/gpio/gpiolib-acpi.c
 create mode 100644 include/linux/acpi_gpio.h

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index f11d8e3b4041..5c9b384119b8 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -49,6 +49,10 @@ config OF_GPIO
 	def_bool y
 	depends on OF
 
+config GPIO_ACPI
+	def_bool y
+	depends on ACPI
+
 config DEBUG_GPIO
 	bool "Debug GPIO calls"
 	depends on DEBUG_KERNEL
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 9aeed6707326..420dbaca05f1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o devres.o
 obj-$(CONFIG_OF_GPIO)		+= gpiolib-of.o
+obj-$(CONFIG_GPIO_ACPI)		+= gpiolib-acpi.o
 
 # Device drivers. Generally keep list sorted alphabetically
 obj-$(CONFIG_GPIO_GENERIC)	+= gpio-generic.o
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
new file mode 100644
index 000000000000..cbad6e908d30
--- /dev/null
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -0,0 +1,54 @@
+/*
+ * ACPI helpers for GPIO API
+ *
+ * Copyright (C) 2012, Intel Corporation
+ * Authors: Mathias Nyman <mathias.nyman@linux.intel.com>
+ *          Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/gpio.h>
+#include <linux/export.h>
+#include <linux/acpi_gpio.h>
+#include <linux/acpi.h>
+
+static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
+{
+	if (!gc->dev)
+		return false;
+
+	return ACPI_HANDLE(gc->dev) == data;
+}
+
+/**
+ * acpi_get_gpio() - Translate ACPI GPIO pin to GPIO number usable with GPIO API
+ * @path:	ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1")
+ * @pin:	ACPI GPIO pin number (0-based, controller-relative)
+ *
+ * Returns GPIO number to use with Linux generic GPIO API, or errno error value
+ */
+
+int acpi_get_gpio(char *path, int pin)
+{
+	struct gpio_chip *chip;
+	acpi_handle handle;
+	acpi_status status;
+
+	status = acpi_get_handle(NULL, path, &handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	chip = gpiochip_find(handle, acpi_gpiochip_find);
+	if (!chip)
+		return -ENODEV;
+
+	if (!gpio_is_valid(chip->base + pin))
+		return -EINVAL;
+
+	return chip->base + pin;
+}
+EXPORT_SYMBOL_GPL(acpi_get_gpio);
diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
new file mode 100644
index 000000000000..91615a389b65
--- /dev/null
+++ b/include/linux/acpi_gpio.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_ACPI_GPIO_H_
+#define _LINUX_ACPI_GPIO_H_
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_GPIO_ACPI
+
+int acpi_get_gpio(char *path, int pin);
+
+#else /* CONFIG_GPIO_ACPI */
+
+static inline int acpi_get_gpio(char *path, int pin)
+{
+	return -ENODEV;
+}
+
+#endif /* CONFIG_GPIO_ACPI */
+
+#endif /* _LINUX_ACPI_GPIO_H_ */
-- 
cgit v1.2.3-55-g7522


From f87f1a2375a51ef8c5048bfce42587dbea1ca627 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen
Date: Wed, 21 Nov 2012 16:27:00 +0000
Subject: staging:iio: Move ad7793 driver out of staging

The driver does not expose any custom API to userspace and none of the standard
static code checker tools report any issues, so move it out of staging.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/Kconfig              |  12 +
 drivers/iio/adc/Makefile             |   1 +
 drivers/iio/adc/ad7793.c             | 691 ++++++++++++++++++++++++++++++++++
 drivers/staging/iio/adc/Kconfig      |  12 -
 drivers/staging/iio/adc/Makefile     |   1 -
 drivers/staging/iio/adc/ad7793.c     | 692 -----------------------------------
 drivers/staging/iio/adc/ad7793.h     | 116 ------
 include/linux/platform_data/ad7793.h | 112 ++++++
 8 files changed, 816 insertions(+), 821 deletions(-)
 create mode 100644 drivers/iio/adc/ad7793.c
 delete mode 100644 drivers/staging/iio/adc/ad7793.c
 delete mode 100644 drivers/staging/iio/adc/ad7793.h
 create mode 100644 include/linux/platform_data/ad7793.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index cd5eed60be28..408557b02441 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -42,6 +42,18 @@ config AD7791
 	  To compile this driver as a module, choose M here: the module will be
 	  called ad7791.
 
+config AD7793
+	tristate "Analog Devices AD7793 and similar ADCs driver"
+	depends on SPI
+	select AD_SIGMA_DELTA
+	help
+	  Say yes here to build support for Analog Devices AD7785, AD7792, AD7793,
+	  AD7794 and AD7795 SPI analog to digital converters (ADC).
+	  If unsure, say N (but it's safe to say "Y").
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called AD7793.
+
 config AD7476
 	tristate "Analog Devices AD7476 and similar 1-channel ADCs driver"
 	depends on SPI
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 3256dc64a466..78202d9eb961 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_AD7266) += ad7266.o
 obj-$(CONFIG_AD7298) += ad7298.o
 obj-$(CONFIG_AD7476) += ad7476.o
 obj-$(CONFIG_AD7791) += ad7791.o
+obj-$(CONFIG_AD7793) += ad7793.o
 obj-$(CONFIG_AD7887) += ad7887.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c
new file mode 100644
index 000000000000..a109e686b9f7
--- /dev/null
+++ b/drivers/iio/adc/ad7793.c
@@ -0,0 +1,691 @@
+/*
+ * AD7785/AD7792/AD7793/AD7794/AD7795 SPI ADC driver
+ *
+ * Copyright 2011-2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/spi/spi.h>
+#include <linux/regulator/consumer.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/adc/ad_sigma_delta.h>
+#include <linux/platform_data/ad7793.h>
+
+/* Registers */
+#define AD7793_REG_COMM		0 /* Communications Register (WO, 8-bit) */
+#define AD7793_REG_STAT		0 /* Status Register	     (RO, 8-bit) */
+#define AD7793_REG_MODE		1 /* Mode Register	     (RW, 16-bit */
+#define AD7793_REG_CONF		2 /* Configuration Register  (RW, 16-bit) */
+#define AD7793_REG_DATA		3 /* Data Register	     (RO, 16-/24-bit) */
+#define AD7793_REG_ID		4 /* ID Register	     (RO, 8-bit) */
+#define AD7793_REG_IO		5 /* IO Register	     (RO, 8-bit) */
+#define AD7793_REG_OFFSET	6 /* Offset Register	     (RW, 16-bit
+				   * (AD7792)/24-bit (AD7793)) */
+#define AD7793_REG_FULLSALE	7 /* Full-Scale Register
+				   * (RW, 16-bit (AD7792)/24-bit (AD7793)) */
+
+/* Communications Register Bit Designations (AD7793_REG_COMM) */
+#define AD7793_COMM_WEN		(1 << 7) /* Write Enable */
+#define AD7793_COMM_WRITE	(0 << 6) /* Write Operation */
+#define AD7793_COMM_READ	(1 << 6) /* Read Operation */
+#define AD7793_COMM_ADDR(x)	(((x) & 0x7) << 3) /* Register Address */
+#define AD7793_COMM_CREAD	(1 << 2) /* Continuous Read of Data Register */
+
+/* Status Register Bit Designations (AD7793_REG_STAT) */
+#define AD7793_STAT_RDY		(1 << 7) /* Ready */
+#define AD7793_STAT_ERR		(1 << 6) /* Error (Overrange, Underrange) */
+#define AD7793_STAT_CH3		(1 << 2) /* Channel 3 */
+#define AD7793_STAT_CH2		(1 << 1) /* Channel 2 */
+#define AD7793_STAT_CH1		(1 << 0) /* Channel 1 */
+
+/* Mode Register Bit Designations (AD7793_REG_MODE) */
+#define AD7793_MODE_SEL(x)	(((x) & 0x7) << 13) /* Operation Mode Select */
+#define AD7793_MODE_SEL_MASK	(0x7 << 13) /* Operation Mode Select mask */
+#define AD7793_MODE_CLKSRC(x)	(((x) & 0x3) << 6) /* ADC Clock Source Select */
+#define AD7793_MODE_RATE(x)	((x) & 0xF) /* Filter Update Rate Select */
+
+#define AD7793_MODE_CONT		0 /* Continuous Conversion Mode */
+#define AD7793_MODE_SINGLE		1 /* Single Conversion Mode */
+#define AD7793_MODE_IDLE		2 /* Idle Mode */
+#define AD7793_MODE_PWRDN		3 /* Power-Down Mode */
+#define AD7793_MODE_CAL_INT_ZERO	4 /* Internal Zero-Scale Calibration */
+#define AD7793_MODE_CAL_INT_FULL	5 /* Internal Full-Scale Calibration */
+#define AD7793_MODE_CAL_SYS_ZERO	6 /* System Zero-Scale Calibration */
+#define AD7793_MODE_CAL_SYS_FULL	7 /* System Full-Scale Calibration */
+
+#define AD7793_CLK_INT		0 /* Internal 64 kHz Clock not
+				   * available at the CLK pin */
+#define AD7793_CLK_INT_CO	1 /* Internal 64 kHz Clock available
+				   * at the CLK pin */
+#define AD7793_CLK_EXT		2 /* External 64 kHz Clock */
+#define AD7793_CLK_EXT_DIV2	3 /* External Clock divided by 2 */
+
+/* Configuration Register Bit Designations (AD7793_REG_CONF) */
+#define AD7793_CONF_VBIAS(x)	(((x) & 0x3) << 14) /* Bias Voltage
+						     * Generator Enable */
+#define AD7793_CONF_BO_EN	(1 << 13) /* Burnout Current Enable */
+#define AD7793_CONF_UNIPOLAR	(1 << 12) /* Unipolar/Bipolar Enable */
+#define AD7793_CONF_BOOST	(1 << 11) /* Boost Enable */
+#define AD7793_CONF_GAIN(x)	(((x) & 0x7) << 8) /* Gain Select */
+#define AD7793_CONF_REFSEL(x)	((x) << 6) /* INT/EXT Reference Select */
+#define AD7793_CONF_BUF		(1 << 4) /* Buffered Mode Enable */
+#define AD7793_CONF_CHAN(x)	((x) & 0xf) /* Channel select */
+#define AD7793_CONF_CHAN_MASK	0xf /* Channel select mask */
+
+#define AD7793_CH_AIN1P_AIN1M	0 /* AIN1(+) - AIN1(-) */
+#define AD7793_CH_AIN2P_AIN2M	1 /* AIN2(+) - AIN2(-) */
+#define AD7793_CH_AIN3P_AIN3M	2 /* AIN3(+) - AIN3(-) */
+#define AD7793_CH_AIN1M_AIN1M	3 /* AIN1(-) - AIN1(-) */
+#define AD7793_CH_TEMP		6 /* Temp Sensor */
+#define AD7793_CH_AVDD_MONITOR	7 /* AVDD Monitor */
+
+#define AD7795_CH_AIN4P_AIN4M	4 /* AIN4(+) - AIN4(-) */
+#define AD7795_CH_AIN5P_AIN5M	5 /* AIN5(+) - AIN5(-) */
+#define AD7795_CH_AIN6P_AIN6M	6 /* AIN6(+) - AIN6(-) */
+#define AD7795_CH_AIN1M_AIN1M	8 /* AIN1(-) - AIN1(-) */
+
+/* ID Register Bit Designations (AD7793_REG_ID) */
+#define AD7785_ID		0xB
+#define AD7792_ID		0xA
+#define AD7793_ID		0xB
+#define AD7794_ID		0xF
+#define AD7795_ID		0xF
+#define AD7793_ID_MASK		0xF
+
+/* IO (Excitation Current Sources) Register Bit Designations (AD7793_REG_IO) */
+#define AD7793_IO_IEXC1_IOUT1_IEXC2_IOUT2	0 /* IEXC1 connect to IOUT1,
+						   * IEXC2 connect to IOUT2 */
+#define AD7793_IO_IEXC1_IOUT2_IEXC2_IOUT1	1 /* IEXC1 connect to IOUT2,
+						   * IEXC2 connect to IOUT1 */
+#define AD7793_IO_IEXC1_IEXC2_IOUT1		2 /* Both current sources
+						   * IEXC1,2 connect to IOUT1 */
+#define AD7793_IO_IEXC1_IEXC2_IOUT2		3 /* Both current sources
+						   * IEXC1,2 connect to IOUT2 */
+
+#define AD7793_IO_IXCEN_10uA	(1 << 0) /* Excitation Current 10uA */
+#define AD7793_IO_IXCEN_210uA	(2 << 0) /* Excitation Current 210uA */
+#define AD7793_IO_IXCEN_1mA	(3 << 0) /* Excitation Current 1mA */
+
+/* NOTE:
+ * The AD7792/AD7793 features a dual use data out ready DOUT/RDY output.
+ * In order to avoid contentions on the SPI bus, it's therefore necessary
+ * to use spi bus locking.
+ *
+ * The DOUT/RDY output must also be wired to an interrupt capable GPIO.
+ */
+
+struct ad7793_chip_info {
+	unsigned int id;
+	const struct iio_chan_spec *channels;
+	unsigned int num_channels;
+};
+
+struct ad7793_state {
+	const struct ad7793_chip_info	*chip_info;
+	struct regulator		*reg;
+	u16				int_vref_mv;
+	u16				mode;
+	u16				conf;
+	u32				scale_avail[8][2];
+
+	struct ad_sigma_delta		sd;
+
+};
+
+enum ad7793_supported_device_ids {
+	ID_AD7785,
+	ID_AD7792,
+	ID_AD7793,
+	ID_AD7794,
+	ID_AD7795,
+};
+
+static struct ad7793_state *ad_sigma_delta_to_ad7793(struct ad_sigma_delta *sd)
+{
+	return container_of(sd, struct ad7793_state, sd);
+}
+
+static int ad7793_set_channel(struct ad_sigma_delta *sd, unsigned int channel)
+{
+	struct ad7793_state *st = ad_sigma_delta_to_ad7793(sd);
+
+	st->conf &= ~AD7793_CONF_CHAN_MASK;
+	st->conf |= AD7793_CONF_CHAN(channel);
+
+	return ad_sd_write_reg(&st->sd, AD7793_REG_CONF, 2, st->conf);
+}
+
+static int ad7793_set_mode(struct ad_sigma_delta *sd,
+			   enum ad_sigma_delta_mode mode)
+{
+	struct ad7793_state *st = ad_sigma_delta_to_ad7793(sd);
+
+	st->mode &= ~AD7793_MODE_SEL_MASK;
+	st->mode |= AD7793_MODE_SEL(mode);
+
+	return ad_sd_write_reg(&st->sd, AD7793_REG_MODE, 2, st->mode);
+}
+
+static const struct ad_sigma_delta_info ad7793_sigma_delta_info = {
+	.set_channel = ad7793_set_channel,
+	.set_mode = ad7793_set_mode,
+	.has_registers = true,
+	.addr_shift = 3,
+	.read_mask = BIT(6),
+};
+
+static const struct ad_sd_calib_data ad7793_calib_arr[6] = {
+	{AD7793_MODE_CAL_INT_ZERO, AD7793_CH_AIN1P_AIN1M},
+	{AD7793_MODE_CAL_INT_FULL, AD7793_CH_AIN1P_AIN1M},
+	{AD7793_MODE_CAL_INT_ZERO, AD7793_CH_AIN2P_AIN2M},
+	{AD7793_MODE_CAL_INT_FULL, AD7793_CH_AIN2P_AIN2M},
+	{AD7793_MODE_CAL_INT_ZERO, AD7793_CH_AIN3P_AIN3M},
+	{AD7793_MODE_CAL_INT_FULL, AD7793_CH_AIN3P_AIN3M}
+};
+
+static int ad7793_calibrate_all(struct ad7793_state *st)
+{
+	return ad_sd_calibrate_all(&st->sd, ad7793_calib_arr,
+				   ARRAY_SIZE(ad7793_calib_arr));
+}
+
+static int ad7793_setup(struct iio_dev *indio_dev,
+	const struct ad7793_platform_data *pdata,
+	unsigned int vref_mv)
+{
+	struct ad7793_state *st = iio_priv(indio_dev);
+	int i, ret = -1;
+	unsigned long long scale_uv;
+	u32 id;
+
+	if ((pdata->current_source_direction == AD7793_IEXEC1_IEXEC2_IOUT1 ||
+		pdata->current_source_direction == AD7793_IEXEC1_IEXEC2_IOUT2) &&
+		((pdata->exitation_current != AD7793_IX_10uA) &&
+		(pdata->exitation_current != AD7793_IX_210uA)))
+		return -EINVAL;
+
+	/* reset the serial interface */
+	ret = spi_write(st->sd.spi, (u8 *)&ret, sizeof(ret));
+	if (ret < 0)
+		goto out;
+	usleep_range(500, 2000); /* Wait for at least 500us */
+
+	/* write/read test for device presence */
+	ret = ad_sd_read_reg(&st->sd, AD7793_REG_ID, 1, &id);
+	if (ret)
+		goto out;
+
+	id &= AD7793_ID_MASK;
+
+	if (id != st->chip_info->id) {
+		dev_err(&st->sd.spi->dev, "device ID query failed\n");
+		goto out;
+	}
+
+	st->mode = AD7793_MODE_RATE(1);
+	st->mode |= AD7793_MODE_CLKSRC(pdata->clock_src);
+	st->conf = AD7793_CONF_REFSEL(pdata->refsel);
+	st->conf |= AD7793_CONF_VBIAS(pdata->bias_voltage);
+	if (pdata->buffered)
+		st->conf |= AD7793_CONF_BUF;
+	if (pdata->boost_enable)
+		st->conf |= AD7793_CONF_BOOST;
+	if (pdata->burnout_current)
+		st->conf |= AD7793_CONF_BO_EN;
+	if (pdata->unipolar)
+		st->conf |= AD7793_CONF_UNIPOLAR;
+
+	ret = ad7793_set_mode(&st->sd, AD_SD_MODE_IDLE);
+	if (ret)
+		goto out;
+
+	ret = ad7793_set_channel(&st->sd, 0);
+	if (ret)
+		goto out;
+
+	ret = ad_sd_write_reg(&st->sd, AD7793_REG_IO, 1,
+				   pdata->exitation_current |
+			       (pdata->current_source_direction << 2));
+	if (ret)
+		goto out;
+
+	ret = ad7793_calibrate_all(st);
+	if (ret)
+		goto out;
+
+	/* Populate available ADC input ranges */
+	for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) {
+		scale_uv = ((u64)vref_mv * 100000000)
+			>> (st->chip_info->channels[0].scan_type.realbits -
+			(!!(st->conf & AD7793_CONF_UNIPOLAR) ? 0 : 1));
+		scale_uv >>= i;
+
+		st->scale_avail[i][1] = do_div(scale_uv, 100000000) * 10;
+		st->scale_avail[i][0] = scale_uv;
+	}
+
+	return 0;
+out:
+	dev_err(&st->sd.spi->dev, "setup failed\n");
+	return ret;
+}
+
+static const u16 sample_freq_avail[16] = {0, 470, 242, 123, 62, 50, 39, 33, 19,
+					  17, 16, 12, 10, 8, 6, 4};
+
+static ssize_t ad7793_read_frequency(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct ad7793_state *st = iio_priv(indio_dev);
+
+	return sprintf(buf, "%d\n",
+		       sample_freq_avail[AD7793_MODE_RATE(st->mode)]);
+}
+
+static ssize_t ad7793_write_frequency(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t len)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct ad7793_state *st = iio_priv(indio_dev);
+	long lval;
+	int i, ret;
+
+	mutex_lock(&indio_dev->mlock);
+	if (iio_buffer_enabled(indio_dev)) {
+		mutex_unlock(&indio_dev->mlock);
+		return -EBUSY;
+	}
+	mutex_unlock(&indio_dev->mlock);
+
+	ret = kstrtol(buf, 10, &lval);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(sample_freq_avail); i++)
+		if (lval == sample_freq_avail[i]) {
+			mutex_lock(&indio_dev->mlock);
+			st->mode &= ~AD7793_MODE_RATE(-1);
+			st->mode |= AD7793_MODE_RATE(i);
+			ad_sd_write_reg(&st->sd, AD7793_REG_MODE,
+					 sizeof(st->mode), st->mode);
+			mutex_unlock(&indio_dev->mlock);
+			ret = 0;
+		}
+
+	return ret ? ret : len;
+}
+
+static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO,
+		ad7793_read_frequency,
+		ad7793_write_frequency);
+
+static IIO_CONST_ATTR_SAMP_FREQ_AVAIL(
+	"470 242 123 62 50 39 33 19 17 16 12 10 8 6 4");
+
+static ssize_t ad7793_show_scale_available(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct ad7793_state *st = iio_priv(indio_dev);
+	int i, len = 0;
+
+	for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++)
+		len += sprintf(buf + len, "%d.%09u ", st->scale_avail[i][0],
+			       st->scale_avail[i][1]);
+
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+static IIO_DEVICE_ATTR_NAMED(in_m_in_scale_available,
+		in_voltage-voltage_scale_available, S_IRUGO,
+		ad7793_show_scale_available, NULL, 0);
+
+static struct attribute *ad7793_attributes[] = {
+	&iio_dev_attr_sampling_frequency.dev_attr.attr,
+	&iio_const_attr_sampling_frequency_available.dev_attr.attr,
+	&iio_dev_attr_in_m_in_scale_available.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group ad7793_attribute_group = {
+	.attrs = ad7793_attributes,
+};
+
+static int ad7793_read_raw(struct iio_dev *indio_dev,
+			   struct iio_chan_spec const *chan,
+			   int *val,
+			   int *val2,
+			   long m)
+{
+	struct ad7793_state *st = iio_priv(indio_dev);
+	int ret;
+	unsigned long long scale_uv;
+	bool unipolar = !!(st->conf & AD7793_CONF_UNIPOLAR);
+
+	switch (m) {
+	case IIO_CHAN_INFO_RAW:
+		ret = ad_sigma_delta_single_conversion(indio_dev, chan, val);
+		if (ret < 0)
+			return ret;
+
+		return IIO_VAL_INT;
+
+	case IIO_CHAN_INFO_SCALE:
+		switch (chan->type) {
+		case IIO_VOLTAGE:
+			if (chan->differential) {
+				*val = st->
+					scale_avail[(st->conf >> 8) & 0x7][0];
+				*val2 = st->
+					scale_avail[(st->conf >> 8) & 0x7][1];
+				return IIO_VAL_INT_PLUS_NANO;
+			} else {
+				/* 1170mV / 2^23 * 6 */
+				scale_uv = (1170ULL * 1000000000ULL * 6ULL);
+			}
+			break;
+		case IIO_TEMP:
+				/* 1170mV / 0.81 mV/C / 2^23 */
+				scale_uv = 1444444444444444ULL;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		scale_uv >>= (chan->scan_type.realbits - (unipolar ? 0 : 1));
+		*val = 0;
+		*val2 = scale_uv;
+		return IIO_VAL_INT_PLUS_NANO;
+	case IIO_CHAN_INFO_OFFSET:
+		if (!unipolar)
+			*val = -(1 << (chan->scan_type.realbits - 1));
+		else
+			*val = 0;
+
+		/* Kelvin to Celsius */
+		if (chan->type == IIO_TEMP) {
+			unsigned long long offset;
+			unsigned int shift;
+
+			shift = chan->scan_type.realbits - (unipolar ? 0 : 1);
+			offset = 273ULL << shift;
+			do_div(offset, 1444);
+			*val -= offset;
+		}
+		return IIO_VAL_INT;
+	}
+	return -EINVAL;
+}
+
+static int ad7793_write_raw(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       int val,
+			       int val2,
+			       long mask)
+{
+	struct ad7793_state *st = iio_priv(indio_dev);
+	int ret, i;
+	unsigned int tmp;
+
+	mutex_lock(&indio_dev->mlock);
+	if (iio_buffer_enabled(indio_dev)) {
+		mutex_unlock(&indio_dev->mlock);
+		return -EBUSY;
+	}
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SCALE:
+		ret = -EINVAL;
+		for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++)
+			if (val2 == st->scale_avail[i][1]) {
+				ret = 0;
+				tmp = st->conf;
+				st->conf &= ~AD7793_CONF_GAIN(-1);
+				st->conf |= AD7793_CONF_GAIN(i);
+
+				if (tmp == st->conf)
+					break;
+
+				ad_sd_write_reg(&st->sd, AD7793_REG_CONF,
+						sizeof(st->conf), st->conf);
+				ad7793_calibrate_all(st);
+				break;
+			}
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&indio_dev->mlock);
+	return ret;
+}
+
+static int ad7793_write_raw_get_fmt(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       long mask)
+{
+	return IIO_VAL_INT_PLUS_NANO;
+}
+
+static const struct iio_info ad7793_info = {
+	.read_raw = &ad7793_read_raw,
+	.write_raw = &ad7793_write_raw,
+	.write_raw_get_fmt = &ad7793_write_raw_get_fmt,
+	.attrs = &ad7793_attribute_group,
+	.validate_trigger = ad_sd_validate_trigger,
+	.driver_module = THIS_MODULE,
+};
+
+#define DECLARE_AD7793_CHANNELS(_name, _b, _sb, _s) \
+const struct iio_chan_spec _name##_channels[] = { \
+	AD_SD_DIFF_CHANNEL(0, 0, 0, AD7793_CH_AIN1P_AIN1M, (_b), (_sb), (_s)), \
+	AD_SD_DIFF_CHANNEL(1, 1, 1, AD7793_CH_AIN2P_AIN2M, (_b), (_sb), (_s)), \
+	AD_SD_DIFF_CHANNEL(2, 2, 2, AD7793_CH_AIN3P_AIN3M, (_b), (_sb), (_s)), \
+	AD_SD_SHORTED_CHANNEL(3, 0, AD7793_CH_AIN1M_AIN1M, (_b), (_sb), (_s)), \
+	AD_SD_TEMP_CHANNEL(4, AD7793_CH_TEMP, (_b), (_sb), (_s)), \
+	AD_SD_SUPPLY_CHANNEL(5, 3, AD7793_CH_AVDD_MONITOR, (_b), (_sb), (_s)), \
+	IIO_CHAN_SOFT_TIMESTAMP(6), \
+}
+
+#define DECLARE_AD7795_CHANNELS(_name, _b, _sb) \
+const struct iio_chan_spec _name##_channels[] = { \
+	AD_SD_DIFF_CHANNEL(0, 0, 0, AD7793_CH_AIN1P_AIN1M, (_b), (_sb), 0), \
+	AD_SD_DIFF_CHANNEL(1, 1, 1, AD7793_CH_AIN2P_AIN2M, (_b), (_sb), 0), \
+	AD_SD_DIFF_CHANNEL(2, 2, 2, AD7793_CH_AIN3P_AIN3M, (_b), (_sb), 0), \
+	AD_SD_DIFF_CHANNEL(3, 3, 3, AD7795_CH_AIN4P_AIN4M, (_b), (_sb), 0), \
+	AD_SD_DIFF_CHANNEL(4, 4, 4, AD7795_CH_AIN5P_AIN5M, (_b), (_sb), 0), \
+	AD_SD_DIFF_CHANNEL(5, 5, 5, AD7795_CH_AIN6P_AIN6M, (_b), (_sb), 0), \
+	AD_SD_SHORTED_CHANNEL(6, 0, AD7795_CH_AIN1M_AIN1M, (_b), (_sb), 0), \
+	AD_SD_TEMP_CHANNEL(7, AD7793_CH_TEMP, (_b), (_sb), 0), \
+	AD_SD_SUPPLY_CHANNEL(8, 3, AD7793_CH_AVDD_MONITOR, (_b), (_sb), 0), \
+	IIO_CHAN_SOFT_TIMESTAMP(9), \
+}
+
+static DECLARE_AD7793_CHANNELS(ad7785, 20, 32, 4);
+static DECLARE_AD7793_CHANNELS(ad7792, 16, 32, 0);
+static DECLARE_AD7793_CHANNELS(ad7793, 24, 32, 0);
+static DECLARE_AD7795_CHANNELS(ad7794, 16, 32);
+static DECLARE_AD7795_CHANNELS(ad7795, 24, 32);
+
+static const struct ad7793_chip_info ad7793_chip_info_tbl[] = {
+	[ID_AD7785] = {
+		.id = AD7785_ID,
+		.channels = ad7785_channels,
+		.num_channels = ARRAY_SIZE(ad7785_channels),
+	},
+	[ID_AD7792] = {
+		.id = AD7792_ID,
+		.channels = ad7792_channels,
+		.num_channels = ARRAY_SIZE(ad7792_channels),
+	},
+	[ID_AD7793] = {
+		.id = AD7793_ID,
+		.channels = ad7793_channels,
+		.num_channels = ARRAY_SIZE(ad7793_channels),
+	},
+	[ID_AD7794] = {
+		.id = AD7794_ID,
+		.channels = ad7794_channels,
+		.num_channels = ARRAY_SIZE(ad7794_channels),
+	},
+	[ID_AD7795] = {
+		.id = AD7795_ID,
+		.channels = ad7795_channels,
+		.num_channels = ARRAY_SIZE(ad7795_channels),
+	},
+};
+
+static int ad7793_probe(struct spi_device *spi)
+{
+	const struct ad7793_platform_data *pdata = spi->dev.platform_data;
+	struct ad7793_state *st;
+	struct iio_dev *indio_dev;
+	int ret, vref_mv = 0;
+
+	if (!pdata) {
+		dev_err(&spi->dev, "no platform data?\n");
+		return -ENODEV;
+	}
+
+	if (!spi->irq) {
+		dev_err(&spi->dev, "no IRQ?\n");
+		return -ENODEV;
+	}
+
+	indio_dev = iio_device_alloc(sizeof(*st));
+	if (indio_dev == NULL)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+
+	ad_sd_init(&st->sd, indio_dev, spi, &ad7793_sigma_delta_info);
+
+	if (pdata->refsel != AD7793_REFSEL_INTERNAL) {
+		st->reg = regulator_get(&spi->dev, "refin");
+		if (IS_ERR(st->reg)) {
+			ret = PTR_ERR(st->reg);
+			goto error_device_free;
+		}
+
+		ret = regulator_enable(st->reg);
+		if (ret)
+			goto error_put_reg;
+
+		vref_mv = regulator_get_voltage(st->reg);
+		if (vref_mv < 0) {
+			ret = vref_mv;
+			goto error_disable_reg;
+		}
+
+		vref_mv /= 1000;
+	} else {
+		vref_mv = 1170; /* Build-in ref */
+	}
+
+	st->chip_info =
+		&ad7793_chip_info_tbl[spi_get_device_id(spi)->driver_data];
+
+	spi_set_drvdata(spi, indio_dev);
+
+	indio_dev->dev.parent = &spi->dev;
+	indio_dev->name = spi_get_device_id(spi)->name;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = st->chip_info->channels;
+	indio_dev->num_channels = st->chip_info->num_channels;
+	indio_dev->info = &ad7793_info;
+
+	ret = ad_sd_setup_buffer_and_trigger(indio_dev);
+	if (ret)
+		goto error_disable_reg;
+
+	ret = ad7793_setup(indio_dev, pdata, vref_mv);
+	if (ret)
+		goto error_remove_trigger;
+
+	ret = iio_device_register(indio_dev);
+	if (ret)
+		goto error_remove_trigger;
+
+	return 0;
+
+error_remove_trigger:
+	ad_sd_cleanup_buffer_and_trigger(indio_dev);
+error_disable_reg:
+	if (pdata->refsel != AD7793_REFSEL_INTERNAL)
+		regulator_disable(st->reg);
+error_put_reg:
+	if (pdata->refsel != AD7793_REFSEL_INTERNAL)
+		regulator_put(st->reg);
+error_device_free:
+	iio_device_free(indio_dev);
+
+	return ret;
+}
+
+static int ad7793_remove(struct spi_device *spi)
+{
+	const struct ad7793_platform_data *pdata = spi->dev.platform_data;
+	struct iio_dev *indio_dev = spi_get_drvdata(spi);
+	struct ad7793_state *st = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+	ad_sd_cleanup_buffer_and_trigger(indio_dev);
+
+	if (pdata->refsel != AD7793_REFSEL_INTERNAL) {
+		regulator_disable(st->reg);
+		regulator_put(st->reg);
+	}
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static const struct spi_device_id ad7793_id[] = {
+	{"ad7785", ID_AD7785},
+	{"ad7792", ID_AD7792},
+	{"ad7793", ID_AD7793},
+	{"ad7794", ID_AD7794},
+	{"ad7795", ID_AD7795},
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad7793_id);
+
+static struct spi_driver ad7793_driver = {
+	.driver = {
+		.name	= "ad7793",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= ad7793_probe,
+	.remove		= ad7793_remove,
+	.id_table	= ad7793_id,
+};
+module_spi_driver(ad7793_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Analog Devices AD7793 and simialr ADCs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/Kconfig b/drivers/staging/iio/adc/Kconfig
index dc8582b95b61..fb8c239b0c88 100644
--- a/drivers/staging/iio/adc/Kconfig
+++ b/drivers/staging/iio/adc/Kconfig
@@ -70,18 +70,6 @@ config AD7780
 	  To compile this driver as a module, choose M here: the
 	  module will be called ad7780.
 
-config AD7793
-	tristate "Analog Devices AD7793 and similar ADCs driver"
-	depends on SPI
-	select AD_SIGMA_DELTA
-	help
-	  Say yes here to build support for Analog Devices AD7785, AD7792, AD7793,
-	  AD7794 and AD7795 SPI analog to digital converters (ADC).
-	  If unsure, say N (but it's safe to say "Y").
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called AD7793.
-
 config AD7816
 	tristate "Analog Devices AD7816/7/8 temperature sensor and ADC driver"
 	depends on SPI
diff --git a/drivers/staging/iio/adc/Makefile b/drivers/staging/iio/adc/Makefile
index 7281451a613a..d285596272a0 100644
--- a/drivers/staging/iio/adc/Makefile
+++ b/drivers/staging/iio/adc/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_AD799X) += ad799x.o
 
 obj-$(CONFIG_AD7291) += ad7291.o
 obj-$(CONFIG_AD7780) += ad7780.o
-obj-$(CONFIG_AD7793) += ad7793.o
 obj-$(CONFIG_AD7816) += ad7816.o
 obj-$(CONFIG_AD7192) += ad7192.o
 obj-$(CONFIG_ADT7410) += adt7410.o
diff --git a/drivers/staging/iio/adc/ad7793.c b/drivers/staging/iio/adc/ad7793.c
deleted file mode 100644
index 8928609a1828..000000000000
--- a/drivers/staging/iio/adc/ad7793.c
+++ /dev/null
@@ -1,692 +0,0 @@
-/*
- * AD7785/AD7792/AD7793/AD7794/AD7795 SPI ADC driver
- *
- * Copyright 2011-2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#include <linux/interrupt.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
-#include <linux/spi/spi.h>
-#include <linux/regulator/consumer.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/sysfs.h>
-#include <linux/iio/buffer.h>
-#include <linux/iio/trigger.h>
-#include <linux/iio/trigger_consumer.h>
-#include <linux/iio/triggered_buffer.h>
-#include <linux/iio/adc/ad_sigma_delta.h>
-
-#include "ad7793.h"
-
-/* Registers */
-#define AD7793_REG_COMM		0 /* Communications Register (WO, 8-bit) */
-#define AD7793_REG_STAT		0 /* Status Register	     (RO, 8-bit) */
-#define AD7793_REG_MODE		1 /* Mode Register	     (RW, 16-bit */
-#define AD7793_REG_CONF		2 /* Configuration Register  (RW, 16-bit) */
-#define AD7793_REG_DATA		3 /* Data Register	     (RO, 16-/24-bit) */
-#define AD7793_REG_ID		4 /* ID Register	     (RO, 8-bit) */
-#define AD7793_REG_IO		5 /* IO Register	     (RO, 8-bit) */
-#define AD7793_REG_OFFSET	6 /* Offset Register	     (RW, 16-bit
-				   * (AD7792)/24-bit (AD7793)) */
-#define AD7793_REG_FULLSALE	7 /* Full-Scale Register
-				   * (RW, 16-bit (AD7792)/24-bit (AD7793)) */
-
-/* Communications Register Bit Designations (AD7793_REG_COMM) */
-#define AD7793_COMM_WEN		(1 << 7) /* Write Enable */
-#define AD7793_COMM_WRITE	(0 << 6) /* Write Operation */
-#define AD7793_COMM_READ	(1 << 6) /* Read Operation */
-#define AD7793_COMM_ADDR(x)	(((x) & 0x7) << 3) /* Register Address */
-#define AD7793_COMM_CREAD	(1 << 2) /* Continuous Read of Data Register */
-
-/* Status Register Bit Designations (AD7793_REG_STAT) */
-#define AD7793_STAT_RDY		(1 << 7) /* Ready */
-#define AD7793_STAT_ERR		(1 << 6) /* Error (Overrange, Underrange) */
-#define AD7793_STAT_CH3		(1 << 2) /* Channel 3 */
-#define AD7793_STAT_CH2		(1 << 1) /* Channel 2 */
-#define AD7793_STAT_CH1		(1 << 0) /* Channel 1 */
-
-/* Mode Register Bit Designations (AD7793_REG_MODE) */
-#define AD7793_MODE_SEL(x)	(((x) & 0x7) << 13) /* Operation Mode Select */
-#define AD7793_MODE_SEL_MASK	(0x7 << 13) /* Operation Mode Select mask */
-#define AD7793_MODE_CLKSRC(x)	(((x) & 0x3) << 6) /* ADC Clock Source Select */
-#define AD7793_MODE_RATE(x)	((x) & 0xF) /* Filter Update Rate Select */
-
-#define AD7793_MODE_CONT		0 /* Continuous Conversion Mode */
-#define AD7793_MODE_SINGLE		1 /* Single Conversion Mode */
-#define AD7793_MODE_IDLE		2 /* Idle Mode */
-#define AD7793_MODE_PWRDN		3 /* Power-Down Mode */
-#define AD7793_MODE_CAL_INT_ZERO	4 /* Internal Zero-Scale Calibration */
-#define AD7793_MODE_CAL_INT_FULL	5 /* Internal Full-Scale Calibration */
-#define AD7793_MODE_CAL_SYS_ZERO	6 /* System Zero-Scale Calibration */
-#define AD7793_MODE_CAL_SYS_FULL	7 /* System Full-Scale Calibration */
-
-#define AD7793_CLK_INT		0 /* Internal 64 kHz Clock not
-				   * available at the CLK pin */
-#define AD7793_CLK_INT_CO	1 /* Internal 64 kHz Clock available
-				   * at the CLK pin */
-#define AD7793_CLK_EXT		2 /* External 64 kHz Clock */
-#define AD7793_CLK_EXT_DIV2	3 /* External Clock divided by 2 */
-
-/* Configuration Register Bit Designations (AD7793_REG_CONF) */
-#define AD7793_CONF_VBIAS(x)	(((x) & 0x3) << 14) /* Bias Voltage
-						     * Generator Enable */
-#define AD7793_CONF_BO_EN	(1 << 13) /* Burnout Current Enable */
-#define AD7793_CONF_UNIPOLAR	(1 << 12) /* Unipolar/Bipolar Enable */
-#define AD7793_CONF_BOOST	(1 << 11) /* Boost Enable */
-#define AD7793_CONF_GAIN(x)	(((x) & 0x7) << 8) /* Gain Select */
-#define AD7793_CONF_REFSEL(x)	((x) << 6) /* INT/EXT Reference Select */
-#define AD7793_CONF_BUF		(1 << 4) /* Buffered Mode Enable */
-#define AD7793_CONF_CHAN(x)	((x) & 0xf) /* Channel select */
-#define AD7793_CONF_CHAN_MASK	0xf /* Channel select mask */
-
-#define AD7793_CH_AIN1P_AIN1M	0 /* AIN1(+) - AIN1(-) */
-#define AD7793_CH_AIN2P_AIN2M	1 /* AIN2(+) - AIN2(-) */
-#define AD7793_CH_AIN3P_AIN3M	2 /* AIN3(+) - AIN3(-) */
-#define AD7793_CH_AIN1M_AIN1M	3 /* AIN1(-) - AIN1(-) */
-#define AD7793_CH_TEMP		6 /* Temp Sensor */
-#define AD7793_CH_AVDD_MONITOR	7 /* AVDD Monitor */
-
-#define AD7795_CH_AIN4P_AIN4M	4 /* AIN4(+) - AIN4(-) */
-#define AD7795_CH_AIN5P_AIN5M	5 /* AIN5(+) - AIN5(-) */
-#define AD7795_CH_AIN6P_AIN6M	6 /* AIN6(+) - AIN6(-) */
-#define AD7795_CH_AIN1M_AIN1M	8 /* AIN1(-) - AIN1(-) */
-
-/* ID Register Bit Designations (AD7793_REG_ID) */
-#define AD7785_ID		0xB
-#define AD7792_ID		0xA
-#define AD7793_ID		0xB
-#define AD7794_ID		0xF
-#define AD7795_ID		0xF
-#define AD7793_ID_MASK		0xF
-
-/* IO (Excitation Current Sources) Register Bit Designations (AD7793_REG_IO) */
-#define AD7793_IO_IEXC1_IOUT1_IEXC2_IOUT2	0 /* IEXC1 connect to IOUT1,
-						   * IEXC2 connect to IOUT2 */
-#define AD7793_IO_IEXC1_IOUT2_IEXC2_IOUT1	1 /* IEXC1 connect to IOUT2,
-						   * IEXC2 connect to IOUT1 */
-#define AD7793_IO_IEXC1_IEXC2_IOUT1		2 /* Both current sources
-						   * IEXC1,2 connect to IOUT1 */
-#define AD7793_IO_IEXC1_IEXC2_IOUT2		3 /* Both current sources
-						   * IEXC1,2 connect to IOUT2 */
-
-#define AD7793_IO_IXCEN_10uA	(1 << 0) /* Excitation Current 10uA */
-#define AD7793_IO_IXCEN_210uA	(2 << 0) /* Excitation Current 210uA */
-#define AD7793_IO_IXCEN_1mA	(3 << 0) /* Excitation Current 1mA */
-
-/* NOTE:
- * The AD7792/AD7793 features a dual use data out ready DOUT/RDY output.
- * In order to avoid contentions on the SPI bus, it's therefore necessary
- * to use spi bus locking.
- *
- * The DOUT/RDY output must also be wired to an interrupt capable GPIO.
- */
-
-struct ad7793_chip_info {
-	unsigned int id;
-	const struct iio_chan_spec *channels;
-	unsigned int num_channels;
-};
-
-struct ad7793_state {
-	const struct ad7793_chip_info	*chip_info;
-	struct regulator		*reg;
-	u16				int_vref_mv;
-	u16				mode;
-	u16				conf;
-	u32				scale_avail[8][2];
-
-	struct ad_sigma_delta		sd;
-
-};
-
-enum ad7793_supported_device_ids {
-	ID_AD7785,
-	ID_AD7792,
-	ID_AD7793,
-	ID_AD7794,
-	ID_AD7795,
-};
-
-static struct ad7793_state *ad_sigma_delta_to_ad7793(struct ad_sigma_delta *sd)
-{
-	return container_of(sd, struct ad7793_state, sd);
-}
-
-static int ad7793_set_channel(struct ad_sigma_delta *sd, unsigned int channel)
-{
-	struct ad7793_state *st = ad_sigma_delta_to_ad7793(sd);
-
-	st->conf &= ~AD7793_CONF_CHAN_MASK;
-	st->conf |= AD7793_CONF_CHAN(channel);
-
-	return ad_sd_write_reg(&st->sd, AD7793_REG_CONF, 2, st->conf);
-}
-
-static int ad7793_set_mode(struct ad_sigma_delta *sd,
-			   enum ad_sigma_delta_mode mode)
-{
-	struct ad7793_state *st = ad_sigma_delta_to_ad7793(sd);
-
-	st->mode &= ~AD7793_MODE_SEL_MASK;
-	st->mode |= AD7793_MODE_SEL(mode);
-
-	return ad_sd_write_reg(&st->sd, AD7793_REG_MODE, 2, st->mode);
-}
-
-static const struct ad_sigma_delta_info ad7793_sigma_delta_info = {
-	.set_channel = ad7793_set_channel,
-	.set_mode = ad7793_set_mode,
-	.has_registers = true,
-	.addr_shift = 3,
-	.read_mask = BIT(6),
-};
-
-static const struct ad_sd_calib_data ad7793_calib_arr[6] = {
-	{AD7793_MODE_CAL_INT_ZERO, AD7793_CH_AIN1P_AIN1M},
-	{AD7793_MODE_CAL_INT_FULL, AD7793_CH_AIN1P_AIN1M},
-	{AD7793_MODE_CAL_INT_ZERO, AD7793_CH_AIN2P_AIN2M},
-	{AD7793_MODE_CAL_INT_FULL, AD7793_CH_AIN2P_AIN2M},
-	{AD7793_MODE_CAL_INT_ZERO, AD7793_CH_AIN3P_AIN3M},
-	{AD7793_MODE_CAL_INT_FULL, AD7793_CH_AIN3P_AIN3M}
-};
-
-static int ad7793_calibrate_all(struct ad7793_state *st)
-{
-	return ad_sd_calibrate_all(&st->sd, ad7793_calib_arr,
-				   ARRAY_SIZE(ad7793_calib_arr));
-}
-
-static int ad7793_setup(struct iio_dev *indio_dev,
-	const struct ad7793_platform_data *pdata,
-	unsigned int vref_mv)
-{
-	struct ad7793_state *st = iio_priv(indio_dev);
-	int i, ret = -1;
-	unsigned long long scale_uv;
-	u32 id;
-
-	if ((pdata->current_source_direction == AD7793_IEXEC1_IEXEC2_IOUT1 ||
-		pdata->current_source_direction == AD7793_IEXEC1_IEXEC2_IOUT2) &&
-		((pdata->exitation_current != AD7793_IX_10uA) &&
-		(pdata->exitation_current != AD7793_IX_210uA)))
-		return -EINVAL;
-
-	/* reset the serial interface */
-	ret = spi_write(st->sd.spi, (u8 *)&ret, sizeof(ret));
-	if (ret < 0)
-		goto out;
-	usleep_range(500, 2000); /* Wait for at least 500us */
-
-	/* write/read test for device presence */
-	ret = ad_sd_read_reg(&st->sd, AD7793_REG_ID, 1, &id);
-	if (ret)
-		goto out;
-
-	id &= AD7793_ID_MASK;
-
-	if (id != st->chip_info->id) {
-		dev_err(&st->sd.spi->dev, "device ID query failed\n");
-		goto out;
-	}
-
-	st->mode = AD7793_MODE_RATE(1);
-	st->mode |= AD7793_MODE_CLKSRC(pdata->clock_src);
-	st->conf = AD7793_CONF_REFSEL(pdata->refsel);
-	st->conf |= AD7793_CONF_VBIAS(pdata->bias_voltage);
-	if (pdata->buffered)
-		st->conf |= AD7793_CONF_BUF;
-	if (pdata->boost_enable)
-		st->conf |= AD7793_CONF_BOOST;
-	if (pdata->burnout_current)
-		st->conf |= AD7793_CONF_BO_EN;
-	if (pdata->unipolar)
-		st->conf |= AD7793_CONF_UNIPOLAR;
-
-	ret = ad7793_set_mode(&st->sd, AD_SD_MODE_IDLE);
-	if (ret)
-		goto out;
-
-	ret = ad7793_set_channel(&st->sd, 0);
-	if (ret)
-		goto out;
-
-	ret = ad_sd_write_reg(&st->sd, AD7793_REG_IO, 1,
-				   pdata->exitation_current |
-			       (pdata->current_source_direction << 2));
-	if (ret)
-		goto out;
-
-	ret = ad7793_calibrate_all(st);
-	if (ret)
-		goto out;
-
-	/* Populate available ADC input ranges */
-	for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) {
-		scale_uv = ((u64)vref_mv * 100000000)
-			>> (st->chip_info->channels[0].scan_type.realbits -
-			(!!(st->conf & AD7793_CONF_UNIPOLAR) ? 0 : 1));
-		scale_uv >>= i;
-
-		st->scale_avail[i][1] = do_div(scale_uv, 100000000) * 10;
-		st->scale_avail[i][0] = scale_uv;
-	}
-
-	return 0;
-out:
-	dev_err(&st->sd.spi->dev, "setup failed\n");
-	return ret;
-}
-
-static const u16 sample_freq_avail[16] = {0, 470, 242, 123, 62, 50, 39, 33, 19,
-					  17, 16, 12, 10, 8, 6, 4};
-
-static ssize_t ad7793_read_frequency(struct device *dev,
-		struct device_attribute *attr,
-		char *buf)
-{
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	struct ad7793_state *st = iio_priv(indio_dev);
-
-	return sprintf(buf, "%d\n",
-		       sample_freq_avail[AD7793_MODE_RATE(st->mode)]);
-}
-
-static ssize_t ad7793_write_frequency(struct device *dev,
-		struct device_attribute *attr,
-		const char *buf,
-		size_t len)
-{
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	struct ad7793_state *st = iio_priv(indio_dev);
-	long lval;
-	int i, ret;
-
-	mutex_lock(&indio_dev->mlock);
-	if (iio_buffer_enabled(indio_dev)) {
-		mutex_unlock(&indio_dev->mlock);
-		return -EBUSY;
-	}
-	mutex_unlock(&indio_dev->mlock);
-
-	ret = kstrtol(buf, 10, &lval);
-	if (ret)
-		return ret;
-
-	ret = -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(sample_freq_avail); i++)
-		if (lval == sample_freq_avail[i]) {
-			mutex_lock(&indio_dev->mlock);
-			st->mode &= ~AD7793_MODE_RATE(-1);
-			st->mode |= AD7793_MODE_RATE(i);
-			ad_sd_write_reg(&st->sd, AD7793_REG_MODE,
-					 sizeof(st->mode), st->mode);
-			mutex_unlock(&indio_dev->mlock);
-			ret = 0;
-		}
-
-	return ret ? ret : len;
-}
-
-static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO,
-		ad7793_read_frequency,
-		ad7793_write_frequency);
-
-static IIO_CONST_ATTR_SAMP_FREQ_AVAIL(
-	"470 242 123 62 50 39 33 19 17 16 12 10 8 6 4");
-
-static ssize_t ad7793_show_scale_available(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
-	struct ad7793_state *st = iio_priv(indio_dev);
-	int i, len = 0;
-
-	for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++)
-		len += sprintf(buf + len, "%d.%09u ", st->scale_avail[i][0],
-			       st->scale_avail[i][1]);
-
-	len += sprintf(buf + len, "\n");
-
-	return len;
-}
-
-static IIO_DEVICE_ATTR_NAMED(in_m_in_scale_available,
-		in_voltage-voltage_scale_available, S_IRUGO,
-		ad7793_show_scale_available, NULL, 0);
-
-static struct attribute *ad7793_attributes[] = {
-	&iio_dev_attr_sampling_frequency.dev_attr.attr,
-	&iio_const_attr_sampling_frequency_available.dev_attr.attr,
-	&iio_dev_attr_in_m_in_scale_available.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group ad7793_attribute_group = {
-	.attrs = ad7793_attributes,
-};
-
-static int ad7793_read_raw(struct iio_dev *indio_dev,
-			   struct iio_chan_spec const *chan,
-			   int *val,
-			   int *val2,
-			   long m)
-{
-	struct ad7793_state *st = iio_priv(indio_dev);
-	int ret;
-	unsigned long long scale_uv;
-	bool unipolar = !!(st->conf & AD7793_CONF_UNIPOLAR);
-
-	switch (m) {
-	case IIO_CHAN_INFO_RAW:
-		ret = ad_sigma_delta_single_conversion(indio_dev, chan, val);
-		if (ret < 0)
-			return ret;
-
-		return IIO_VAL_INT;
-
-	case IIO_CHAN_INFO_SCALE:
-		switch (chan->type) {
-		case IIO_VOLTAGE:
-			if (chan->differential) {
-				*val = st->
-					scale_avail[(st->conf >> 8) & 0x7][0];
-				*val2 = st->
-					scale_avail[(st->conf >> 8) & 0x7][1];
-				return IIO_VAL_INT_PLUS_NANO;
-			} else {
-				/* 1170mV / 2^23 * 6 */
-				scale_uv = (1170ULL * 1000000000ULL * 6ULL);
-			}
-			break;
-		case IIO_TEMP:
-				/* 1170mV / 0.81 mV/C / 2^23 */
-				scale_uv = 1444444444444444ULL;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		scale_uv >>= (chan->scan_type.realbits - (unipolar ? 0 : 1));
-		*val = 0;
-		*val2 = scale_uv;
-		return IIO_VAL_INT_PLUS_NANO;
-	case IIO_CHAN_INFO_OFFSET:
-		if (!unipolar)
-			*val = -(1 << (chan->scan_type.realbits - 1));
-		else
-			*val = 0;
-
-		/* Kelvin to Celsius */
-		if (chan->type == IIO_TEMP) {
-			unsigned long long offset;
-			unsigned int shift;
-
-			shift = chan->scan_type.realbits - (unipolar ? 0 : 1);
-			offset = 273ULL << shift;
-			do_div(offset, 1444);
-			*val -= offset;
-		}
-		return IIO_VAL_INT;
-	}
-	return -EINVAL;
-}
-
-static int ad7793_write_raw(struct iio_dev *indio_dev,
-			       struct iio_chan_spec const *chan,
-			       int val,
-			       int val2,
-			       long mask)
-{
-	struct ad7793_state *st = iio_priv(indio_dev);
-	int ret, i;
-	unsigned int tmp;
-
-	mutex_lock(&indio_dev->mlock);
-	if (iio_buffer_enabled(indio_dev)) {
-		mutex_unlock(&indio_dev->mlock);
-		return -EBUSY;
-	}
-
-	switch (mask) {
-	case IIO_CHAN_INFO_SCALE:
-		ret = -EINVAL;
-		for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++)
-			if (val2 == st->scale_avail[i][1]) {
-				ret = 0;
-				tmp = st->conf;
-				st->conf &= ~AD7793_CONF_GAIN(-1);
-				st->conf |= AD7793_CONF_GAIN(i);
-
-				if (tmp == st->conf)
-					break;
-
-				ad_sd_write_reg(&st->sd, AD7793_REG_CONF,
-						sizeof(st->conf), st->conf);
-				ad7793_calibrate_all(st);
-				break;
-			}
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	mutex_unlock(&indio_dev->mlock);
-	return ret;
-}
-
-static int ad7793_write_raw_get_fmt(struct iio_dev *indio_dev,
-			       struct iio_chan_spec const *chan,
-			       long mask)
-{
-	return IIO_VAL_INT_PLUS_NANO;
-}
-
-static const struct iio_info ad7793_info = {
-	.read_raw = &ad7793_read_raw,
-	.write_raw = &ad7793_write_raw,
-	.write_raw_get_fmt = &ad7793_write_raw_get_fmt,
-	.attrs = &ad7793_attribute_group,
-	.validate_trigger = ad_sd_validate_trigger,
-	.driver_module = THIS_MODULE,
-};
-
-#define DECLARE_AD7793_CHANNELS(_name, _b, _sb, _s) \
-const struct iio_chan_spec _name##_channels[] = { \
-	AD_SD_DIFF_CHANNEL(0, 0, 0, AD7793_CH_AIN1P_AIN1M, (_b), (_sb), (_s)), \
-	AD_SD_DIFF_CHANNEL(1, 1, 1, AD7793_CH_AIN2P_AIN2M, (_b), (_sb), (_s)), \
-	AD_SD_DIFF_CHANNEL(2, 2, 2, AD7793_CH_AIN3P_AIN3M, (_b), (_sb), (_s)), \
-	AD_SD_SHORTED_CHANNEL(3, 0, AD7793_CH_AIN1M_AIN1M, (_b), (_sb), (_s)), \
-	AD_SD_TEMP_CHANNEL(4, AD7793_CH_TEMP, (_b), (_sb), (_s)), \
-	AD_SD_SUPPLY_CHANNEL(5, 3, AD7793_CH_AVDD_MONITOR, (_b), (_sb), (_s)), \
-	IIO_CHAN_SOFT_TIMESTAMP(6), \
-}
-
-#define DECLARE_AD7795_CHANNELS(_name, _b, _sb) \
-const struct iio_chan_spec _name##_channels[] = { \
-	AD_SD_DIFF_CHANNEL(0, 0, 0, AD7793_CH_AIN1P_AIN1M, (_b), (_sb), 0), \
-	AD_SD_DIFF_CHANNEL(1, 1, 1, AD7793_CH_AIN2P_AIN2M, (_b), (_sb), 0), \
-	AD_SD_DIFF_CHANNEL(2, 2, 2, AD7793_CH_AIN3P_AIN3M, (_b), (_sb), 0), \
-	AD_SD_DIFF_CHANNEL(3, 3, 3, AD7795_CH_AIN4P_AIN4M, (_b), (_sb), 0), \
-	AD_SD_DIFF_CHANNEL(4, 4, 4, AD7795_CH_AIN5P_AIN5M, (_b), (_sb), 0), \
-	AD_SD_DIFF_CHANNEL(5, 5, 5, AD7795_CH_AIN6P_AIN6M, (_b), (_sb), 0), \
-	AD_SD_SHORTED_CHANNEL(6, 0, AD7795_CH_AIN1M_AIN1M, (_b), (_sb), 0), \
-	AD_SD_TEMP_CHANNEL(7, AD7793_CH_TEMP, (_b), (_sb), 0), \
-	AD_SD_SUPPLY_CHANNEL(8, 3, AD7793_CH_AVDD_MONITOR, (_b), (_sb), 0), \
-	IIO_CHAN_SOFT_TIMESTAMP(9), \
-}
-
-static DECLARE_AD7793_CHANNELS(ad7785, 20, 32, 4);
-static DECLARE_AD7793_CHANNELS(ad7792, 16, 32, 0);
-static DECLARE_AD7793_CHANNELS(ad7793, 24, 32, 0);
-static DECLARE_AD7795_CHANNELS(ad7794, 16, 32);
-static DECLARE_AD7795_CHANNELS(ad7795, 24, 32);
-
-static const struct ad7793_chip_info ad7793_chip_info_tbl[] = {
-	[ID_AD7785] = {
-		.id = AD7785_ID,
-		.channels = ad7785_channels,
-		.num_channels = ARRAY_SIZE(ad7785_channels),
-	},
-	[ID_AD7792] = {
-		.id = AD7792_ID,
-		.channels = ad7792_channels,
-		.num_channels = ARRAY_SIZE(ad7792_channels),
-	},
-	[ID_AD7793] = {
-		.id = AD7793_ID,
-		.channels = ad7793_channels,
-		.num_channels = ARRAY_SIZE(ad7793_channels),
-	},
-	[ID_AD7794] = {
-		.id = AD7794_ID,
-		.channels = ad7794_channels,
-		.num_channels = ARRAY_SIZE(ad7794_channels),
-	},
-	[ID_AD7795] = {
-		.id = AD7795_ID,
-		.channels = ad7795_channels,
-		.num_channels = ARRAY_SIZE(ad7795_channels),
-	},
-};
-
-static int ad7793_probe(struct spi_device *spi)
-{
-	const struct ad7793_platform_data *pdata = spi->dev.platform_data;
-	struct ad7793_state *st;
-	struct iio_dev *indio_dev;
-	int ret, vref_mv = 0;
-
-	if (!pdata) {
-		dev_err(&spi->dev, "no platform data?\n");
-		return -ENODEV;
-	}
-
-	if (!spi->irq) {
-		dev_err(&spi->dev, "no IRQ?\n");
-		return -ENODEV;
-	}
-
-	indio_dev = iio_device_alloc(sizeof(*st));
-	if (indio_dev == NULL)
-		return -ENOMEM;
-
-	st = iio_priv(indio_dev);
-
-	ad_sd_init(&st->sd, indio_dev, spi, &ad7793_sigma_delta_info);
-
-	if (pdata->refsel != AD7793_REFSEL_INTERNAL) {
-		st->reg = regulator_get(&spi->dev, "refin");
-		if (IS_ERR(st->reg)) {
-			ret = PTR_ERR(st->reg);
-			goto error_device_free;
-		}
-
-		ret = regulator_enable(st->reg);
-		if (ret)
-			goto error_put_reg;
-
-		vref_mv = regulator_get_voltage(st->reg);
-		if (vref_mv < 0) {
-			ret = vref_mv;
-			goto error_disable_reg;
-		}
-
-		vref_mv /= 1000;
-	} else {
-		vref_mv = 1170; /* Build-in ref */
-	}
-
-	st->chip_info =
-		&ad7793_chip_info_tbl[spi_get_device_id(spi)->driver_data];
-
-	spi_set_drvdata(spi, indio_dev);
-
-	indio_dev->dev.parent = &spi->dev;
-	indio_dev->name = spi_get_device_id(spi)->name;
-	indio_dev->modes = INDIO_DIRECT_MODE;
-	indio_dev->channels = st->chip_info->channels;
-	indio_dev->num_channels = st->chip_info->num_channels;
-	indio_dev->info = &ad7793_info;
-
-	ret = ad_sd_setup_buffer_and_trigger(indio_dev);
-	if (ret)
-		goto error_disable_reg;
-
-	ret = ad7793_setup(indio_dev, pdata, vref_mv);
-	if (ret)
-		goto error_remove_trigger;
-
-	ret = iio_device_register(indio_dev);
-	if (ret)
-		goto error_remove_trigger;
-
-	return 0;
-
-error_remove_trigger:
-	ad_sd_cleanup_buffer_and_trigger(indio_dev);
-error_disable_reg:
-	if (pdata->refsel != AD7793_REFSEL_INTERNAL)
-		regulator_disable(st->reg);
-error_put_reg:
-	if (pdata->refsel != AD7793_REFSEL_INTERNAL)
-		regulator_put(st->reg);
-error_device_free:
-	iio_device_free(indio_dev);
-
-	return ret;
-}
-
-static int ad7793_remove(struct spi_device *spi)
-{
-	const struct ad7793_platform_data *pdata = spi->dev.platform_data;
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-	struct ad7793_state *st = iio_priv(indio_dev);
-
-	iio_device_unregister(indio_dev);
-	ad_sd_cleanup_buffer_and_trigger(indio_dev);
-
-	if (pdata->refsel != AD7793_REFSEL_INTERNAL) {
-		regulator_disable(st->reg);
-		regulator_put(st->reg);
-	}
-
-	iio_device_free(indio_dev);
-
-	return 0;
-}
-
-static const struct spi_device_id ad7793_id[] = {
-	{"ad7785", ID_AD7785},
-	{"ad7792", ID_AD7792},
-	{"ad7793", ID_AD7793},
-	{"ad7794", ID_AD7794},
-	{"ad7795", ID_AD7795},
-	{}
-};
-MODULE_DEVICE_TABLE(spi, ad7793_id);
-
-static struct spi_driver ad7793_driver = {
-	.driver = {
-		.name	= "ad7793",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= ad7793_probe,
-	.remove		= ad7793_remove,
-	.id_table	= ad7793_id,
-};
-module_spi_driver(ad7793_driver);
-
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
-MODULE_DESCRIPTION("Analog Devices AD7793 and simialr ADCs");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/ad7793.h b/drivers/staging/iio/adc/ad7793.h
deleted file mode 100644
index 0e455de215e4..000000000000
--- a/drivers/staging/iio/adc/ad7793.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * AD7792/AD7793 SPI ADC driver
- *
- * Copyright 2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-#ifndef IIO_ADC_AD7793_H_
-#define IIO_ADC_AD7793_H_
-
-/*
- * TODO: struct ad7793_platform_data needs to go into include/linux/iio
- */
-
-/**
- * enum ad7793_clock_source - AD7793 clock source selection
- * @AD7793_CLK_SRC_INT: Internal 64 kHz clock, not available at the CLK pin.
- * @AD7793_CLK_SRC_INT_CO: Internal 64 kHz clock, available at the CLK pin.
- * @AD7793_CLK_SRC_EXT: Use external clock.
- * @AD7793_CLK_SRC_EXT_DIV2: Use external clock divided by 2.
- */
-enum ad7793_clock_source {
-	AD7793_CLK_SRC_INT,
-	AD7793_CLK_SRC_INT_CO,
-	AD7793_CLK_SRC_EXT,
-	AD7793_CLK_SRC_EXT_DIV2,
-};
-
-/**
- * enum ad7793_bias_voltage - AD7793 bias voltage selection
- * @AD7793_BIAS_VOLTAGE_DISABLED: Bias voltage generator disabled
- * @AD7793_BIAS_VOLTAGE_AIN1: Bias voltage connected to AIN1(-).
- * @AD7793_BIAS_VOLTAGE_AIN2: Bias voltage connected to AIN2(-).
- * @AD7793_BIAS_VOLTAGE_AIN3: Bias voltage connected to AIN3(-).
- *	Only valid for AD7795/AD7796.
- */
-enum ad7793_bias_voltage {
-	AD7793_BIAS_VOLTAGE_DISABLED,
-	AD7793_BIAS_VOLTAGE_AIN1,
-	AD7793_BIAS_VOLTAGE_AIN2,
-	AD7793_BIAS_VOLTAGE_AIN3,
-};
-
-/**
- * enum ad7793_refsel - AD7793 reference voltage selection
- * @AD7793_REFSEL_REFIN1: External reference applied between REFIN1(+)
- *	and REFIN1(-).
- * @AD7793_REFSEL_REFIN2: External reference applied between REFIN2(+) and
- *	and REFIN1(-). Only valid for AD7795/AD7796.
- * @AD7793_REFSEL_INTERNAL: Internal 1.17 V reference.
- */
-enum ad7793_refsel {
-	AD7793_REFSEL_REFIN1 = 0,
-	AD7793_REFSEL_REFIN2 = 1,
-	AD7793_REFSEL_INTERNAL = 2,
-};
-
-/**
- * enum ad7793_current_source_direction - AD7793 excitation current direction
- * @AD7793_IEXEC1_IOUT1_IEXEC2_IOUT2: Current source IEXC1 connected to pin
- *	IOUT1, current source IEXC2 connected to pin IOUT2.
- * @AD7793_IEXEC1_IOUT2_IEXEC2_IOUT1: Current source IEXC2 connected to pin
- *	IOUT1, current source IEXC1 connected to pin IOUT2.
- * @AD7793_IEXEC1_IEXEC2_IOUT1: Both current sources connected to pin IOUT1.
- *	Only valid when the current sources are set to 10 uA or 210 uA.
- * @AD7793_IEXEC1_IEXEC2_IOUT2: Both current sources connected to Pin IOUT2.
- *	Only valid when the current ources are set to 10 uA or 210 uA.
- */
-enum ad7793_current_source_direction {
-	AD7793_IEXEC1_IOUT1_IEXEC2_IOUT2 = 0,
-	AD7793_IEXEC1_IOUT2_IEXEC2_IOUT1 = 1,
-	AD7793_IEXEC1_IEXEC2_IOUT1 = 2,
-	AD7793_IEXEC1_IEXEC2_IOUT2 = 3,
-};
-
-/**
- * enum ad7793_excitation_current - AD7793 excitation current selection
- * @AD7793_IX_DISABLED: Excitation current Disabled.
- * @AD7793_IX_10uA: Enable 10 micro-ampere excitation current.
- * @AD7793_IX_210uA: Enable 210 micro-ampere excitation current.
- * @AD7793_IX_1mA: Enable 1 milli-Ampere excitation current.
- */
-enum ad7793_excitation_current {
-	AD7793_IX_DISABLED = 0,
-	AD7793_IX_10uA = 1,
-	AD7793_IX_210uA = 2,
-	AD7793_IX_1mA = 3,
-};
-
-/**
- * struct ad7793_platform_data - AD7793 platform data
- * @clock_src: Clock source selection
- * @burnout_current: If set to true the 100nA burnout current is enabled.
- * @boost_enable: Enable boost for the bias voltage generator.
- * @buffered: If set to true configure the device for buffered input mode.
- * @unipolar: If set to true sample in unipolar mode, if set to false sample in
- *		bipolar mode.
- * @refsel: Reference voltage selection
- * @bias_voltage: Bias voltage selection
- * @exitation_current: Excitation current selection
- * @current_source_direction: Excitation current direction selection
- */
-struct ad7793_platform_data {
-	enum ad7793_clock_source clock_src;
-	bool burnout_current;
-	bool boost_enable;
-	bool buffered;
-	bool unipolar;
-
-	enum ad7793_refsel refsel;
-	enum ad7793_bias_voltage bias_voltage;
-	enum ad7793_excitation_current exitation_current;
-	enum ad7793_current_source_direction current_source_direction;
-};
-
-#endif /* IIO_ADC_AD7793_H_ */
diff --git a/include/linux/platform_data/ad7793.h b/include/linux/platform_data/ad7793.h
new file mode 100644
index 000000000000..7ea6751aae6d
--- /dev/null
+++ b/include/linux/platform_data/ad7793.h
@@ -0,0 +1,112 @@
+/*
+ * AD7792/AD7793 SPI ADC driver
+ *
+ * Copyright 2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+#ifndef __LINUX_PLATFORM_DATA_AD7793_H__
+#define __LINUX_PLATFORM_DATA_AD7793_H__
+
+/**
+ * enum ad7793_clock_source - AD7793 clock source selection
+ * @AD7793_CLK_SRC_INT: Internal 64 kHz clock, not available at the CLK pin.
+ * @AD7793_CLK_SRC_INT_CO: Internal 64 kHz clock, available at the CLK pin.
+ * @AD7793_CLK_SRC_EXT: Use external clock.
+ * @AD7793_CLK_SRC_EXT_DIV2: Use external clock divided by 2.
+ */
+enum ad7793_clock_source {
+	AD7793_CLK_SRC_INT,
+	AD7793_CLK_SRC_INT_CO,
+	AD7793_CLK_SRC_EXT,
+	AD7793_CLK_SRC_EXT_DIV2,
+};
+
+/**
+ * enum ad7793_bias_voltage - AD7793 bias voltage selection
+ * @AD7793_BIAS_VOLTAGE_DISABLED: Bias voltage generator disabled
+ * @AD7793_BIAS_VOLTAGE_AIN1: Bias voltage connected to AIN1(-).
+ * @AD7793_BIAS_VOLTAGE_AIN2: Bias voltage connected to AIN2(-).
+ * @AD7793_BIAS_VOLTAGE_AIN3: Bias voltage connected to AIN3(-).
+ *	Only valid for AD7795/AD7796.
+ */
+enum ad7793_bias_voltage {
+	AD7793_BIAS_VOLTAGE_DISABLED,
+	AD7793_BIAS_VOLTAGE_AIN1,
+	AD7793_BIAS_VOLTAGE_AIN2,
+	AD7793_BIAS_VOLTAGE_AIN3,
+};
+
+/**
+ * enum ad7793_refsel - AD7793 reference voltage selection
+ * @AD7793_REFSEL_REFIN1: External reference applied between REFIN1(+)
+ *	and REFIN1(-).
+ * @AD7793_REFSEL_REFIN2: External reference applied between REFIN2(+) and
+ *	and REFIN1(-). Only valid for AD7795/AD7796.
+ * @AD7793_REFSEL_INTERNAL: Internal 1.17 V reference.
+ */
+enum ad7793_refsel {
+	AD7793_REFSEL_REFIN1 = 0,
+	AD7793_REFSEL_REFIN2 = 1,
+	AD7793_REFSEL_INTERNAL = 2,
+};
+
+/**
+ * enum ad7793_current_source_direction - AD7793 excitation current direction
+ * @AD7793_IEXEC1_IOUT1_IEXEC2_IOUT2: Current source IEXC1 connected to pin
+ *	IOUT1, current source IEXC2 connected to pin IOUT2.
+ * @AD7793_IEXEC1_IOUT2_IEXEC2_IOUT1: Current source IEXC2 connected to pin
+ *	IOUT1, current source IEXC1 connected to pin IOUT2.
+ * @AD7793_IEXEC1_IEXEC2_IOUT1: Both current sources connected to pin IOUT1.
+ *	Only valid when the current sources are set to 10 uA or 210 uA.
+ * @AD7793_IEXEC1_IEXEC2_IOUT2: Both current sources connected to Pin IOUT2.
+ *	Only valid when the current ources are set to 10 uA or 210 uA.
+ */
+enum ad7793_current_source_direction {
+	AD7793_IEXEC1_IOUT1_IEXEC2_IOUT2 = 0,
+	AD7793_IEXEC1_IOUT2_IEXEC2_IOUT1 = 1,
+	AD7793_IEXEC1_IEXEC2_IOUT1 = 2,
+	AD7793_IEXEC1_IEXEC2_IOUT2 = 3,
+};
+
+/**
+ * enum ad7793_excitation_current - AD7793 excitation current selection
+ * @AD7793_IX_DISABLED: Excitation current Disabled.
+ * @AD7793_IX_10uA: Enable 10 micro-ampere excitation current.
+ * @AD7793_IX_210uA: Enable 210 micro-ampere excitation current.
+ * @AD7793_IX_1mA: Enable 1 milli-Ampere excitation current.
+ */
+enum ad7793_excitation_current {
+	AD7793_IX_DISABLED = 0,
+	AD7793_IX_10uA = 1,
+	AD7793_IX_210uA = 2,
+	AD7793_IX_1mA = 3,
+};
+
+/**
+ * struct ad7793_platform_data - AD7793 platform data
+ * @clock_src: Clock source selection
+ * @burnout_current: If set to true the 100nA burnout current is enabled.
+ * @boost_enable: Enable boost for the bias voltage generator.
+ * @buffered: If set to true configure the device for buffered input mode.
+ * @unipolar: If set to true sample in unipolar mode, if set to false sample in
+ *		bipolar mode.
+ * @refsel: Reference voltage selection
+ * @bias_voltage: Bias voltage selection
+ * @exitation_current: Excitation current selection
+ * @current_source_direction: Excitation current direction selection
+ */
+struct ad7793_platform_data {
+	enum ad7793_clock_source clock_src;
+	bool burnout_current;
+	bool boost_enable;
+	bool buffered;
+	bool unipolar;
+
+	enum ad7793_refsel refsel;
+	enum ad7793_bias_voltage bias_voltage;
+	enum ad7793_excitation_current exitation_current;
+	enum ad7793_current_source_direction current_source_direction;
+};
+
+#endif /* IIO_ADC_AD7793_H_ */
-- 
cgit v1.2.3-55-g7522


From 45c3eb7d3a07eb08d1b5b0f5983a996d41610b84 Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Fri, 30 Nov 2012 08:41:50 -0800
Subject: ARM: OMAP: Move plat-omap/dma-omap.h to include/linux/omap-dma.h

Based on earlier discussions[1] we attempted to find a suitable
location for the omap DMA header in commit 2b6c4e73 (ARM: OMAP:
DMA: Move plat/dma.h to plat-omap/dma-omap.h) until the conversion
to dmaengine is complete.

Unfortunately that was before I was able to try to test compile
of the ARM multiplatform builds for omap2+, and the end result
was not very good.

So I'm creating yet another all over the place patch to cut the
last dependency for building omap2+ for ARM multiplatform. After
this, we have finally removed the driver dependencies to the
arch/arm code, except for few drivers that are being worked on.

The other option was to make the <plat-omap/dma-omap.h> path
to work, but we'd have to add some new header directory to for
multiplatform builds.

Or we would have to manually include arch/arm/plat-omap/include
again from arch/arm/Makefile for omap2+.

Neither of these alternatives sound appealing as they will
likely lead addition of various other headers exposed to the
drivers, which we want to avoid for the multiplatform kernels.

Since we already have a minimal include/linux/omap-dma.h,
let's just use that instead and add a note to it to not
use the custom omap DMA functions any longer where possible.

Note that converting omap DMA to dmaengine depends on
dmaengine supporting automatically incrementing the FIFO
address at the device end, and converting all the remaining
legacy drivers. So it's going to be few more merge windows.

[1] https://patchwork.kernel.org/patch/1519591/#

cc: Russell King <linux@arm.linux.org.uk>
cc: Kevin Hilman <khilman@ti.com>
cc: "Benoît Cousson" <b-cousson@ti.com>
cc: Herbert Xu <herbert@gondor.apana.org.au>
cc: "David S. Miller" <davem@davemloft.net>
cc: Vinod Koul <vinod.koul@intel.com>
cc: Dan Williams <djbw@fb.com>
cc: Mauro Carvalho Chehab <mchehab@infradead.org>
cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
cc: David Woodhouse <dwmw2@infradead.org>
cc: Kyungmin Park <kyungmin.park@samsung.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
cc: Hans Verkuil <hans.verkuil@cisco.com>
cc: Vaibhav Hiremath <hvaibhav@ti.com>
cc: Lokesh Vutla <lokeshvutla@ti.com>
cc: Rusty Russell <rusty@rustcorp.com.au>
cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
cc: Afzal Mohammed <afzal@ti.com>
cc: linux-crypto@vger.kernel.org
cc: linux-media@vger.kernel.org
cc: linux-mtd@lists.infradead.org
cc: linux-usb@vger.kernel.org
cc: linux-fbdev@vger.kernel.org
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-h2.c                     |   2 +-
 arch/arm/mach-omap1/board-h3.c                     |   2 +-
 arch/arm/mach-omap1/board-palmte.c                 |   2 +-
 arch/arm/mach-omap1/board-palmtt.c                 |   2 +-
 arch/arm/mach-omap1/board-palmz71.c                |   2 +-
 arch/arm/mach-omap1/board-sx1.c                    |   2 +-
 arch/arm/mach-omap1/dma.c                          |   2 +-
 arch/arm/mach-omap1/io.c                           |   2 +-
 arch/arm/mach-omap1/lcd_dma.c                      |   2 +-
 arch/arm/mach-omap1/mcbsp.c                        |   2 +-
 arch/arm/mach-omap1/pm.c                           |   2 +-
 arch/arm/mach-omap2/board-3430sdp.c                |   2 +-
 arch/arm/mach-omap2/board-h4.c                     |   2 +-
 arch/arm/mach-omap2/board-rx51-peripherals.c       |   2 +-
 arch/arm/mach-omap2/board-rx51.c                   |   2 +-
 arch/arm/mach-omap2/devices.c                      |   2 +-
 arch/arm/mach-omap2/dma.c                          |   2 +-
 arch/arm/mach-omap2/io.c                           |   2 +-
 arch/arm/mach-omap2/mcbsp.c                        |   2 +-
 arch/arm/mach-omap2/omap_hwmod_2420_data.c         |   2 +-
 arch/arm/mach-omap2/omap_hwmod_2430_data.c         |   2 +-
 .../mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c |   3 +-
 arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c |   2 +-
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c         |   2 +-
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c         |   2 +-
 arch/arm/mach-omap2/pm24xx.c                       |   2 +-
 arch/arm/mach-omap2/pm34xx.c                       |   3 +-
 arch/arm/mach-omap2/serial.c                       |   2 +-
 arch/arm/plat-omap/dma.c                           |   2 +-
 arch/arm/plat-omap/include/plat-omap/dma-omap.h    | 377 ---------------------
 drivers/crypto/omap-aes.c                          |   2 +-
 drivers/crypto/omap-sham.c                         |   2 +-
 drivers/dma/omap-dma.c                             |   2 -
 drivers/media/platform/omap/omap_vout.c            |   2 +-
 drivers/media/platform/omap/omap_vout_vrfb.c       |   2 +-
 drivers/media/platform/omap3isp/ispstat.h          |   2 +-
 drivers/media/platform/soc_camera/omap1_camera.c   |   2 +-
 drivers/mtd/nand/omap2.c                           |   1 -
 drivers/mtd/onenand/omap2.c                        |   2 +-
 drivers/usb/gadget/omap_udc.c                      |   2 +-
 drivers/usb/musb/tusb6010_omap.c                   |   2 +-
 drivers/video/omap/lcdc.c                          |   2 +-
 drivers/video/omap/omapfb_main.c                   |   2 +-
 drivers/video/omap/sossi.c                         |   2 +-
 include/linux/omap-dma.h                           | 366 ++++++++++++++++++++
 45 files changed, 408 insertions(+), 422 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat-omap/dma-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 9134b646f01b..dcf364d1a8b1 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -39,7 +39,7 @@
 #include <asm/mach/map.h>
 
 #include <mach/mux.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/tc.h>
 #include <mach/irda.h>
 #include <linux/platform_data/keypad-omap.h>
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index bf213d1d8075..b3fcdedb44ca 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -43,7 +43,7 @@
 #include <mach/mux.h>
 #include <mach/tc.h>
 #include <linux/platform_data/keypad-omap.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/flash.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c
index 584b6fab894b..c33dceb46607 100644
--- a/arch/arm/mach-omap1/board-palmte.c
+++ b/arch/arm/mach-omap1/board-palmte.c
@@ -37,7 +37,7 @@
 #include <mach/flash.h>
 #include <mach/mux.h>
 #include <mach/tc.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/irda.h>
 #include <linux/platform_data/keypad-omap.h>
 
diff --git a/arch/arm/mach-omap1/board-palmtt.c b/arch/arm/mach-omap1/board-palmtt.c
index fbc986bfe69e..2948b0ee4be8 100644
--- a/arch/arm/mach-omap1/board-palmtt.c
+++ b/arch/arm/mach-omap1/board-palmtt.c
@@ -36,7 +36,7 @@
 
 #include <mach/flash.h>
 #include <mach/mux.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/tc.h>
 #include <mach/irda.h>
 #include <linux/platform_data/keypad-omap.h>
diff --git a/arch/arm/mach-omap1/board-palmz71.c b/arch/arm/mach-omap1/board-palmz71.c
index 60d917a93763..7a05895c0be3 100644
--- a/arch/arm/mach-omap1/board-palmz71.c
+++ b/arch/arm/mach-omap1/board-palmz71.c
@@ -38,7 +38,7 @@
 
 #include <mach/flash.h>
 #include <mach/mux.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/tc.h>
 #include <mach/irda.h>
 #include <linux/platform_data/keypad-omap.h>
diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c
index 1ebc7e08d6e5..20ed52ae1714 100644
--- a/arch/arm/mach-omap1/board-sx1.c
+++ b/arch/arm/mach-omap1/board-sx1.c
@@ -36,7 +36,7 @@
 
 #include <mach/flash.h>
 #include <mach/mux.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/irda.h>
 #include <mach/tc.h>
 #include <mach/board-sx1.h>
diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c
index 978aed85d328..e190611e4b46 100644
--- a/arch/arm/mach-omap1/dma.c
+++ b/arch/arm/mach-omap1/dma.c
@@ -25,7 +25,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/tc.h>
 
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c
index 5a3b80617a11..499b8accb83d 100644
--- a/arch/arm/mach-omap1/io.c
+++ b/arch/arm/mach-omap1/io.c
@@ -18,7 +18,7 @@
 
 #include <mach/mux.h>
 #include <mach/tc.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "iomap.h"
 #include "common.h"
diff --git a/arch/arm/mach-omap1/lcd_dma.c b/arch/arm/mach-omap1/lcd_dma.c
index 7ed8c1857d56..77924be37d41 100644
--- a/arch/arm/mach-omap1/lcd_dma.c
+++ b/arch/arm/mach-omap1/lcd_dma.c
@@ -27,7 +27,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include <mach/hardware.h>
 #include <mach/lcdc.h>
diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c
index c6d8fdf92e9c..b0d4723c9a90 100644
--- a/arch/arm/mach-omap1/mcbsp.c
+++ b/arch/arm/mach-omap1/mcbsp.c
@@ -19,7 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/mux.h>
 #include "soc.h"
 #include <linux/platform_data/asoc-ti-mcbsp.h>
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 66d663a6ef3a..7a7690ab6cb8 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -52,7 +52,7 @@
 
 #include <mach/tc.h>
 #include <mach/mux.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <plat/dmtimer.h>
 
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 6601754f9512..7b201546834d 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -31,7 +31,7 @@
 #include <asm/mach/map.h>
 
 #include "common.h"
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-tfp410.h>
 
diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index b626dbe6f7bc..9a3878ec2256 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -32,7 +32,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <plat/debug-devices.h>
 
 #include <video/omapdss.h>
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index 07005fe40a2a..60529e0b3d67 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -31,7 +31,7 @@
 #include <asm/system_info.h>
 
 #include "common.h"
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include "gpmc-smc91x.h"
 
 #include "board-rx51.h"
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index bf8f74b0ce3e..ee1045c0ad67 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -24,7 +24,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "common.h"
 #include "mux.h"
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index cf365c387c06..cfef4547f2fd 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -23,7 +23,7 @@
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "iomap.h"
 #include "omap_hwmod.h"
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index e5aba58da5d2..612b98249873 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -28,7 +28,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "soc.h"
 #include "omap_hwmod.h"
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 7c39238322e0..2c3fdd65387b 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -25,7 +25,7 @@
 #include <asm/tlb.h>
 #include <asm/mach/map.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "omap_hwmod.h"
 #include "soc.h"
diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c
index bf496510eb5e..df49f2a49461 100644
--- a/arch/arm/mach-omap2/mcbsp.c
+++ b/arch/arm/mach-omap2/mcbsp.c
@@ -21,7 +21,7 @@
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 #include <linux/pm_runtime.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "omap_device.h"
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index a8b3368dca3d..10af996aa479 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -16,7 +16,7 @@
 #include <linux/i2c-omap.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <plat/dmtimer.h>
 
 #include "omap_hwmod.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index dc768c50e523..f14b894ce761 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -17,7 +17,7 @@
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <plat/dmtimer.h>
 
 #include "omap_hwmod.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c
index 40d6c93d9853..534974e08add 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c
@@ -10,7 +10,8 @@
  * published by the Free Software Foundation.
  */
 
-#include <plat-omap/dma-omap.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
 
 #include "omap_hwmod.h"
 #include "hdq1w.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
index a0116d08cf45..63d805521b4b 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/platform_data/gpio-omap.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <plat/dmtimer.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index abe66ced903f..d87e24834dc9 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -19,7 +19,7 @@
 #include <linux/power/smartreflex.h>
 #include <linux/platform_data/gpio-omap.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include "l3_3xxx.h"
 #include "l4_3xxx.h"
 #include <linux/platform_data/asoc-ti-mcbsp.h>
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 7a6132848f5d..61e807f2a798 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -23,7 +23,7 @@
 #include <linux/power/smartreflex.h>
 #include <linux/i2c-omap.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/asoc-ti-mcbsp.h>
diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index c289b3333c99..c333fa6dffa8 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -38,7 +38,7 @@
 #include <asm/mach-types.h>
 #include <asm/system_misc.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "soc.h"
 #include "common.h"
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 770320061422..7be3622cfc85 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -28,6 +28,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/omap-dma.h>
 #include <linux/platform_data/gpio-omap.h>
 
 #include <trace/events/power.h>
@@ -38,8 +39,6 @@
 
 #include "clockdomain.h"
 #include "powerdomain.h"
-#include <plat-omap/dma-omap.h>
-
 #include "soc.h"
 #include "common.h"
 #include "cm3xxx.h"
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index aa30a3c20883..93d102535c85 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -26,9 +26,9 @@
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/console.h>
+#include <linux/omap-dma.h>
 
 #include <plat/omap-serial.h>
-#include <plat-omap/dma-omap.h>
 
 #include "common.h"
 #include "omap_hwmod.h"
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index c288b76f8e6c..37a488aaa2ba 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -36,7 +36,7 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 /*
  * MAX_LOGICAL_DMA_CH_COUNT: the maximum number of logical DMA
diff --git a/arch/arm/plat-omap/include/plat-omap/dma-omap.h b/arch/arm/plat-omap/include/plat-omap/dma-omap.h
deleted file mode 100644
index 6f506ba9e453..000000000000
--- a/arch/arm/plat-omap/include/plat-omap/dma-omap.h
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- *  OMAP DMA handling defines and function
- *
- *  Copyright (C) 2003 Nokia Corporation
- *  Author: Juha Yrjölä <juha.yrjola@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#ifndef __ASM_ARCH_DMA_H
-#define __ASM_ARCH_DMA_H
-
-#include <linux/platform_device.h>
-
-#define INT_DMA_LCD			25
-
-#define OMAP1_DMA_TOUT_IRQ		(1 << 0)
-#define OMAP_DMA_DROP_IRQ		(1 << 1)
-#define OMAP_DMA_HALF_IRQ		(1 << 2)
-#define OMAP_DMA_FRAME_IRQ		(1 << 3)
-#define OMAP_DMA_LAST_IRQ		(1 << 4)
-#define OMAP_DMA_BLOCK_IRQ		(1 << 5)
-#define OMAP1_DMA_SYNC_IRQ		(1 << 6)
-#define OMAP2_DMA_PKT_IRQ		(1 << 7)
-#define OMAP2_DMA_TRANS_ERR_IRQ		(1 << 8)
-#define OMAP2_DMA_SECURE_ERR_IRQ	(1 << 9)
-#define OMAP2_DMA_SUPERVISOR_ERR_IRQ	(1 << 10)
-#define OMAP2_DMA_MISALIGNED_ERR_IRQ	(1 << 11)
-
-#define OMAP_DMA_CCR_EN			(1 << 7)
-#define OMAP_DMA_CCR_RD_ACTIVE		(1 << 9)
-#define OMAP_DMA_CCR_WR_ACTIVE		(1 << 10)
-#define OMAP_DMA_CCR_SEL_SRC_DST_SYNC	(1 << 24)
-#define OMAP_DMA_CCR_BUFFERING_DISABLE	(1 << 25)
-
-#define OMAP_DMA_DATA_TYPE_S8		0x00
-#define OMAP_DMA_DATA_TYPE_S16		0x01
-#define OMAP_DMA_DATA_TYPE_S32		0x02
-
-#define OMAP_DMA_SYNC_ELEMENT		0x00
-#define OMAP_DMA_SYNC_FRAME		0x01
-#define OMAP_DMA_SYNC_BLOCK		0x02
-#define OMAP_DMA_SYNC_PACKET		0x03
-
-#define OMAP_DMA_DST_SYNC_PREFETCH	0x02
-#define OMAP_DMA_SRC_SYNC		0x01
-#define OMAP_DMA_DST_SYNC		0x00
-
-#define OMAP_DMA_PORT_EMIFF		0x00
-#define OMAP_DMA_PORT_EMIFS		0x01
-#define OMAP_DMA_PORT_OCP_T1		0x02
-#define OMAP_DMA_PORT_TIPB		0x03
-#define OMAP_DMA_PORT_OCP_T2		0x04
-#define OMAP_DMA_PORT_MPUI		0x05
-
-#define OMAP_DMA_AMODE_CONSTANT		0x00
-#define OMAP_DMA_AMODE_POST_INC		0x01
-#define OMAP_DMA_AMODE_SINGLE_IDX	0x02
-#define OMAP_DMA_AMODE_DOUBLE_IDX	0x03
-
-#define DMA_DEFAULT_FIFO_DEPTH		0x10
-#define DMA_DEFAULT_ARB_RATE		0x01
-/* Pass THREAD_RESERVE ORed with THREAD_FIFO for tparams */
-#define DMA_THREAD_RESERVE_NORM		(0x00 << 12) /* Def */
-#define DMA_THREAD_RESERVE_ONET		(0x01 << 12)
-#define DMA_THREAD_RESERVE_TWOT		(0x02 << 12)
-#define DMA_THREAD_RESERVE_THREET	(0x03 << 12)
-#define DMA_THREAD_FIFO_NONE		(0x00 << 14) /* Def */
-#define DMA_THREAD_FIFO_75		(0x01 << 14)
-#define DMA_THREAD_FIFO_25		(0x02 << 14)
-#define DMA_THREAD_FIFO_50		(0x03 << 14)
-
-/* DMA4_OCP_SYSCONFIG bits */
-#define DMA_SYSCONFIG_MIDLEMODE_MASK		(3 << 12)
-#define DMA_SYSCONFIG_CLOCKACTIVITY_MASK	(3 << 8)
-#define DMA_SYSCONFIG_EMUFREE			(1 << 5)
-#define DMA_SYSCONFIG_SIDLEMODE_MASK		(3 << 3)
-#define DMA_SYSCONFIG_SOFTRESET			(1 << 2)
-#define DMA_SYSCONFIG_AUTOIDLE			(1 << 0)
-
-#define DMA_SYSCONFIG_MIDLEMODE(n)		((n) << 12)
-#define DMA_SYSCONFIG_SIDLEMODE(n)		((n) << 3)
-
-#define DMA_IDLEMODE_SMARTIDLE			0x2
-#define DMA_IDLEMODE_NO_IDLE			0x1
-#define DMA_IDLEMODE_FORCE_IDLE			0x0
-
-/* Chaining modes*/
-#ifndef CONFIG_ARCH_OMAP1
-#define OMAP_DMA_STATIC_CHAIN		0x1
-#define OMAP_DMA_DYNAMIC_CHAIN		0x2
-#define OMAP_DMA_CHAIN_ACTIVE		0x1
-#define OMAP_DMA_CHAIN_INACTIVE		0x0
-#endif
-
-#define DMA_CH_PRIO_HIGH		0x1
-#define DMA_CH_PRIO_LOW			0x0 /* Def */
-
-/* Errata handling */
-#define IS_DMA_ERRATA(id)		(errata & (id))
-#define SET_DMA_ERRATA(id)		(errata |= (id))
-
-#define DMA_ERRATA_IFRAME_BUFFERING	BIT(0x0)
-#define DMA_ERRATA_PARALLEL_CHANNELS	BIT(0x1)
-#define DMA_ERRATA_i378			BIT(0x2)
-#define DMA_ERRATA_i541			BIT(0x3)
-#define DMA_ERRATA_i88			BIT(0x4)
-#define DMA_ERRATA_3_3			BIT(0x5)
-#define DMA_ROMCODE_BUG			BIT(0x6)
-
-/* Attributes for OMAP DMA Contrller */
-#define DMA_LINKED_LCH			BIT(0x0)
-#define GLOBAL_PRIORITY			BIT(0x1)
-#define RESERVE_CHANNEL			BIT(0x2)
-#define IS_CSSA_32			BIT(0x3)
-#define IS_CDSA_32			BIT(0x4)
-#define IS_RW_PRIORITY			BIT(0x5)
-#define ENABLE_1510_MODE		BIT(0x6)
-#define SRC_PORT			BIT(0x7)
-#define DST_PORT			BIT(0x8)
-#define SRC_INDEX			BIT(0x9)
-#define DST_INDEX			BIT(0xa)
-#define IS_BURST_ONLY4			BIT(0xb)
-#define CLEAR_CSR_ON_READ		BIT(0xc)
-#define IS_WORD_16			BIT(0xd)
-#define ENABLE_16XX_MODE		BIT(0xe)
-#define HS_CHANNELS_RESERVED		BIT(0xf)
-
-/* Defines for DMA Capabilities */
-#define DMA_HAS_TRANSPARENT_CAPS	(0x1 << 18)
-#define DMA_HAS_CONSTANT_FILL_CAPS	(0x1 << 19)
-#define DMA_HAS_DESCRIPTOR_CAPS		(0x3 << 20)
-
-enum omap_reg_offsets {
-
-GCR,		GSCR,		GRST1,		HW_ID,
-PCH2_ID,	PCH0_ID,	PCH1_ID,	PCHG_ID,
-PCHD_ID,	CAPS_0,		CAPS_1,		CAPS_2,
-CAPS_3,		CAPS_4,		PCH2_SR,	PCH0_SR,
-PCH1_SR,	PCHD_SR,	REVISION,	IRQSTATUS_L0,
-IRQSTATUS_L1,	IRQSTATUS_L2,	IRQSTATUS_L3,	IRQENABLE_L0,
-IRQENABLE_L1,	IRQENABLE_L2,	IRQENABLE_L3,	SYSSTATUS,
-OCP_SYSCONFIG,
-
-/* omap1+ specific */
-CPC, CCR2, LCH_CTRL,
-
-/* Common registers for all omap's */
-CSDP,		CCR,		CICR,		CSR,
-CEN,		CFN,		CSFI,		CSEI,
-CSAC,		CDAC,		CDEI,
-CDFI,		CLNK_CTRL,
-
-/* Channel specific registers */
-CSSA,		CDSA,		COLOR,
-CCEN,		CCFN,
-
-/* omap3630 and omap4 specific */
-CDP,		CNDP,		CCDN,
-
-};
-
-enum omap_dma_burst_mode {
-	OMAP_DMA_DATA_BURST_DIS = 0,
-	OMAP_DMA_DATA_BURST_4,
-	OMAP_DMA_DATA_BURST_8,
-	OMAP_DMA_DATA_BURST_16,
-};
-
-enum end_type {
-	OMAP_DMA_LITTLE_ENDIAN = 0,
-	OMAP_DMA_BIG_ENDIAN
-};
-
-enum omap_dma_color_mode {
-	OMAP_DMA_COLOR_DIS = 0,
-	OMAP_DMA_CONSTANT_FILL,
-	OMAP_DMA_TRANSPARENT_COPY
-};
-
-enum omap_dma_write_mode {
-	OMAP_DMA_WRITE_NON_POSTED = 0,
-	OMAP_DMA_WRITE_POSTED,
-	OMAP_DMA_WRITE_LAST_NON_POSTED
-};
-
-enum omap_dma_channel_mode {
-	OMAP_DMA_LCH_2D = 0,
-	OMAP_DMA_LCH_G,
-	OMAP_DMA_LCH_P,
-	OMAP_DMA_LCH_PD
-};
-
-struct omap_dma_channel_params {
-	int data_type;		/* data type 8,16,32 */
-	int elem_count;		/* number of elements in a frame */
-	int frame_count;	/* number of frames in a element */
-
-	int src_port;		/* Only on OMAP1 REVISIT: Is this needed? */
-	int src_amode;		/* constant, post increment, indexed,
-					double indexed */
-	unsigned long src_start;	/* source address : physical */
-	int src_ei;		/* source element index */
-	int src_fi;		/* source frame index */
-
-	int dst_port;		/* Only on OMAP1 REVISIT: Is this needed? */
-	int dst_amode;		/* constant, post increment, indexed,
-					double indexed */
-	unsigned long dst_start;	/* source address : physical */
-	int dst_ei;		/* source element index */
-	int dst_fi;		/* source frame index */
-
-	int trigger;		/* trigger attached if the channel is
-					synchronized */
-	int sync_mode;		/* sycn on element, frame , block or packet */
-	int src_or_dst_synch;	/* source synch(1) or destination synch(0) */
-
-	int ie;			/* interrupt enabled */
-
-	unsigned char read_prio;/* read priority */
-	unsigned char write_prio;/* write priority */
-
-#ifndef CONFIG_ARCH_OMAP1
-	enum omap_dma_burst_mode burst_mode; /* Burst mode 4/8/16 words */
-#endif
-};
-
-struct omap_dma_lch {
-	int next_lch;
-	int dev_id;
-	u16 saved_csr;
-	u16 enabled_irqs;
-	const char *dev_name;
-	void (*callback)(int lch, u16 ch_status, void *data);
-	void *data;
-	long flags;
-	/* required for Dynamic chaining */
-	int prev_linked_ch;
-	int next_linked_ch;
-	int state;
-	int chain_id;
-	int status;
-};
-
-struct omap_dma_dev_attr {
-	u32 dev_caps;
-	u16 lch_count;
-	u16 chan_count;
-	struct omap_dma_lch *chan;
-};
-
-/* System DMA platform data structure */
-struct omap_system_dma_plat_info {
-	struct omap_dma_dev_attr *dma_attr;
-	u32 errata;
-	void (*disable_irq_lch)(int lch);
-	void (*show_dma_caps)(void);
-	void (*clear_lch_regs)(int lch);
-	void (*clear_dma)(int lch);
-	void (*dma_write)(u32 val, int reg, int lch);
-	u32 (*dma_read)(int reg, int lch);
-};
-
-#ifdef CONFIG_ARCH_OMAP2PLUS
-#define dma_omap2plus()	1
-#else
-#define dma_omap2plus()	0
-#endif
-#define dma_omap1()	(!dma_omap2plus())
-#define dma_omap15xx()	((dma_omap1() && (d->dev_caps & ENABLE_1510_MODE)))
-#define dma_omap16xx()	((dma_omap1() && (d->dev_caps & ENABLE_16XX_MODE)))
-
-extern void omap_set_dma_priority(int lch, int dst_port, int priority);
-extern int omap_request_dma(int dev_id, const char *dev_name,
-			void (*callback)(int lch, u16 ch_status, void *data),
-			void *data, int *dma_ch);
-extern void omap_enable_dma_irq(int ch, u16 irq_bits);
-extern void omap_disable_dma_irq(int ch, u16 irq_bits);
-extern void omap_free_dma(int ch);
-extern void omap_start_dma(int lch);
-extern void omap_stop_dma(int lch);
-extern void omap_set_dma_transfer_params(int lch, int data_type,
-					 int elem_count, int frame_count,
-					 int sync_mode,
-					 int dma_trigger, int src_or_dst_synch);
-extern void omap_set_dma_color_mode(int lch, enum omap_dma_color_mode mode,
-				    u32 color);
-extern void omap_set_dma_write_mode(int lch, enum omap_dma_write_mode mode);
-extern void omap_set_dma_channel_mode(int lch, enum omap_dma_channel_mode mode);
-
-extern void omap_set_dma_src_params(int lch, int src_port, int src_amode,
-				    unsigned long src_start,
-				    int src_ei, int src_fi);
-extern void omap_set_dma_src_index(int lch, int eidx, int fidx);
-extern void omap_set_dma_src_data_pack(int lch, int enable);
-extern void omap_set_dma_src_burst_mode(int lch,
-					enum omap_dma_burst_mode burst_mode);
-
-extern void omap_set_dma_dest_params(int lch, int dest_port, int dest_amode,
-				     unsigned long dest_start,
-				     int dst_ei, int dst_fi);
-extern void omap_set_dma_dest_index(int lch, int eidx, int fidx);
-extern void omap_set_dma_dest_data_pack(int lch, int enable);
-extern void omap_set_dma_dest_burst_mode(int lch,
-					 enum omap_dma_burst_mode burst_mode);
-
-extern void omap_set_dma_params(int lch,
-				struct omap_dma_channel_params *params);
-
-extern void omap_dma_link_lch(int lch_head, int lch_queue);
-extern void omap_dma_unlink_lch(int lch_head, int lch_queue);
-
-extern int omap_set_dma_callback(int lch,
-			void (*callback)(int lch, u16 ch_status, void *data),
-			void *data);
-extern dma_addr_t omap_get_dma_src_pos(int lch);
-extern dma_addr_t omap_get_dma_dst_pos(int lch);
-extern void omap_clear_dma(int lch);
-extern int omap_get_dma_active_status(int lch);
-extern int omap_dma_running(void);
-extern void omap_dma_set_global_params(int arb_rate, int max_fifo_depth,
-				       int tparams);
-extern int omap_dma_set_prio_lch(int lch, unsigned char read_prio,
-				 unsigned char write_prio);
-extern void omap_set_dma_dst_endian_type(int lch, enum end_type etype);
-extern void omap_set_dma_src_endian_type(int lch, enum end_type etype);
-extern int omap_get_dma_index(int lch, int *ei, int *fi);
-
-void omap_dma_global_context_save(void);
-void omap_dma_global_context_restore(void);
-
-extern void omap_dma_disable_irq(int lch);
-
-/* Chaining APIs */
-#ifndef CONFIG_ARCH_OMAP1
-extern int omap_request_dma_chain(int dev_id, const char *dev_name,
-				  void (*callback) (int lch, u16 ch_status,
-						    void *data),
-				  int *chain_id, int no_of_chans,
-				  int chain_mode,
-				  struct omap_dma_channel_params params);
-extern int omap_free_dma_chain(int chain_id);
-extern int omap_dma_chain_a_transfer(int chain_id, int src_start,
-				     int dest_start, int elem_count,
-				     int frame_count, void *callbk_data);
-extern int omap_start_dma_chain_transfers(int chain_id);
-extern int omap_stop_dma_chain_transfers(int chain_id);
-extern int omap_get_dma_chain_index(int chain_id, int *ei, int *fi);
-extern int omap_get_dma_chain_dst_pos(int chain_id);
-extern int omap_get_dma_chain_src_pos(int chain_id);
-
-extern int omap_modify_dma_chain_params(int chain_id,
-					struct omap_dma_channel_params params);
-extern int omap_dma_chain_status(int chain_id);
-#endif
-
-#if defined(CONFIG_ARCH_OMAP1) && defined(CONFIG_FB_OMAP)
-#include <mach/lcd_dma.h>
-#else
-static inline int omap_lcd_dma_running(void)
-{
-	return 0;
-}
-#endif
-
-#endif /* __ASM_ARCH_DMA_H */
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 649a146e1382..e66e8ee5a9af 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -29,7 +29,7 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/aes.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 /* OMAP TRM gives bitfields as start:end, where start is the higher bit
    number. For example 7:0 */
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index d76fe06b9417..1d75e6f95a58 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -37,7 +37,7 @@
 #include <crypto/hash.h>
 #include <crypto/internal/hash.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <mach/irqs.h>
 
 #define SHA_REG_DIGEST(x)		(0x00 + ((x) * 0x04))
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 7d35c237fbf1..5a31264f2bd1 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -19,8 +19,6 @@
 
 #include "virt-dma.h"
 
-#include <plat-omap/dma-omap.h>
-
 struct omap_dmadev {
 	struct dma_device ddev;
 	spinlock_t lock;
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index 4b1becc86e54..993504015963 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -45,7 +45,7 @@
 #include <media/v4l2-ioctl.h>
 
 #include <plat/cpu.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <video/omapvrfb.h>
 #include <video/omapdss.h>
 
diff --git a/drivers/media/platform/omap/omap_vout_vrfb.c b/drivers/media/platform/omap/omap_vout_vrfb.c
index 8340445a0ee5..cf1c437a8687 100644
--- a/drivers/media/platform/omap/omap_vout_vrfb.c
+++ b/drivers/media/platform/omap/omap_vout_vrfb.c
@@ -16,7 +16,7 @@
 #include <media/videobuf-dma-contig.h>
 #include <media/v4l2-device.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <video/omapvrfb.h>
 
 #include "omap_voutdef.h"
diff --git a/drivers/media/platform/omap3isp/ispstat.h b/drivers/media/platform/omap3isp/ispstat.h
index 40f87cdd7994..bb363742bd1c 100644
--- a/drivers/media/platform/omap3isp/ispstat.h
+++ b/drivers/media/platform/omap3isp/ispstat.h
@@ -30,7 +30,7 @@
 
 #include <linux/types.h>
 #include <linux/omap3isp.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 #include <media/v4l2-event.h>
 
 #include "isp.h"
diff --git a/drivers/media/platform/soc_camera/omap1_camera.c b/drivers/media/platform/soc_camera/omap1_camera.c
index cae9ce6275e9..611595f1a75e 100644
--- a/drivers/media/platform/soc_camera/omap1_camera.c
+++ b/drivers/media/platform/soc_camera/omap1_camera.c
@@ -34,7 +34,7 @@
 #include <media/videobuf-dma-contig.h>
 #include <media/videobuf-dma-sg.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 
 #define DRIVER_NAME		"omap1-camera"
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 5c8978e90240..1f34ba104ef4 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -27,7 +27,6 @@
 #include <linux/bch.h>
 #endif
 
-#include <plat-omap/dma-omap.h>
 #include <linux/platform_data/mtd-nand-omap2.h>
 
 #define	DRIVER_NAME	"omap2-nand"
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 99f96e19ebea..00cd3da29435 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -41,7 +41,7 @@
 #include <linux/platform_data/mtd-onenand-omap2.h>
 #include <asm/gpio.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #define DRIVER_NAME "omap2-onenand"
 
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 23afa06b65a4..1b8ddc3ddc50 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -44,7 +44,7 @@
 #include <asm/unaligned.h>
 #include <asm/mach-types.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include <mach/usb.h>
 
diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c
index bfca114f7c56..82310b6bc97b 100644
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -16,7 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "musb_core.h"
 #include "tusb6010.h"
diff --git a/drivers/video/omap/lcdc.c b/drivers/video/omap/lcdc.c
index c39d6e46f8c5..b52f62595f65 100644
--- a/drivers/video/omap/lcdc.c
+++ b/drivers/video/omap/lcdc.c
@@ -31,7 +31,7 @@
 #include <linux/gfp.h>
 
 #include <mach/lcdc.h>
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include <asm/mach-types.h>
 
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 1b5ee8ec192a..e31f5b33b501 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -30,7 +30,7 @@
 #include <linux/uaccess.h>
 #include <linux/module.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "omapfb.h"
 #include "lcdc.h"
diff --git a/drivers/video/omap/sossi.c b/drivers/video/omap/sossi.c
index c510a4457398..d4e7684e7045 100644
--- a/drivers/video/omap/sossi.c
+++ b/drivers/video/omap/sossi.c
@@ -25,7 +25,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 
-#include <plat-omap/dma-omap.h>
+#include <linux/omap-dma.h>
 
 #include "omapfb.h"
 #include "lcdc.h"
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index eb475a8ea25b..7af25a9c9c51 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -19,4 +19,370 @@ static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
 }
 #endif
 
+/*
+ *  Legacy OMAP DMA handling defines and functions
+ *
+ *  NOTE: Do not use these any longer.
+ *
+ *  Use the generic dmaengine functions as defined in
+ *  include/linux/dmaengine.h.
+ *
+ *  Copyright (C) 2003 Nokia Corporation
+ *  Author: Juha Yrjölä <juha.yrjola@nokia.com>
+ *
+ */
+
+#include <linux/platform_device.h>
+
+#define INT_DMA_LCD			25
+
+#define OMAP1_DMA_TOUT_IRQ		(1 << 0)
+#define OMAP_DMA_DROP_IRQ		(1 << 1)
+#define OMAP_DMA_HALF_IRQ		(1 << 2)
+#define OMAP_DMA_FRAME_IRQ		(1 << 3)
+#define OMAP_DMA_LAST_IRQ		(1 << 4)
+#define OMAP_DMA_BLOCK_IRQ		(1 << 5)
+#define OMAP1_DMA_SYNC_IRQ		(1 << 6)
+#define OMAP2_DMA_PKT_IRQ		(1 << 7)
+#define OMAP2_DMA_TRANS_ERR_IRQ		(1 << 8)
+#define OMAP2_DMA_SECURE_ERR_IRQ	(1 << 9)
+#define OMAP2_DMA_SUPERVISOR_ERR_IRQ	(1 << 10)
+#define OMAP2_DMA_MISALIGNED_ERR_IRQ	(1 << 11)
+
+#define OMAP_DMA_CCR_EN			(1 << 7)
+#define OMAP_DMA_CCR_RD_ACTIVE		(1 << 9)
+#define OMAP_DMA_CCR_WR_ACTIVE		(1 << 10)
+#define OMAP_DMA_CCR_SEL_SRC_DST_SYNC	(1 << 24)
+#define OMAP_DMA_CCR_BUFFERING_DISABLE	(1 << 25)
+
+#define OMAP_DMA_DATA_TYPE_S8		0x00
+#define OMAP_DMA_DATA_TYPE_S16		0x01
+#define OMAP_DMA_DATA_TYPE_S32		0x02
+
+#define OMAP_DMA_SYNC_ELEMENT		0x00
+#define OMAP_DMA_SYNC_FRAME		0x01
+#define OMAP_DMA_SYNC_BLOCK		0x02
+#define OMAP_DMA_SYNC_PACKET		0x03
+
+#define OMAP_DMA_DST_SYNC_PREFETCH	0x02
+#define OMAP_DMA_SRC_SYNC		0x01
+#define OMAP_DMA_DST_SYNC		0x00
+
+#define OMAP_DMA_PORT_EMIFF		0x00
+#define OMAP_DMA_PORT_EMIFS		0x01
+#define OMAP_DMA_PORT_OCP_T1		0x02
+#define OMAP_DMA_PORT_TIPB		0x03
+#define OMAP_DMA_PORT_OCP_T2		0x04
+#define OMAP_DMA_PORT_MPUI		0x05
+
+#define OMAP_DMA_AMODE_CONSTANT		0x00
+#define OMAP_DMA_AMODE_POST_INC		0x01
+#define OMAP_DMA_AMODE_SINGLE_IDX	0x02
+#define OMAP_DMA_AMODE_DOUBLE_IDX	0x03
+
+#define DMA_DEFAULT_FIFO_DEPTH		0x10
+#define DMA_DEFAULT_ARB_RATE		0x01
+/* Pass THREAD_RESERVE ORed with THREAD_FIFO for tparams */
+#define DMA_THREAD_RESERVE_NORM		(0x00 << 12) /* Def */
+#define DMA_THREAD_RESERVE_ONET		(0x01 << 12)
+#define DMA_THREAD_RESERVE_TWOT		(0x02 << 12)
+#define DMA_THREAD_RESERVE_THREET	(0x03 << 12)
+#define DMA_THREAD_FIFO_NONE		(0x00 << 14) /* Def */
+#define DMA_THREAD_FIFO_75		(0x01 << 14)
+#define DMA_THREAD_FIFO_25		(0x02 << 14)
+#define DMA_THREAD_FIFO_50		(0x03 << 14)
+
+/* DMA4_OCP_SYSCONFIG bits */
+#define DMA_SYSCONFIG_MIDLEMODE_MASK		(3 << 12)
+#define DMA_SYSCONFIG_CLOCKACTIVITY_MASK	(3 << 8)
+#define DMA_SYSCONFIG_EMUFREE			(1 << 5)
+#define DMA_SYSCONFIG_SIDLEMODE_MASK		(3 << 3)
+#define DMA_SYSCONFIG_SOFTRESET			(1 << 2)
+#define DMA_SYSCONFIG_AUTOIDLE			(1 << 0)
+
+#define DMA_SYSCONFIG_MIDLEMODE(n)		((n) << 12)
+#define DMA_SYSCONFIG_SIDLEMODE(n)		((n) << 3)
+
+#define DMA_IDLEMODE_SMARTIDLE			0x2
+#define DMA_IDLEMODE_NO_IDLE			0x1
+#define DMA_IDLEMODE_FORCE_IDLE			0x0
+
+/* Chaining modes*/
+#ifndef CONFIG_ARCH_OMAP1
+#define OMAP_DMA_STATIC_CHAIN		0x1
+#define OMAP_DMA_DYNAMIC_CHAIN		0x2
+#define OMAP_DMA_CHAIN_ACTIVE		0x1
+#define OMAP_DMA_CHAIN_INACTIVE		0x0
+#endif
+
+#define DMA_CH_PRIO_HIGH		0x1
+#define DMA_CH_PRIO_LOW			0x0 /* Def */
+
+/* Errata handling */
+#define IS_DMA_ERRATA(id)		(errata & (id))
+#define SET_DMA_ERRATA(id)		(errata |= (id))
+
+#define DMA_ERRATA_IFRAME_BUFFERING	BIT(0x0)
+#define DMA_ERRATA_PARALLEL_CHANNELS	BIT(0x1)
+#define DMA_ERRATA_i378			BIT(0x2)
+#define DMA_ERRATA_i541			BIT(0x3)
+#define DMA_ERRATA_i88			BIT(0x4)
+#define DMA_ERRATA_3_3			BIT(0x5)
+#define DMA_ROMCODE_BUG			BIT(0x6)
+
+/* Attributes for OMAP DMA Contrller */
+#define DMA_LINKED_LCH			BIT(0x0)
+#define GLOBAL_PRIORITY			BIT(0x1)
+#define RESERVE_CHANNEL			BIT(0x2)
+#define IS_CSSA_32			BIT(0x3)
+#define IS_CDSA_32			BIT(0x4)
+#define IS_RW_PRIORITY			BIT(0x5)
+#define ENABLE_1510_MODE		BIT(0x6)
+#define SRC_PORT			BIT(0x7)
+#define DST_PORT			BIT(0x8)
+#define SRC_INDEX			BIT(0x9)
+#define DST_INDEX			BIT(0xa)
+#define IS_BURST_ONLY4			BIT(0xb)
+#define CLEAR_CSR_ON_READ		BIT(0xc)
+#define IS_WORD_16			BIT(0xd)
+#define ENABLE_16XX_MODE		BIT(0xe)
+#define HS_CHANNELS_RESERVED		BIT(0xf)
+
+/* Defines for DMA Capabilities */
+#define DMA_HAS_TRANSPARENT_CAPS	(0x1 << 18)
+#define DMA_HAS_CONSTANT_FILL_CAPS	(0x1 << 19)
+#define DMA_HAS_DESCRIPTOR_CAPS		(0x3 << 20)
+
+enum omap_reg_offsets {
+
+GCR,		GSCR,		GRST1,		HW_ID,
+PCH2_ID,	PCH0_ID,	PCH1_ID,	PCHG_ID,
+PCHD_ID,	CAPS_0,		CAPS_1,		CAPS_2,
+CAPS_3,		CAPS_4,		PCH2_SR,	PCH0_SR,
+PCH1_SR,	PCHD_SR,	REVISION,	IRQSTATUS_L0,
+IRQSTATUS_L1,	IRQSTATUS_L2,	IRQSTATUS_L3,	IRQENABLE_L0,
+IRQENABLE_L1,	IRQENABLE_L2,	IRQENABLE_L3,	SYSSTATUS,
+OCP_SYSCONFIG,
+
+/* omap1+ specific */
+CPC, CCR2, LCH_CTRL,
+
+/* Common registers for all omap's */
+CSDP,		CCR,		CICR,		CSR,
+CEN,		CFN,		CSFI,		CSEI,
+CSAC,		CDAC,		CDEI,
+CDFI,		CLNK_CTRL,
+
+/* Channel specific registers */
+CSSA,		CDSA,		COLOR,
+CCEN,		CCFN,
+
+/* omap3630 and omap4 specific */
+CDP,		CNDP,		CCDN,
+
+};
+
+enum omap_dma_burst_mode {
+	OMAP_DMA_DATA_BURST_DIS = 0,
+	OMAP_DMA_DATA_BURST_4,
+	OMAP_DMA_DATA_BURST_8,
+	OMAP_DMA_DATA_BURST_16,
+};
+
+enum end_type {
+	OMAP_DMA_LITTLE_ENDIAN = 0,
+	OMAP_DMA_BIG_ENDIAN
+};
+
+enum omap_dma_color_mode {
+	OMAP_DMA_COLOR_DIS = 0,
+	OMAP_DMA_CONSTANT_FILL,
+	OMAP_DMA_TRANSPARENT_COPY
+};
+
+enum omap_dma_write_mode {
+	OMAP_DMA_WRITE_NON_POSTED = 0,
+	OMAP_DMA_WRITE_POSTED,
+	OMAP_DMA_WRITE_LAST_NON_POSTED
+};
+
+enum omap_dma_channel_mode {
+	OMAP_DMA_LCH_2D = 0,
+	OMAP_DMA_LCH_G,
+	OMAP_DMA_LCH_P,
+	OMAP_DMA_LCH_PD
+};
+
+struct omap_dma_channel_params {
+	int data_type;		/* data type 8,16,32 */
+	int elem_count;		/* number of elements in a frame */
+	int frame_count;	/* number of frames in a element */
+
+	int src_port;		/* Only on OMAP1 REVISIT: Is this needed? */
+	int src_amode;		/* constant, post increment, indexed,
+					double indexed */
+	unsigned long src_start;	/* source address : physical */
+	int src_ei;		/* source element index */
+	int src_fi;		/* source frame index */
+
+	int dst_port;		/* Only on OMAP1 REVISIT: Is this needed? */
+	int dst_amode;		/* constant, post increment, indexed,
+					double indexed */
+	unsigned long dst_start;	/* source address : physical */
+	int dst_ei;		/* source element index */
+	int dst_fi;		/* source frame index */
+
+	int trigger;		/* trigger attached if the channel is
+					synchronized */
+	int sync_mode;		/* sycn on element, frame , block or packet */
+	int src_or_dst_synch;	/* source synch(1) or destination synch(0) */
+
+	int ie;			/* interrupt enabled */
+
+	unsigned char read_prio;/* read priority */
+	unsigned char write_prio;/* write priority */
+
+#ifndef CONFIG_ARCH_OMAP1
+	enum omap_dma_burst_mode burst_mode; /* Burst mode 4/8/16 words */
+#endif
+};
+
+struct omap_dma_lch {
+	int next_lch;
+	int dev_id;
+	u16 saved_csr;
+	u16 enabled_irqs;
+	const char *dev_name;
+	void (*callback)(int lch, u16 ch_status, void *data);
+	void *data;
+	long flags;
+	/* required for Dynamic chaining */
+	int prev_linked_ch;
+	int next_linked_ch;
+	int state;
+	int chain_id;
+	int status;
+};
+
+struct omap_dma_dev_attr {
+	u32 dev_caps;
+	u16 lch_count;
+	u16 chan_count;
+	struct omap_dma_lch *chan;
+};
+
+/* System DMA platform data structure */
+struct omap_system_dma_plat_info {
+	struct omap_dma_dev_attr *dma_attr;
+	u32 errata;
+	void (*disable_irq_lch)(int lch);
+	void (*show_dma_caps)(void);
+	void (*clear_lch_regs)(int lch);
+	void (*clear_dma)(int lch);
+	void (*dma_write)(u32 val, int reg, int lch);
+	u32 (*dma_read)(int reg, int lch);
+};
+
+#ifdef CONFIG_ARCH_OMAP2PLUS
+#define dma_omap2plus()	1
+#else
+#define dma_omap2plus()	0
 #endif
+#define dma_omap1()	(!dma_omap2plus())
+#define dma_omap15xx()	((dma_omap1() && (d->dev_caps & ENABLE_1510_MODE)))
+#define dma_omap16xx()	((dma_omap1() && (d->dev_caps & ENABLE_16XX_MODE)))
+
+extern void omap_set_dma_priority(int lch, int dst_port, int priority);
+extern int omap_request_dma(int dev_id, const char *dev_name,
+			void (*callback)(int lch, u16 ch_status, void *data),
+			void *data, int *dma_ch);
+extern void omap_enable_dma_irq(int ch, u16 irq_bits);
+extern void omap_disable_dma_irq(int ch, u16 irq_bits);
+extern void omap_free_dma(int ch);
+extern void omap_start_dma(int lch);
+extern void omap_stop_dma(int lch);
+extern void omap_set_dma_transfer_params(int lch, int data_type,
+					 int elem_count, int frame_count,
+					 int sync_mode,
+					 int dma_trigger, int src_or_dst_synch);
+extern void omap_set_dma_color_mode(int lch, enum omap_dma_color_mode mode,
+				    u32 color);
+extern void omap_set_dma_write_mode(int lch, enum omap_dma_write_mode mode);
+extern void omap_set_dma_channel_mode(int lch, enum omap_dma_channel_mode mode);
+
+extern void omap_set_dma_src_params(int lch, int src_port, int src_amode,
+				    unsigned long src_start,
+				    int src_ei, int src_fi);
+extern void omap_set_dma_src_index(int lch, int eidx, int fidx);
+extern void omap_set_dma_src_data_pack(int lch, int enable);
+extern void omap_set_dma_src_burst_mode(int lch,
+					enum omap_dma_burst_mode burst_mode);
+
+extern void omap_set_dma_dest_params(int lch, int dest_port, int dest_amode,
+				     unsigned long dest_start,
+				     int dst_ei, int dst_fi);
+extern void omap_set_dma_dest_index(int lch, int eidx, int fidx);
+extern void omap_set_dma_dest_data_pack(int lch, int enable);
+extern void omap_set_dma_dest_burst_mode(int lch,
+					 enum omap_dma_burst_mode burst_mode);
+
+extern void omap_set_dma_params(int lch,
+				struct omap_dma_channel_params *params);
+
+extern void omap_dma_link_lch(int lch_head, int lch_queue);
+extern void omap_dma_unlink_lch(int lch_head, int lch_queue);
+
+extern int omap_set_dma_callback(int lch,
+			void (*callback)(int lch, u16 ch_status, void *data),
+			void *data);
+extern dma_addr_t omap_get_dma_src_pos(int lch);
+extern dma_addr_t omap_get_dma_dst_pos(int lch);
+extern void omap_clear_dma(int lch);
+extern int omap_get_dma_active_status(int lch);
+extern int omap_dma_running(void);
+extern void omap_dma_set_global_params(int arb_rate, int max_fifo_depth,
+				       int tparams);
+extern int omap_dma_set_prio_lch(int lch, unsigned char read_prio,
+				 unsigned char write_prio);
+extern void omap_set_dma_dst_endian_type(int lch, enum end_type etype);
+extern void omap_set_dma_src_endian_type(int lch, enum end_type etype);
+extern int omap_get_dma_index(int lch, int *ei, int *fi);
+
+void omap_dma_global_context_save(void);
+void omap_dma_global_context_restore(void);
+
+extern void omap_dma_disable_irq(int lch);
+
+/* Chaining APIs */
+#ifndef CONFIG_ARCH_OMAP1
+extern int omap_request_dma_chain(int dev_id, const char *dev_name,
+				  void (*callback) (int lch, u16 ch_status,
+						    void *data),
+				  int *chain_id, int no_of_chans,
+				  int chain_mode,
+				  struct omap_dma_channel_params params);
+extern int omap_free_dma_chain(int chain_id);
+extern int omap_dma_chain_a_transfer(int chain_id, int src_start,
+				     int dest_start, int elem_count,
+				     int frame_count, void *callbk_data);
+extern int omap_start_dma_chain_transfers(int chain_id);
+extern int omap_stop_dma_chain_transfers(int chain_id);
+extern int omap_get_dma_chain_index(int chain_id, int *ei, int *fi);
+extern int omap_get_dma_chain_dst_pos(int chain_id);
+extern int omap_get_dma_chain_src_pos(int chain_id);
+
+extern int omap_modify_dma_chain_params(int chain_id,
+					struct omap_dma_channel_params params);
+extern int omap_dma_chain_status(int chain_id);
+#endif
+
+#if defined(CONFIG_ARCH_OMAP1) && defined(CONFIG_FB_OMAP)
+#include <mach/lcd_dma.h>
+#else
+static inline int omap_lcd_dma_running(void)
+{
+	return 0;
+}
+#endif
+
+#endif /* __LINUX_OMAP_DMA_H */
-- 
cgit v1.2.3-55-g7522


From 9a4da8a5b109906a64bed5aaeb83bf4edb1f5888 Mon Sep 17 00:00:00 2001
From: Jan Glauber
Date: Thu, 29 Nov 2012 13:05:05 +0100
Subject: s390/pci: PCI adapter interrupts for MSI/MSI-X

Support PCI adapter interrupts using the Single-IRQ-mode. Single-IRQ-mode
disables an adapter IRQ automatically after delivering it until the SIC
instruction enables it again. This is used to reduce the number of IRQs
for streaming workloads.

Up to 64 MSI handlers can be registered per PCI function.
A hash table is used to map interrupt numbers to MSI descriptors.
The interrupt vector is scanned using the flogr instruction.
Only MSI/MSI-X interrupts are supported, no legacy INTs.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/hw_irq.h |  22 ++
 arch/s390/include/asm/irq.h    |  12 ++
 arch/s390/include/asm/isc.h    |   1 +
 arch/s390/include/asm/pci.h    |  27 +++
 arch/s390/kernel/irq.c         |   2 +
 arch/s390/pci/Makefile         |   2 +-
 arch/s390/pci/pci.c            | 464 ++++++++++++++++++++++++++++++++++++++++-
 arch/s390/pci/pci_clp.c        |   3 +
 arch/s390/pci/pci_msi.c        | 141 +++++++++++++
 drivers/pci/msi.c              |   6 +
 include/linux/irq.h            |  10 +-
 11 files changed, 683 insertions(+), 7 deletions(-)
 create mode 100644 arch/s390/include/asm/hw_irq.h
 create mode 100644 arch/s390/pci/pci_msi.c

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
new file mode 100644
index 000000000000..7e3d2586c1ff
--- /dev/null
+++ b/arch/s390/include/asm/hw_irq.h
@@ -0,0 +1,22 @@
+#ifndef _HW_IRQ_H
+#define _HW_IRQ_H
+
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
+{
+	return __irq_get_msi_desc(irq);
+}
+
+/* Must be called with msi map lock held */
+static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi)
+{
+	if (!msi)
+		return -EINVAL;
+
+	msi->irq = irq;
+	return 0;
+}
+
+#endif
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 6703dd986fd4..e6972f85d2b0 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -33,6 +33,8 @@ enum interruption_class {
 	IOINT_APB,
 	IOINT_ADM,
 	IOINT_CSC,
+	IOINT_PCI,
+	IOINT_MSI,
 	NMI_NMI,
 	NR_IRQS,
 };
@@ -51,4 +53,14 @@ void service_subclass_irq_unregister(void);
 void measurement_alert_subclass_register(void);
 void measurement_alert_subclass_unregister(void);
 
+#ifdef CONFIG_LOCKDEP
+#  define disable_irq_nosync_lockdep(irq)	disable_irq_nosync(irq)
+#  define disable_irq_nosync_lockdep_irqsave(irq, flags) \
+						disable_irq_nosync(irq)
+#  define disable_irq_lockdep(irq)		disable_irq(irq)
+#  define enable_irq_lockdep(irq)		enable_irq(irq)
+#  define enable_irq_lockdep_irqrestore(irq, flags) \
+						enable_irq(irq)
+#endif
+
 #endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
index 5ae606456b0a..68d7d68300f2 100644
--- a/arch/s390/include/asm/isc.h
+++ b/arch/s390/include/asm/isc.h
@@ -18,6 +18,7 @@
 #define CHSC_SCH_ISC 7			/* CHSC subchannels */
 /* Adapter interrupts. */
 #define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
+#define PCI_ISC 2			/* PCI I/O subchannels */
 #define AP_ISC 6			/* adjunct processor (crypto) devices */
 
 /* Functions for registration of I/O interruption subclasses */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6f98a54950ea..2a6084fa4b1a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -20,6 +20,10 @@ void pci_iounmap(struct pci_dev *, void __iomem *);
 int pci_domain_nr(struct pci_bus *);
 int pci_proc_domain(struct pci_bus *);
 
+/* MSI arch hooks */
+#define arch_setup_msi_irqs	arch_setup_msi_irqs
+#define arch_teardown_msi_irqs	arch_teardown_msi_irqs
+
 #define ZPCI_BUS_NR			0	/* default bus number */
 #define ZPCI_DEVFN			0	/* default device number */
 
@@ -29,6 +33,15 @@ int pci_proc_domain(struct pci_bus *);
 #define ZPCI_FC_BLOCKED			0x20
 #define ZPCI_FC_DMA_ENABLED		0x10
 
+struct msi_map {
+	unsigned long irq;
+	struct msi_desc *msi;
+	struct hlist_node msi_chain;
+};
+
+#define ZPCI_NR_MSI_VECS	64
+#define ZPCI_MSI_MASK		(ZPCI_NR_MSI_VECS - 1)
+
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,
 	ZPCI_FN_STATE_STANDBY,
@@ -56,6 +69,12 @@ struct zpci_dev {
 	u8		pfgid;		/* function group ID */
 	u16		domain;
 
+	/* IRQ stuff */
+	u64		msi_addr;	/* MSI address */
+	struct zdev_irq_map *irq_map;
+	struct msi_map *msi_map[ZPCI_NR_MSI_VECS];
+	unsigned int	aisb;		/* number of the summary bit */
+
 	struct zpci_bar_struct bars[PCI_BAR_COUNT];
 
 	enum pci_bus_speed max_bus_speed;
@@ -83,6 +102,14 @@ int clp_add_pci_device(u32, u32, int);
 int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
 
+/* MSI */
+struct msi_desc *__irq_get_msi_desc(unsigned int);
+int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32);
+int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int);
+void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *);
+int zpci_msihash_init(void);
+void zpci_msihash_exit(void);
+
 /* Helpers */
 struct zpci_dev *get_zdev(struct pci_dev *);
 struct zpci_dev *get_zdev_by_fid(u32);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6cdc55b26d68..bf24293970ce 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -58,6 +58,8 @@ static const struct irq_class intrclass_names[] = {
 	[IOINT_APB]  = {.name = "APB", .desc = "[I/O] AP Bus"},
 	[IOINT_ADM]  = {.name = "ADM", .desc = "[I/O] EADM Subchannel"},
 	[IOINT_CSC]  = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+	[IOINT_PCI]  = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
+	[IOINT_MSI] =  {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
 	[NMI_NMI]    = {.name = "NMI", .desc = "[NMI] Machine Check"},
 };
 
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 1afd68c4c984..628be7bc006c 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -2,4 +2,4 @@
 # Makefile for the s390 PCI subsystem.
 #
 
-obj-$(CONFIG_PCI)	+= pci.o pci_clp.o
+obj-$(CONFIG_PCI)	+= pci.o pci_clp.o pci_msi.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 70f6c56c8d0f..d11dc8a25f34 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -23,17 +23,25 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
 #include <linux/seq_file.h>
 #include <linux/pci.h>
 #include <linux/msi.h>
 
+#include <asm/isc.h>
+#include <asm/airq.h>
 #include <asm/facility.h>
 #include <asm/pci_insn.h>
 #include <asm/pci_clp.h>
 
 #define DEBUG				/* enable pr_debug */
 
+#define	SIC_IRQ_MODE_ALL		0
+#define	SIC_IRQ_MODE_SINGLE		1
+
 #define ZPCI_NR_DMA_SPACES		1
+#define ZPCI_MSI_VEC_BITS		6
 #define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
 
 /* list of all detected zpci devices */
@@ -43,12 +51,63 @@ DEFINE_MUTEX(zpci_list_lock);
 static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
 static DEFINE_SPINLOCK(zpci_domain_lock);
 
+struct callback {
+	irq_handler_t	handler;
+	void		*data;
+};
+
+struct zdev_irq_map {
+	unsigned long	aibv;		/* AI bit vector */
+	int		msi_vecs;	/* consecutive MSI-vectors used */
+	int		__unused;
+	struct callback	cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
+	spinlock_t	lock;		/* protect callbacks against de-reg */
+};
+
+struct intr_bucket {
+	/* amap of adapters, one bit per dev, corresponds to one irq nr */
+	unsigned long	*alloc;
+	/* AI summary bit, global page for all devices */
+	unsigned long	*aisb;
+	/* pointer to aibv and callback data in zdev */
+	struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
+	/* protects the whole bucket struct */
+	spinlock_t	lock;
+};
+
+static struct intr_bucket *bucket;
+
+/* Adapter local summary indicator */
+static u8 *zpci_irq_si;
+
+static atomic_t irq_retries = ATOMIC_INIT(0);
+
 /* I/O Map */
 static DEFINE_SPINLOCK(zpci_iomap_lock);
 static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
+/* highest irq summary bit */
+static int __read_mostly aisb_max;
+
+static struct kmem_cache *zdev_irq_cache;
+
+static inline int irq_to_msi_nr(unsigned int irq)
+{
+	return irq & ZPCI_MSI_MASK;
+}
+
+static inline int irq_to_dev_nr(unsigned int irq)
+{
+	return irq >> ZPCI_MSI_VEC_BITS;
+}
+
+static inline struct zdev_irq_map *get_imap(unsigned int irq)
+{
+	return bucket->imap[irq_to_dev_nr(irq)];
+}
+
 struct zpci_dev *get_zdev(struct pci_dev *pdev)
 {
 	return (struct zpci_dev *) pdev->sysdata;
@@ -120,6 +179,67 @@ static int zpci_store_fib(struct zpci_dev *zdev, u8 *fc)
 	return (cc) ? -EIO : 0;
 }
 
+/* Modify PCI: Register adapter interruptions */
+static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
+			      u64 aibv)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+	struct zpci_fib *fib;
+	int rc;
+
+	fib = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!fib)
+		return -ENOMEM;
+
+	fib->isc = PCI_ISC;
+	fib->noi = zdev->irq_map->msi_vecs;
+	fib->sum = 1;		/* enable summary notifications */
+	fib->aibv = aibv;
+	fib->aibvo = 0;		/* every function has its own page */
+	fib->aisb = (u64) bucket->aisb + aisb / 8;
+	fib->aisbo = aisb & ZPCI_MSI_MASK;
+
+	rc = mpcifc_instr(req, fib);
+	pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
+
+	free_page((unsigned long) fib);
+	return rc;
+}
+
+struct mod_pci_args {
+	u64 base;
+	u64 limit;
+	u64 iota;
+};
+
+static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
+	struct zpci_fib *fib;
+	int rc;
+
+	/* The FIB must be available even if it's not used */
+	fib = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!fib)
+		return -ENOMEM;
+
+	fib->pba = args->base;
+	fib->pal = args->limit;
+	fib->iota = args->iota;
+
+	rc = mpcifc_instr(req, fib);
+	free_page((unsigned long) fib);
+	return rc;
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_unregister_airq(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0 };
+
+	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
+}
+
 #define ZPCI_PCIAS_CFGSPC	15
 
 static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
@@ -150,6 +270,55 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
 	return rc;
 }
 
+void synchronize_irq(unsigned int irq)
+{
+	/*
+	 * Not needed, the handler is protected by a lock and IRQs that occur
+	 * after the handler is deleted are just NOPs.
+	 */
+}
+EXPORT_SYMBOL_GPL(synchronize_irq);
+
+void enable_irq(unsigned int irq)
+{
+	struct msi_desc *msi = irq_get_msi_desc(irq);
+
+	zpci_msi_set_mask_bits(msi, 1, 0);
+}
+EXPORT_SYMBOL_GPL(enable_irq);
+
+void disable_irq(unsigned int irq)
+{
+	struct msi_desc *msi = irq_get_msi_desc(irq);
+
+	zpci_msi_set_mask_bits(msi, 1, 1);
+}
+EXPORT_SYMBOL_GPL(disable_irq);
+
+void disable_irq_nosync(unsigned int irq)
+{
+	disable_irq(irq);
+}
+EXPORT_SYMBOL_GPL(disable_irq_nosync);
+
+unsigned long probe_irq_on(void)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(probe_irq_on);
+
+int probe_irq_off(unsigned long val)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(probe_irq_off);
+
+unsigned int probe_irq_mask(unsigned long val)
+{
+	return val;
+}
+EXPORT_SYMBOL_GPL(probe_irq_mask);
+
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
 }
@@ -219,6 +388,155 @@ static struct pci_ops pci_root_ops = {
 	.write = pci_write,
 };
 
+/* store the last handled bit to implement fair scheduling of devices */
+static DEFINE_PER_CPU(unsigned long, next_sbit);
+
+static void zpci_irq_handler(void *dont, void *need)
+{
+	unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
+	int rescan = 0, max = aisb_max;
+	struct zdev_irq_map *imap;
+
+	kstat_cpu(smp_processor_id()).irqs[IOINT_PCI]++;
+	sbit = start;
+
+scan:
+	/* find summary_bit */
+	for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
+		clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
+		last = sbit;
+
+		/* find vector bit */
+		imap = bucket->imap[sbit];
+		for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+			kstat_cpu(smp_processor_id()).irqs[IOINT_MSI]++;
+			clear_bit(63 - mbit, &imap->aibv);
+
+			spin_lock(&imap->lock);
+			if (imap->cb[mbit].handler)
+				imap->cb[mbit].handler(mbit,
+					imap->cb[mbit].data);
+			spin_unlock(&imap->lock);
+		}
+	}
+
+	if (rescan)
+		goto out;
+
+	/* scan the skipped bits */
+	if (start > 0) {
+		sbit = 0;
+		max = start;
+		start = 0;
+		goto scan;
+	}
+
+	/* enable interrupts again */
+	sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+
+	/* check again to not lose initiative */
+	rmb();
+	max = aisb_max;
+	sbit = find_first_bit_left(bucket->aisb, max);
+	if (sbit != max) {
+		atomic_inc(&irq_retries);
+		rescan++;
+		goto scan;
+	}
+out:
+	/* store next device bit to scan */
+	__get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
+}
+
+/* msi_vecs - number of requested interrupts, 0 place function to error state */
+static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	unsigned int aisb, msi_nr;
+	struct msi_desc *msi;
+	int rc;
+
+	/* store the number of used MSI vectors */
+	zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
+
+	spin_lock(&bucket->lock);
+	aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
+	/* alloc map exhausted? */
+	if (aisb == PAGE_SIZE) {
+		spin_unlock(&bucket->lock);
+		return -EIO;
+	}
+	set_bit(aisb, bucket->alloc);
+	spin_unlock(&bucket->lock);
+
+	zdev->aisb = aisb;
+	if (aisb + 1 > aisb_max)
+		aisb_max = aisb + 1;
+
+	/* wire up IRQ shortcut pointer */
+	bucket->imap[zdev->aisb] = zdev->irq_map;
+	pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+
+	/* TODO: irq number 0 wont be found if we return less than requested MSIs.
+	 * ignore it for now and fix in common code.
+	 */
+	msi_nr = aisb << ZPCI_MSI_VEC_BITS;
+
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
+					  aisb << ZPCI_MSI_VEC_BITS);
+		if (rc)
+			return rc;
+		msi_nr++;
+	}
+
+	rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
+	if (rc) {
+		clear_bit(aisb, bucket->alloc);
+		dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
+		return rc;
+	}
+	return (zdev->irq_map->msi_vecs == msi_vecs) ?
+		0 : zdev->irq_map->msi_vecs;
+}
+
+static void zpci_teardown_msi(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	struct msi_desc *msi;
+	int aisb, rc;
+
+	rc = zpci_unregister_airq(zdev);
+	if (rc) {
+		dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
+		return;
+	}
+
+	msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
+	aisb = irq_to_dev_nr(msi->irq);
+
+	list_for_each_entry(msi, &pdev->msi_list, list)
+		zpci_teardown_msi_irq(zdev, msi);
+
+	clear_bit(aisb, bucket->alloc);
+	if (aisb + 1 == aisb_max)
+		aisb_max--;
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
+	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
+		return -EINVAL;
+	return zpci_setup_msi(pdev, nvec);
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	pr_info("%s: on pdev: %p\n", __func__, pdev);
+	zpci_teardown_msi(pdev);
+}
+
 static void zpci_map_resources(struct zpci_dev *zdev)
 {
 	struct pci_dev *pdev = zdev->pdev;
@@ -257,11 +575,23 @@ struct zpci_dev *zpci_alloc_device(void)
 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
 	if (!zdev)
 		return ERR_PTR(-ENOMEM);
+
+	/* Alloc aibv & callback space */
+	zdev->irq_map = kmem_cache_alloc(zdev_irq_cache, GFP_KERNEL);
+	if (!zdev->irq_map)
+		goto error;
+	memset(zdev->irq_map, 0, sizeof(*zdev->irq_map));
+	WARN_ON((u64) zdev->irq_map & 0xff);
 	return zdev;
+
+error:
+	kfree(zdev);
+	return ERR_PTR(-ENOMEM);
 }
 
 void zpci_free_device(struct zpci_dev *zdev)
 {
+	kmem_cache_free(zdev_irq_cache, zdev->irq_map);
 	kfree(zdev);
 }
 
@@ -320,6 +650,118 @@ void pcibios_disable_device(struct pci_dev *pdev)
 	pdev->sysdata = NULL;
 }
 
+int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
+{
+	int msi_nr = irq_to_msi_nr(irq);
+	struct zdev_irq_map *imap;
+	struct msi_desc *msi;
+
+	msi = irq_get_msi_desc(irq);
+	if (!msi)
+		return -EIO;
+
+	imap = get_imap(irq);
+	spin_lock_init(&imap->lock);
+
+	pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
+	imap->cb[msi_nr].handler = handler;
+	imap->cb[msi_nr].data = data;
+
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	zpci_msi_set_mask_bits(msi, 1, 0);
+	return 0;
+}
+
+void zpci_free_irq(unsigned int irq)
+{
+	struct zdev_irq_map *imap = get_imap(irq);
+	int msi_nr = irq_to_msi_nr(irq);
+	unsigned long flags;
+
+	pr_debug("%s: for irq: %d\n", __func__, irq);
+
+	spin_lock_irqsave(&imap->lock, flags);
+	imap->cb[msi_nr].handler = NULL;
+	imap->cb[msi_nr].data = NULL;
+	spin_unlock_irqrestore(&imap->lock, flags);
+}
+
+int request_irq(unsigned int irq, irq_handler_t handler,
+		unsigned long irqflags, const char *devname, void *dev_id)
+{
+	pr_debug("%s: irq: %d  handler: %p  flags: %lx  dev: %s\n",
+		__func__, irq, handler, irqflags, devname);
+
+	return zpci_request_irq(irq, handler, dev_id);
+}
+EXPORT_SYMBOL_GPL(request_irq);
+
+void free_irq(unsigned int irq, void *dev_id)
+{
+	zpci_free_irq(irq);
+}
+EXPORT_SYMBOL_GPL(free_irq);
+
+static int __init zpci_irq_init(void)
+{
+	int cpu, rc;
+
+	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
+	if (!bucket)
+		return -ENOMEM;
+
+	bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!bucket->aisb) {
+		rc = -ENOMEM;
+		goto out_aisb;
+	}
+
+	bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!bucket->alloc) {
+		rc = -ENOMEM;
+		goto out_alloc;
+	}
+
+	isc_register(PCI_ISC);
+	zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC);
+	if (IS_ERR(zpci_irq_si)) {
+		rc = PTR_ERR(zpci_irq_si);
+		zpci_irq_si = NULL;
+		goto out_ai;
+	}
+
+	for_each_online_cpu(cpu)
+		per_cpu(next_sbit, cpu) = 0;
+
+	spin_lock_init(&bucket->lock);
+	/* set summary to 1 to be called every time for the ISC */
+	*zpci_irq_si = 1;
+	sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+	return 0;
+
+out_ai:
+	isc_unregister(PCI_ISC);
+	free_page((unsigned long) bucket->alloc);
+out_alloc:
+	free_page((unsigned long) bucket->aisb);
+out_aisb:
+	kfree(bucket);
+	return rc;
+}
+
+static void zpci_irq_exit(void)
+{
+	free_page((unsigned long) bucket->alloc);
+	free_page((unsigned long) bucket->aisb);
+	s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC);
+	isc_unregister(PCI_ISC);
+	kfree(bucket);
+}
+
 static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
 						unsigned long flags, int domain)
 {
@@ -523,13 +965,20 @@ static inline int barsize(u8 size)
 
 static int zpci_mem_init(void)
 {
+	zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
+				L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
+	if (!zdev_irq_cache)
+		goto error_zdev;
+
 	/* TODO: use realloc */
 	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
 				   GFP_KERNEL);
 	if (!zpci_iomap_start)
-		goto error_zdev;
+		goto error_iomap;
 	return 0;
 
+error_iomap:
+	kmem_cache_destroy(zdev_irq_cache);
 error_zdev:
 	return -ENOMEM;
 }
@@ -537,6 +986,7 @@ error_zdev:
 static void zpci_mem_exit(void)
 {
 	kfree(zpci_iomap_start);
+	kmem_cache_destroy(zdev_irq_cache);
 }
 
 unsigned int pci_probe = 1;
@@ -570,6 +1020,14 @@ static int __init pci_base_init(void)
 	if (rc)
 		goto out_mem;
 
+	rc = zpci_msihash_init();
+	if (rc)
+		goto out_hash;
+
+	rc = zpci_irq_init();
+	if (rc)
+		goto out_irq;
+
 	rc = clp_find_pci_devices();
 	if (rc)
 		goto out_find;
@@ -578,6 +1036,10 @@ static int __init pci_base_init(void)
 	return 0;
 
 out_find:
+	zpci_irq_exit();
+out_irq:
+	zpci_msihash_exit();
+out_hash:
 	zpci_mem_exit();
 out_mem:
 	return rc;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 291da1a96560..72694fb6d525 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -48,6 +48,9 @@ static void clp_free_block(void *ptr)
 static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
 				      struct clp_rsp_query_pci_grp *response)
 {
+	zdev->msi_addr = response->msia;
+
+	pr_debug("Supported number of MSI vectors: %u\n", response->noi);
 	switch (response->version) {
 	case 1:
 		zdev->max_bus_speed = PCIE_SPEED_5_0GT;
diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c
new file mode 100644
index 000000000000..90fd3482b9e2
--- /dev/null
+++ b/arch/s390/pci/pci_msi.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/rculist.h>
+#include <linux/hash.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <asm/hw_irq.h>
+
+/* mapping of irq numbers to msi_desc */
+static struct hlist_head *msi_hash;
+static unsigned int msihash_shift = 6;
+#define msi_hashfn(nr)	hash_long(nr, msihash_shift)
+
+static DEFINE_SPINLOCK(msi_map_lock);
+
+struct msi_desc *__irq_get_msi_desc(unsigned int irq)
+{
+	struct hlist_node *entry;
+	struct msi_map *map;
+
+	hlist_for_each_entry_rcu(map, entry,
+			&msi_hash[msi_hashfn(irq)], msi_chain)
+		if (map->irq == irq)
+			return map->msi;
+	return NULL;
+}
+
+int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
+{
+	if (msi->msi_attrib.is_msix) {
+		int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+			PCI_MSIX_ENTRY_VECTOR_CTRL;
+		msi->masked = readl(msi->mask_base + offset);
+		writel(flag, msi->mask_base + offset);
+	} else {
+		if (msi->msi_attrib.maskbit) {
+			int pos;
+			u32 mask_bits;
+
+			pos = (long) msi->mask_base;
+			pci_read_config_dword(msi->dev, pos, &mask_bits);
+			mask_bits &= ~(mask);
+			mask_bits |= flag & mask;
+			pci_write_config_dword(msi->dev, pos, mask_bits);
+		} else {
+			return 0;
+		}
+	}
+
+	msi->msi_attrib.maskbit = !!flag;
+	return 1;
+}
+
+int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
+			unsigned int nr, int offset)
+{
+	struct msi_map *map;
+	struct msi_msg msg;
+	int rc;
+
+	map = kmalloc(sizeof(*map), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->irq = nr;
+	map->msi = msi;
+	zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
+
+	pr_debug("%s hashing irq: %u  to bucket nr: %llu\n",
+		__func__, nr, msi_hashfn(nr));
+	hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
+
+	spin_lock(&msi_map_lock);
+	rc = irq_set_msi_desc(nr, msi);
+	if (rc) {
+		spin_unlock(&msi_map_lock);
+		hlist_del_rcu(&map->msi_chain);
+		kfree(map);
+		zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
+		return rc;
+	}
+	spin_unlock(&msi_map_lock);
+
+	msg.data = nr - offset;
+	msg.address_lo = zdev->msi_addr & 0xffffffff;
+	msg.address_hi = zdev->msi_addr >> 32;
+	write_msi_msg(nr, &msg);
+	return 0;
+}
+
+void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
+{
+	int irq = msi->irq & ZPCI_MSI_MASK;
+	struct msi_map *map;
+
+	msi->msg.address_lo = 0;
+	msi->msg.address_hi = 0;
+	msi->msg.data = 0;
+	msi->irq = 0;
+	zpci_msi_set_mask_bits(msi, 1, 1);
+
+	spin_lock(&msi_map_lock);
+	map = zdev->msi_map[irq];
+	hlist_del_rcu(&map->msi_chain);
+	kfree(map);
+	zdev->msi_map[irq] = NULL;
+	spin_unlock(&msi_map_lock);
+}
+
+/*
+ * The msi hash table has 256 entries which is good for 4..20
+ * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
+ * the hash table size adjustable later.
+ */
+int __init zpci_msihash_init(void)
+{
+	unsigned int i;
+
+	msi_hash = kmalloc(256 * sizeof(*msi_hash), GFP_KERNEL);
+	if (!msi_hash)
+		return -ENOMEM;
+
+	for (i = 0; i < (1U << msihash_shift); i++)
+		INIT_HLIST_HEAD(&msi_hash[i]);
+	return 0;
+}
+
+void __init zpci_msihash_exit(void)
+{
+	kfree(msi_hash);
+}
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index a825d78fd0aa..5099636a6e5f 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -207,6 +207,8 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag)
 	desc->masked = __msix_mask_irq(desc, flag);
 }
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+
 static void msi_set_mask_bit(struct irq_data *data, u32 flag)
 {
 	struct msi_desc *desc = irq_data_get_msi(data);
@@ -230,6 +232,8 @@ void unmask_msi_irq(struct irq_data *data)
 	msi_set_mask_bit(data, 0);
 }
 
+#endif /* CONFIG_GENERIC_HARDIRQS */
+
 void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
 	BUG_ON(entry->dev->current_state != PCI_D0);
@@ -337,8 +341,10 @@ static void free_msi_irqs(struct pci_dev *dev)
 		if (!entry->irq)
 			continue;
 		nvec = 1 << entry->msi_attrib.multiple;
+#ifdef CONFIG_GENERIC_HARDIRQS
 		for (i = 0; i < nvec; i++)
 			BUG_ON(irq_has_action(entry->irq + i));
+#endif
 	}
 
 	arch_teardown_msi_irqs(dev);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 216b0ba109d7..e21ed837c673 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -10,9 +10,6 @@
  */
 
 #include <linux/smp.h>
-
-#ifndef CONFIG_S390
-
 #include <linux/linkage.h>
 #include <linux/cache.h>
 #include <linux/spinlock.h>
@@ -737,8 +734,11 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
 static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
 #endif
 
-#endif /* CONFIG_GENERIC_HARDIRQS */
+#else /* !CONFIG_GENERIC_HARDIRQS */
 
-#endif /* !CONFIG_S390 */
+extern struct msi_desc *irq_get_msi_desc(unsigned int irq);
+extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
+
+#endif /* CONFIG_GENERIC_HARDIRQS */
 
 #endif /* _LINUX_IRQ_H */
-- 
cgit v1.2.3-55-g7522


From 0362063b7be97f6f8e2c644b970f5726489aacdf Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Tue, 27 Nov 2012 00:31:55 +0100
Subject: ssb: extif: fix compile errors

If CONFIG_SSB_EMBEDDED or CONFIG_SSB_DRIVER_MIPS is set and
CONFIG_SSB_DRIVER_EXTIF is not set, it will cause compile problems
because of missing functions. This patch fixes these problems.

The mips driver now also uses ssb_chipco_available() instead of
checking bus->chipco.dev manually.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_mipscore.c        | 14 ++++++------
 include/linux/ssb/ssb_driver_extif.h | 42 ++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_mipscore.c b/drivers/ssb/driver_mipscore.c
index b918ba922306..5bd05b136d22 100644
--- a/drivers/ssb/driver_mipscore.c
+++ b/drivers/ssb/driver_mipscore.c
@@ -178,9 +178,9 @@ static void ssb_mips_serial_init(struct ssb_mipscore *mcore)
 {
 	struct ssb_bus *bus = mcore->dev->bus;
 
-	if (bus->extif.dev)
+	if (ssb_extif_available(&bus->extif))
 		mcore->nr_serial_ports = ssb_extif_serial_init(&bus->extif, mcore->serial_ports);
-	else if (bus->chipco.dev)
+	else if (ssb_chipco_available(&bus->chipco))
 		mcore->nr_serial_ports = ssb_chipco_serial_init(&bus->chipco, mcore->serial_ports);
 	else
 		mcore->nr_serial_ports = 0;
@@ -191,7 +191,7 @@ static void ssb_mips_flash_detect(struct ssb_mipscore *mcore)
 	struct ssb_bus *bus = mcore->dev->bus;
 
 	/* When there is no chipcommon on the bus there is 4MB flash */
-	if (!bus->chipco.dev) {
+	if (!ssb_chipco_available(&bus->chipco)) {
 		mcore->pflash.present = true;
 		mcore->pflash.buswidth = 2;
 		mcore->pflash.window = SSB_FLASH1;
@@ -227,9 +227,9 @@ u32 ssb_cpu_clock(struct ssb_mipscore *mcore)
 	if (bus->chipco.capabilities & SSB_CHIPCO_CAP_PMU)
 		return ssb_pmu_get_cpu_clock(&bus->chipco);
 
-	if (bus->extif.dev) {
+	if (ssb_extif_available(&bus->extif)) {
 		ssb_extif_get_clockcontrol(&bus->extif, &pll_type, &n, &m);
-	} else if (bus->chipco.dev) {
+	} else if (ssb_chipco_available(&bus->chipco)) {
 		ssb_chipco_get_clockcpu(&bus->chipco, &pll_type, &n, &m);
 	} else
 		return 0;
@@ -265,9 +265,9 @@ void ssb_mipscore_init(struct ssb_mipscore *mcore)
 		hz = 100000000;
 	ns = 1000000000 / hz;
 
-	if (bus->extif.dev)
+	if (ssb_extif_available(&bus->extif))
 		ssb_extif_timing_init(&bus->extif, ns);
-	else if (bus->chipco.dev)
+	else if (ssb_chipco_available(&bus->chipco))
 		ssb_chipco_timing_init(&bus->chipco, ns);
 
 	/* Assign IRQs to all cores on the bus, start with irq line 2, because serial usually takes 1 */
diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h
index 91161f0aa22b..2604efa7dc4d 100644
--- a/include/linux/ssb/ssb_driver_extif.h
+++ b/include/linux/ssb/ssb_driver_extif.h
@@ -204,11 +204,53 @@ void ssb_extif_get_clockcontrol(struct ssb_extif *extif,
 {
 }
 
+static inline
+void ssb_extif_timing_init(struct ssb_extif *extif, unsigned long ns)
+{
+}
+
 static inline
 void ssb_extif_watchdog_timer_set(struct ssb_extif *extif,
 				  u32 ticks)
 {
 }
 
+static inline u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
+{
+	return 0;
+}
+
+static inline u32 ssb_extif_gpio_out(struct ssb_extif *extif, u32 mask,
+				     u32 value)
+{
+	return 0;
+}
+
+static inline u32 ssb_extif_gpio_outen(struct ssb_extif *extif, u32 mask,
+				       u32 value)
+{
+	return 0;
+}
+
+static inline u32 ssb_extif_gpio_polarity(struct ssb_extif *extif, u32 mask,
+					  u32 value)
+{
+	return 0;
+}
+
+static inline u32 ssb_extif_gpio_intmask(struct ssb_extif *extif, u32 mask,
+					 u32 value)
+{
+	return 0;
+}
+
+#ifdef CONFIG_SSB_SERIAL
+static inline int ssb_extif_serial_init(struct ssb_extif *extif,
+					struct ssb_serial_port *ports)
+{
+	return 0;
+}
+#endif /* CONFIG_SSB_SERIAL */
+
 #endif /* CONFIG_SSB_DRIVER_EXTIF */
 #endif /* LINUX_SSB_EXTIFCORE_H_ */
-- 
cgit v1.2.3-55-g7522


From 91d1aa43d30505b0b825db8898ffc80a8eca96c7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 27 Nov 2012 19:33:25 +0100
Subject: context_tracking: New context tracking susbsystem

Create a new subsystem that probes on kernel boundaries
to keep track of the transitions between level contexts
with two basic initial contexts: user or kernel.

This is an abstraction of some RCU code that use such tracking
to implement its userspace extended quiescent state.

We need to pull this up from RCU into this new level of indirection
because this tracking is also going to be used to implement an "on
demand" generic virtual cputime accounting. A necessary step to
shutdown the tick while still accounting the cputime.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
[ paulmck: fix whitespace error and email address. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/Kconfig                            | 15 +++---
 arch/x86/Kconfig                        |  2 +-
 arch/x86/include/asm/context_tracking.h | 31 ++++++++++++
 arch/x86/include/asm/rcu.h              | 32 -------------
 arch/x86/kernel/entry_64.S              |  2 +-
 arch/x86/kernel/ptrace.c                |  8 ++--
 arch/x86/kernel/signal.c                |  5 +-
 arch/x86/kernel/traps.c                 |  2 +-
 arch/x86/mm/fault.c                     |  2 +-
 include/linux/context_tracking.h        | 18 +++++++
 include/linux/rcupdate.h                |  2 -
 init/Kconfig                            | 28 +++++------
 kernel/Makefile                         |  1 +
 kernel/context_tracking.c               | 83 +++++++++++++++++++++++++++++++++
 kernel/rcutree.c                        | 64 +------------------------
 kernel/sched/core.c                     | 11 +++--
 16 files changed, 174 insertions(+), 132 deletions(-)
 create mode 100644 arch/x86/include/asm/context_tracking.h
 delete mode 100644 arch/x86/include/asm/rcu.h
 create mode 100644 include/linux/context_tracking.h
 create mode 100644 kernel/context_tracking.c

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 366ec06a5185..cc74aaea116c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -300,15 +300,16 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
-config HAVE_RCU_USER_QS
+config HAVE_CONTEXT_TRACKING
 	bool
 	help
-	  Provide kernel entry/exit hooks necessary for userspace
-	  RCU extended quiescent state. Syscalls need to be wrapped inside
-	  rcu_user_exit()-rcu_user_enter() through the slow path using
-	  TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
-	  are already protected inside rcu_irq_enter/rcu_irq_exit() but
-	  preemption or signal handling on irq exit still need to be protected.
+	  Provide kernel/user boundaries probes necessary for subsystems
+	  that need it, such as userspace RCU extended quiescent state.
+	  Syscalls need to be wrapped inside user_exit()-user_enter() through
+	  the slow path using TIF_NOHZ flag. Exceptions handlers must be
+	  wrapped as well. Irqs are already protected inside
+	  rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on
+	  irq exit still need to be protected.
 
 config HAVE_VIRT_CPU_ACCOUNTING
 	bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff3ced2..110cfad24f26 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -106,7 +106,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select HAVE_RCU_USER_QS if X86_64
+	select HAVE_CONTEXT_TRACKING if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select GENERIC_KERNEL_THREAD
 	select GENERIC_KERNEL_EXECVE
diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h
new file mode 100644
index 000000000000..1616562683e9
--- /dev/null
+++ b/arch/x86/include/asm/context_tracking.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_X86_CONTEXT_TRACKING_H
+#define _ASM_X86_CONTEXT_TRACKING_H
+
+#ifndef __ASSEMBLY__
+#include <linux/context_tracking.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+	user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_CONTEXT_TRACKING
+	if (user_mode(regs))
+		user_enter();
+#endif
+}
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_CONTEXT_TRACKING
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
deleted file mode 100644
index d1ac07a23979..000000000000
--- a/arch/x86/include/asm/rcu.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _ASM_X86_RCU_H
-#define _ASM_X86_RCU_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/rcupdate.h>
-#include <asm/ptrace.h>
-
-static inline void exception_enter(struct pt_regs *regs)
-{
-	rcu_user_exit();
-}
-
-static inline void exception_exit(struct pt_regs *regs)
-{
-#ifdef CONFIG_RCU_USER_QS
-	if (user_mode(regs))
-		rcu_user_enter();
-#endif
-}
-
-#else /* __ASSEMBLY__ */
-
-#ifdef CONFIG_RCU_USER_QS
-# define SCHEDULE_USER call schedule_user
-#else
-# define SCHEDULE_USER call schedule
-#endif
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0c58952d64e8..98faeb30139d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,7 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
-#include <asm/rcu.h>
+#include <asm/context_tracking.h>
 #include <asm/smap.h>
 #include <linux/err.h>
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index eff5b8c68652..65b88a5dc1a8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,7 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
-#include <linux/rcupdate.h>
+#include <linux/context_tracking.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1461,7 +1461,7 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
 
-	rcu_user_exit();
+	user_exit();
 
 	/*
 	 * If we stepped into a sysenter/syscall insn, it trapped in
@@ -1516,7 +1516,7 @@ void syscall_trace_leave(struct pt_regs *regs)
 	 * or do_notify_resume(), in which case we can be in RCU
 	 * user mode.
 	 */
-	rcu_user_exit();
+	user_exit();
 
 	audit_syscall_exit(regs);
 
@@ -1534,5 +1534,5 @@ void syscall_trace_leave(struct pt_regs *regs)
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, step);
 
-	rcu_user_enter();
+	user_enter();
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 29ad351804e9..20ecac112e74 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -22,6 +22,7 @@
 #include <linux/uaccess.h>
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
+#include <linux/context_tracking.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
@@ -816,7 +817,7 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-	rcu_user_exit();
+	user_exit();
 
 #ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
@@ -840,7 +841,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
 
-	rcu_user_enter();
+	user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8276dc6794cc..eb8586693e0b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,7 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
-#include <asm/rcu.h>
+#include <asm/context_tracking.h>
 
 #include <asm/mach_traps.h>
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8e13ecb41bee..7a529cbab7ad 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,7 +18,7 @@
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
 #include <asm/fixmap.h>			/* VSYSCALL_START		*/
-#include <asm/rcu.h>			/* exception_enter(), ...	*/
+#include <asm/context_tracking.h>	/* exception_enter(), ...	*/
 
 /*
  * Page fault error code bits:
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
new file mode 100644
index 000000000000..e24339ccb7f0
--- /dev/null
+++ b/include/linux/context_tracking.h
@@ -0,0 +1,18 @@
+#ifndef _LINUX_CONTEXT_TRACKING_H
+#define _LINUX_CONTEXT_TRACKING_H
+
+#ifdef CONFIG_CONTEXT_TRACKING
+#include <linux/sched.h>
+
+extern void user_enter(void);
+extern void user_exit(void);
+extern void context_tracking_task_switch(struct task_struct *prev,
+					 struct task_struct *next);
+#else
+static inline void user_enter(void) { }
+static inline void user_exit(void) { }
+static inline void context_tracking_task_switch(struct task_struct *prev,
+						struct task_struct *next) { }
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
+#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8fe7c1840d30..275aa3f1062d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -222,8 +222,6 @@ extern void rcu_user_enter(void);
 extern void rcu_user_exit(void);
 extern void rcu_user_enter_after_irq(void);
 extern void rcu_user_exit_after_irq(void);
-extern void rcu_user_hooks_switch(struct task_struct *prev,
-				  struct task_struct *next);
 #else
 static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
diff --git a/init/Kconfig b/init/Kconfig
index 5ac6ee094225..2054e048bb98 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -486,9 +486,13 @@ config PREEMPT_RCU
 	  This option enables preemptible-RCU code that is common between
 	  the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config CONTEXT_TRACKING
+       bool
+
 config RCU_USER_QS
 	bool "Consider userspace as in RCU extended quiescent state"
-	depends on HAVE_RCU_USER_QS && SMP
+	depends on HAVE_CONTEXT_TRACKING && SMP
+	select CONTEXT_TRACKING
 	help
 	  This option sets hooks on kernel / userspace boundaries and
 	  puts RCU in extended quiescent state when the CPU runs in
@@ -497,24 +501,20 @@ config RCU_USER_QS
 	  try to keep the timer tick on for RCU.
 
 	  Unless you want to hack and help the development of the full
-	  tickless feature, you shouldn't enable this option.  It also
+	  dynticks mode, you shouldn't enable this option.  It also
 	  adds unnecessary overhead.
 
 	  If unsure say N
 
-config RCU_USER_QS_FORCE
-	bool "Force userspace extended QS by default"
-	depends on RCU_USER_QS
+config CONTEXT_TRACKING_FORCE
+	bool "Force context tracking"
+	depends on CONTEXT_TRACKING
 	help
-	  Set the hooks in user/kernel boundaries by default in order to
-	  test this feature that treats userspace as an extended quiescent
-	  state until we have a real user like a full adaptive nohz option.
-
-	  Unless you want to hack and help the development of the full
-	  tickless feature, you shouldn't enable this option. It adds
-	  unnecessary overhead.
-
-	  If unsure say N
+	  Probe on user/kernel boundaries by default in order to
+	  test the features that rely on it such as userspace RCU extended
+	  quiescent states.
+	  This test is there for debugging until we have a real user like the
+	  full dynticks mode.
 
 config RCU_FANOUT
 	int "Tree-based hierarchical RCU fanout value"
diff --git a/kernel/Makefile b/kernel/Makefile
index 0dfeca4324ee..f90bbfc9727f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 
 $(obj)/configs.o: $(obj)/config_data.h
 
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
new file mode 100644
index 000000000000..e0e07fd55508
--- /dev/null
+++ b/kernel/context_tracking.c
@@ -0,0 +1,83 @@
+#include <linux/context_tracking.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+
+struct context_tracking {
+	/*
+	 * When active is false, hooks are not set to
+	 * minimize overhead: TIF flags are cleared
+	 * and calls to user_enter/exit are ignored. This
+	 * may be further optimized using static keys.
+	 */
+	bool active;
+	enum {
+		IN_KERNEL = 0,
+		IN_USER,
+	} state;
+};
+
+static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+	.active = true,
+#endif
+};
+
+void user_enter(void)
+{
+	unsigned long flags;
+
+	/*
+	 * Some contexts may involve an exception occuring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
+	if (in_interrupt())
+		return;
+
+	WARN_ON_ONCE(!current->mm);
+
+	local_irq_save(flags);
+	if (__this_cpu_read(context_tracking.active) &&
+	    __this_cpu_read(context_tracking.state) != IN_USER) {
+		__this_cpu_write(context_tracking.state, IN_USER);
+		rcu_user_enter();
+	}
+	local_irq_restore(flags);
+}
+
+void user_exit(void)
+{
+	unsigned long flags;
+
+	/*
+	 * Some contexts may involve an exception occuring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+	if (__this_cpu_read(context_tracking.state) == IN_USER) {
+		__this_cpu_write(context_tracking.state, IN_KERNEL);
+		rcu_user_exit();
+	}
+	local_irq_restore(flags);
+}
+
+void context_tracking_task_switch(struct task_struct *prev,
+			     struct task_struct *next)
+{
+	if (__this_cpu_read(context_tracking.active)) {
+		clear_tsk_thread_flag(prev, TIF_NOHZ);
+		set_tsk_thread_flag(next, TIF_NOHZ);
+	}
+}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7733eb56e156..e441b77b614e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -207,9 +207,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(1),
-#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
-	.ignore_user_qs = true,
-#endif
 };
 
 static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
@@ -420,29 +417,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 void rcu_user_enter(void)
 {
-	unsigned long flags;
-	struct rcu_dynticks *rdtp;
-
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
-	if (in_interrupt())
-		return;
-
-	WARN_ON_ONCE(!current->mm);
-
-	local_irq_save(flags);
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (!rdtp->ignore_user_qs && !rdtp->in_user) {
-		rdtp->in_user = true;
-		rcu_eqs_enter(true);
-	}
-	local_irq_restore(flags);
+	rcu_eqs_enter(1);
 }
 
 /**
@@ -579,27 +554,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
  */
 void rcu_user_exit(void)
 {
-	unsigned long flags;
-	struct rcu_dynticks *rdtp;
-
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (rdtp->in_user) {
-		rdtp->in_user = false;
-		rcu_eqs_exit(true);
-	}
-	local_irq_restore(flags);
+	rcu_eqs_exit(1);
 }
 
 /**
@@ -722,21 +677,6 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#ifdef CONFIG_RCU_USER_QS
-void rcu_user_hooks_switch(struct task_struct *prev,
-			   struct task_struct *next)
-{
-	struct rcu_dynticks *rdtp;
-
-	/* Interrupts are disabled in context switch */
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (!rdtp->ignore_user_qs) {
-		clear_tsk_thread_flag(prev, TIF_NOHZ);
-		set_tsk_thread_flag(next, TIF_NOHZ);
-	}
-}
-#endif /* #ifdef CONFIG_RCU_USER_QS */
-
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 36f260864f65..80f80dfca70e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -72,6 +72,7 @@
 #include <linux/slab.h>
 #include <linux/init_task.h>
 #include <linux/binfmts.h>
+#include <linux/context_tracking.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -1886,8 +1887,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 #endif
 
+	context_tracking_task_switch(prev, next);
 	/* Here we just switch the register state and the stack. */
-	rcu_user_hooks_switch(prev, next);
 	switch_to(prev, next, prev);
 
 	barrier();
@@ -2911,7 +2912,7 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_CONTEXT_TRACKING
 asmlinkage void __sched schedule_user(void)
 {
 	/*
@@ -2920,9 +2921,9 @@ asmlinkage void __sched schedule_user(void)
 	 * we haven't yet exited the RCU idle mode. Do it here manually until
 	 * we find a better solution.
 	 */
-	rcu_user_exit();
+	user_exit();
 	schedule();
-	rcu_user_enter();
+	user_enter();
 }
 #endif
 
@@ -3027,7 +3028,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
-	rcu_user_exit();
+	user_exit();
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();
-- 
cgit v1.2.3-55-g7522


From ce43b03e8889475817d427b1f3724c7e294b76eb Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 30 Nov 2012 09:49:27 +0000
Subject: net: move inet_dport/inet_num in sock_common

commit 68835aba4d9b (net: optimize INET input path further)
moved some fields used for tcp/udp sockets lookup in the first cache
line of struct sock_common.

This patch moves inet_dport/inet_num as well, filling a 32bit hole
on 64 bit arches and reducing number of cache line misses in lookups.

Also change INET_MATCH()/INET_TW_MATCH() to perform the ports match
before addresses match, as this check is more discriminant.

Remove the hash check from MATCH() macros because we dont need to
re validate the hash value after taking a refcount on socket, and
use likely/unlikely compiler hints, as the sk_hash/hash check
makes the following conditional tests 100% predicted by cpu.

Introduce skc_addrpair/skc_portpair pair values to better
document the alignment requirements of the port/addr pairs
used in the various MATCH() macros, and remove some casts.

The namespace check can also be done at last.

This slightly improves TCP/UDP lookup times.

IP/TCP early demux needs inet->rx_dst_ifindex and
TCP needs inet->min_ttl, lets group them together in same cache line.

With help from Ben Hutchings & Joe Perches.

Idea of this patch came after Ling Ma proposal to move skc_hash
to the beginning of struct sock_common, and should allow him
to submit a final version of his patch. My tests show an improvement
doing so.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Joe Perches <joe@perches.com>
Cc: Ling Ma <ling.ma.program@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             | 32 ++++++++++++++-------------
 include/net/inet_hashtables.h    | 48 ++++++++++++++++++++++------------------
 include/net/inet_sock.h          |  8 ++++---
 include/net/inet_timewait_sock.h |  7 ++++--
 include/net/sock.h               | 25 ++++++++++++++++-----
 net/ipv4/inet_hashtables.c       | 36 +++++++++++++++++++-----------
 net/ipv6/inet6_hashtables.c      | 27 +++++++++++++++-------
 7 files changed, 115 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5e11905a4f01..12729e966dc9 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -364,20 +364,22 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define inet_v6_ipv6only(__sk)		0
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-#define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\
-	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&& \
-	 ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \
-	 ((__sk)->sk_family		== AF_INET6)		&& \
-	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
-	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-
-#define INET6_TW_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif) \
-	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&& \
-	 (*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports))	&& \
-	 ((__sk)->sk_family	       == PF_INET6)			&& \
-	 (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)))	&& \
-	 (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr))) && \
-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
+	((inet_sk(__sk)->inet_portpair == (__ports))		&&	\
+	 ((__sk)->sk_family == AF_INET6)			&&	\
+	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&&	\
+	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&&	\
+	 (!(__sk)->sk_bound_dev_if	||				\
+	   ((__sk)->sk_bound_dev_if == (__dif))) 		&&	\
+	 net_eq(sock_net(__sk), (__net)))
+
+#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	   \
+	((inet_twsk(__sk)->tw_portpair == (__ports))			&& \
+	 ((__sk)->sk_family == AF_INET6)				&& \
+	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))	&& \
+	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
+	 (!(__sk)->sk_bound_dev_if	||				   \
+	  ((__sk)->sk_bound_dev_if == (__dif)))				&& \
+	 net_eq(sock_net(__sk), (__net)))
 
 #endif /* _IPV6_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 54be0287eb98..d1de4fbd45c2 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -299,30 +299,34 @@ typedef __u64 __bitwise __addrpair;
 				   (((__force __u64)(__be32)(__daddr)) << 32) | \
 				   ((__force __u64)(__be32)(__saddr)));
 #endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&	\
-	 ((*((__addrpair *)&(inet_sk(__sk)->inet_daddr))) == (__cookie))  &&	\
-	 ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports))   &&	\
-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&	\
-	 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
-	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)	\
+	((inet_sk(__sk)->inet_portpair == (__ports))		&&	\
+	 (inet_sk(__sk)->inet_addrpair == (__cookie))		&&	\
+	 (!(__sk)->sk_bound_dev_if	||				\
+	   ((__sk)->sk_bound_dev_if == (__dif))) 		&& 	\
+	 net_eq(sock_net(__sk), (__net)))
+#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
+	((inet_twsk(__sk)->tw_portpair == (__ports))	&&		\
+	 (inet_twsk(__sk)->tw_addrpair == (__cookie))	&&		\
+	 (!(__sk)->sk_bound_dev_if	||				\
+	   ((__sk)->sk_bound_dev_if == (__dif)))	&&		\
+	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
-#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
-	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))	&&	\
-	 (inet_sk(__sk)->inet_daddr	== (__saddr))		&&	\
-	 (inet_sk(__sk)->inet_rcv_saddr	== (__daddr))		&&	\
-	 ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports))	&&	\
-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
-	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))	&&	\
-	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
-	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
-	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+	((inet_sk(__sk)->inet_portpair == (__ports))	&&		\
+	 (inet_sk(__sk)->inet_daddr	== (__saddr))	&&		\
+	 (inet_sk(__sk)->inet_rcv_saddr	== (__daddr))	&&		\
+	 (!(__sk)->sk_bound_dev_if	||				\
+	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
+	 net_eq(sock_net(__sk), (__net)))
+#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+	((inet_twsk(__sk)->tw_portpair == (__ports))	&&		\
+	 (inet_twsk(__sk)->tw_daddr	== (__saddr))	&&		\
+	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))	&&		\
+	 (!(__sk)->sk_bound_dev_if	||				\
+	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
+	 net_eq(sock_net(__sk), (__net)))
 #endif /* 64-bit arch */
 
 /*
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 256c1ed2d69a..a4196cbc84ec 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -144,9 +144,11 @@ struct inet_sock {
 	/* Socket demultiplex comparisons on incoming packets. */
 #define inet_daddr		sk.__sk_common.skc_daddr
 #define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
+#define inet_addrpair		sk.__sk_common.skc_addrpair
+#define inet_dport		sk.__sk_common.skc_dport
+#define inet_num		sk.__sk_common.skc_num
+#define inet_portpair		sk.__sk_common.skc_portpair
 
-	__be16			inet_dport;
-	__u16			inet_num;
 	__be32			inet_saddr;
 	__s16			uc_ttl;
 	__u16			cmsg_flags;
@@ -154,6 +156,7 @@ struct inet_sock {
 	__u16			inet_id;
 
 	struct ip_options_rcu __rcu	*inet_opt;
+	int			rx_dst_ifindex;
 	__u8			tos;
 	__u8			min_ttl;
 	__u8			mc_ttl;
@@ -170,7 +173,6 @@ struct inet_sock {
 	int			uc_index;
 	int			mc_index;
 	__be32			mc_addr;
-	int			rx_dst_ifindex;
 	struct ip_mc_socklist __rcu	*mc_list;
 	struct inet_cork_full	cork;
 };
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index ba52c830a7a5..7d658d577368 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -112,6 +112,11 @@ struct inet_timewait_sock {
 #define tw_net			__tw_common.skc_net
 #define tw_daddr        	__tw_common.skc_daddr
 #define tw_rcv_saddr    	__tw_common.skc_rcv_saddr
+#define tw_addrpair		__tw_common.skc_addrpair
+#define tw_dport		__tw_common.skc_dport
+#define tw_num			__tw_common.skc_num
+#define tw_portpair		__tw_common.skc_portpair
+
 	int			tw_timeout;
 	volatile unsigned char	tw_substate;
 	unsigned char		tw_rcv_wscale;
@@ -119,8 +124,6 @@ struct inet_timewait_sock {
 	/* Socket demultiplex comparisons on incoming packets. */
 	/* these three are in inet_sock */
 	__be16			tw_sport;
-	__be16			tw_dport;
-	__u16			tw_num;
 	kmemcheck_bitfield_begin(flags);
 	/* And these are ours. */
 	unsigned int		tw_ipv6only     : 1,
diff --git a/include/net/sock.h b/include/net/sock.h
index c945fba4f543..c4132c1b63a8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -132,6 +132,8 @@ struct net;
  *	@skc_rcv_saddr: Bound local IPv4 addr
  *	@skc_hash: hash value used with various protocol lookup tables
  *	@skc_u16hashes: two u16 hash values used by UDP lookup tables
+ *	@skc_dport: placeholder for inet_dport/tw_dport
+ *	@skc_num: placeholder for inet_num/tw_num
  *	@skc_family: network address family
  *	@skc_state: Connection state
  *	@skc_reuse: %SO_REUSEADDR setting
@@ -149,16 +151,29 @@ struct net;
  *	for struct sock and struct inet_timewait_sock.
  */
 struct sock_common {
-	/* skc_daddr and skc_rcv_saddr must be grouped :
-	 * cf INET_MATCH() and INET_TW_MATCH()
+	/* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
+	 * address on 64bit arches : cf INET_MATCH() and INET_TW_MATCH()
 	 */
-	__be32			skc_daddr;
-	__be32			skc_rcv_saddr;
-
+	union {
+		unsigned long	skc_addrpair;
+		struct {
+			__be32	skc_daddr;
+			__be32	skc_rcv_saddr;
+		};
+	};
 	union  {
 		unsigned int	skc_hash;
 		__u16		skc_u16hashes[2];
 	};
+	/* skc_dport && skc_num must be grouped as well */
+	union {
+		u32		skc_portpair;
+		struct {
+			__be16	skc_dport;
+			__u16	skc_num;
+		};
+	};
+
 	unsigned short		skc_family;
 	volatile unsigned char	skc_state;
 	unsigned char		skc_reuse;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7880af970208..fa3ae8148710 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -237,12 +237,14 @@ struct sock *__inet_lookup_established(struct net *net,
 	rcu_read_lock();
 begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
-		if (INET_MATCH(sk, net, hash, acookie,
-					saddr, daddr, ports, dif)) {
+		if (sk->sk_hash != hash)
+			continue;
+		if (likely(INET_MATCH(sk, net, acookie,
+				      saddr, daddr, ports, dif))) {
 			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
 				goto begintw;
-			if (unlikely(!INET_MATCH(sk, net, hash, acookie,
-				saddr, daddr, ports, dif))) {
+			if (unlikely(!INET_MATCH(sk, net, acookie,
+						 saddr, daddr, ports, dif))) {
 				sock_put(sk);
 				goto begin;
 			}
@@ -260,14 +262,18 @@ begin:
 begintw:
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_nulls_for_each_rcu(sk, node, &head->twchain) {
-		if (INET_TW_MATCH(sk, net, hash, acookie,
-					saddr, daddr, ports, dif)) {
+		if (sk->sk_hash != hash)
+			continue;
+		if (likely(INET_TW_MATCH(sk, net, acookie,
+					 saddr, daddr, ports,
+					 dif))) {
 			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
 				sk = NULL;
 				goto out;
 			}
-			if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie,
-				 saddr, daddr, ports, dif))) {
+			if (unlikely(!INET_TW_MATCH(sk, net, acookie,
+						    saddr, daddr, ports,
+						    dif))) {
 				sock_put(sk);
 				goto begintw;
 			}
@@ -314,10 +320,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 
 	/* Check TIME-WAIT sockets first. */
 	sk_nulls_for_each(sk2, node, &head->twchain) {
-		tw = inet_twsk(sk2);
+		if (sk2->sk_hash != hash)
+			continue;
 
-		if (INET_TW_MATCH(sk2, net, hash, acookie,
-					saddr, daddr, ports, dif)) {
+		if (likely(INET_TW_MATCH(sk2, net, acookie,
+					 saddr, daddr, ports, dif))) {
+			tw = inet_twsk(sk2);
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -328,8 +336,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 
 	/* And established part... */
 	sk_nulls_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, net, hash, acookie,
-					saddr, daddr, ports, dif))
+		if (sk2->sk_hash != hash)
+			continue;
+		if (likely(INET_MATCH(sk2, net, acookie,
+				      saddr, daddr, ports, dif)))
 			goto not_unique;
 	}
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 73f1a00a96af..dea17fd28e50 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -87,11 +87,13 @@ struct sock *__inet6_lookup_established(struct net *net,
 	rcu_read_lock();
 begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
-		/* For IPV6 do the cheaper port and family tests first. */
-		if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+		if (sk->sk_hash != hash)
+			continue;
+		if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
 			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
 				goto begintw;
-			if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+			if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
+						  ports, dif))) {
 				sock_put(sk);
 				goto begin;
 			}
@@ -104,12 +106,16 @@ begin:
 begintw:
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_nulls_for_each_rcu(sk, node, &head->twchain) {
-		if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+		if (sk->sk_hash != hash)
+			continue;
+		if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
+					  ports, dif))) {
 			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
 				sk = NULL;
 				goto out;
 			}
-			if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+			if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
+						     ports, dif))) {
 				sock_put(sk);
 				goto begintw;
 			}
@@ -236,9 +242,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 
 	/* Check TIME-WAIT sockets first. */
 	sk_nulls_for_each(sk2, node, &head->twchain) {
-		tw = inet_twsk(sk2);
+		if (sk2->sk_hash != hash)
+			continue;
 
-		if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
+		if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
+					  ports, dif))) {
+			tw = inet_twsk(sk2);
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -249,7 +258,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 
 	/* And established part... */
 	sk_nulls_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
+		if (sk2->sk_hash != hash)
+			continue;
+		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
 			goto not_unique;
 	}
 
-- 
cgit v1.2.3-55-g7522


From 78d86c213f28193082b5d8a1a424044b7ba406f1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Thu, 29 Nov 2012 10:43:28 -0800
Subject: init.h: Remove __dev* sections from the kernel

With the recent work to remove CONFIG_HOTPLUG, we are starting to get a
bunch of __devinit section warnings, despite CONFIG_HOTPLUG always being
enabled.  So, stop marking the sections entirely, by defining them away
the section markings in init.h

Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/init.h | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index e59041e21df3..f63692d6902e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -93,13 +93,13 @@
 
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
-/* Used for HOTPLUG */
-#define __devinit        __section(.devinit.text) __cold notrace
-#define __devinitdata    __section(.devinit.data)
-#define __devinitconst   __constsection(.devinit.rodata)
-#define __devexit        __section(.devexit.text) __exitused __cold notrace
-#define __devexitdata    __section(.devexit.data)
-#define __devexitconst   __constsection(.devexit.rodata)
+/* Used for HOTPLUG, but that is always enabled now, so just make them noops */
+#define __devinit
+#define __devinitdata
+#define __devinitconst
+#define __devexit
+#define __devexitdata
+#define __devexitconst
 
 /* Used for HOTPLUG_CPU */
 #define __cpuinit        __section(.cpuinit.text) __cold notrace
@@ -126,10 +126,6 @@
 #define __INITRODATA	.section	".init.rodata","a",%progbits
 #define __FINITDATA	.previous
 
-#define __DEVINIT        .section	".devinit.text", "ax"
-#define __DEVINITDATA    .section	".devinit.data", "aw"
-#define __DEVINITRODATA  .section	".devinit.rodata", "a"
-
 #define __CPUINIT        .section	".cpuinit.text", "ax"
 #define __CPUINITDATA    .section	".cpuinit.data", "aw"
 #define __CPUINITRODATA  .section	".cpuinit.rodata", "a"
-- 
cgit v1.2.3-55-g7522


From 78ad724ade23c66650dcaba04f4d2307c2884c69 Mon Sep 17 00:00:00 2001
From: Sreekanth Reddy
Date: Fri, 30 Nov 2012 07:58:10 +0530
Subject: [SCSI] miscdevice: Adding support for MPT3SAS_MINOR(222)

Signed-off-by: Sreekanth Reddy <Sreekanth.Reddy@lsi.com>
Reviewed-by: Nagalakshmi Nandigama <Nagalakshmi.Nandigama@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 include/linux/miscdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index e0deeb2cc939..09c2300ddb37 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -34,6 +34,7 @@
 #define MWAVE_MINOR		219	/* ACP/Mwave Modem */
 #define MPT_MINOR		220
 #define MPT2SAS_MINOR		221
+#define MPT3SAS_MINOR		222
 #define UINPUT_MINOR		223
 #define MISC_MCELOG_MINOR	227
 #define HPET_MINOR		228
-- 
cgit v1.2.3-55-g7522


From c971f08cba56ed17fe22040ca5ff97fe5c3f0bd7 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 28 Nov 2012 00:03:11 +0000
Subject: atm: add release_cb() callback to vcc

The immediate use case for this is that it will allow us to ensure that a
pppoatm queue is woken after it has to drop a packet due to the sock being
locked.

Note that 'release_cb' is called when the socket is *unlocked*. This is
not to be confused with vcc_release() — which probably ought to be called
vcc_close().

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: Krzysztof Mazur <krzysiek@podlesie.net>
---
 include/linux/atmdev.h |  1 +
 net/atm/common.c       | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 72db2af902a8..c1da539f5e28 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -99,6 +99,7 @@ struct atm_vcc {
 	struct atm_dev	*dev;		/* device back pointer */
 	struct atm_qos	qos;		/* QOS */
 	struct atm_sap	sap;		/* SAP */
+	void (*release_cb)(struct atm_vcc *vcc); /* release_sock callback */
 	void (*push)(struct atm_vcc *vcc,struct sk_buff *skb);
 	void (*pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* optional */
 	int (*push_oam)(struct atm_vcc *vcc,void *cell);
diff --git a/net/atm/common.c b/net/atm/common.c
index 24216642d696..806fc0a40051 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -126,10 +126,19 @@ static void vcc_write_space(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static void vcc_release_cb(struct sock *sk)
+{
+	struct atm_vcc *vcc = atm_sk(sk);
+
+	if (vcc->release_cb)
+		vcc->release_cb(vcc);
+}
+
 static struct proto vcc_proto = {
 	.name	  = "VCC",
 	.owner	  = THIS_MODULE,
 	.obj_size = sizeof(struct atm_vcc),
+	.release_cb = vcc_release_cb,
 };
 
 int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
@@ -158,6 +167,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
 	vcc->pop = NULL;
 	vcc->owner = NULL;
 	vcc->push_oam = NULL;
+	vcc->release_cb = NULL;
 	vcc->vpi = vcc->vci = 0; /* no VCI/VPI yet */
 	vcc->atm_options = vcc->aal_options = 0;
 	sk->sk_destruct = vcc_sock_destruct;
-- 
cgit v1.2.3-55-g7522


From 0d0cdb028f9d9771e2b346038707734121f906e3 Mon Sep 17 00:00:00 2001
From: Aaron Lu
Date: Mon, 26 Nov 2012 13:55:25 +0800
Subject: libata: restore acpi disable functionality

Commit 66fa7f215 "libata-acpi: improve ACPI disabling" introdcued the
behaviour of disabling ATA ACPI if ata_acpi_on_devcfg failed the 2nd
time, but commit 30dcf76ac dropped this behaviour and this caused
problem for Dimitris Damigos, where his laptop can not resume correctly.

The bugzilla page for it is:
https://bugzilla.kernel.org/show_bug.cgi?id=49331

The problem is, ata_dev_push_id will fail the 2nd time it is invoked,
and due to disabling ACPI code is dropped, ata_acpi_on_devcfg which
calls ata_dev_push_id will keep failing and eventually made the device
disabled.

This patch restores the original behaviour, if acpi failed the 2nd time,
disable acpi functionality for the device(and we do not event need to
add a debug message for this as it is still there ;-).

Reported-by: Dimitris Damigos <damigos@freemail.gr>
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-acpi.c | 4 ++++
 include/linux/libata.h    | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 5b0ba3f20edc..ef01ac07502e 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -76,6 +76,9 @@ acpi_handle ata_dev_acpi_handle(struct ata_device *dev)
 	acpi_integer adr;
 	struct ata_port *ap = dev->link->ap;
 
+	if (dev->flags & ATA_DFLAG_ACPI_DISABLED)
+		return NULL;
+
 	if (ap->flags & ATA_FLAG_ACPI_SATA) {
 		if (!sata_pmp_attached(ap))
 			adr = SATA_ADR(ap->port_no, NO_PORT_MULT);
@@ -945,6 +948,7 @@ int ata_acpi_on_devcfg(struct ata_device *dev)
 		return rc;
 	}
 
+	dev->flags |= ATA_DFLAG_ACPI_DISABLED;
 	ata_dev_warn(dev, "ACPI: failed the second time, disabled\n");
 
 	/* We can safely continue if no _GTF command has been executed
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 77eeeda2b6e2..e931c9ad1078 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -163,6 +163,7 @@ enum {
 
 	ATA_DFLAG_DA		= (1 << 26), /* device supports Device Attention */
 	ATA_DFLAG_DEVSLP	= (1 << 27), /* device supports Device Sleep */
+	ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */
 
 	ATA_DEV_UNKNOWN		= 0,	/* unknown device */
 	ATA_DEV_ATA		= 1,	/* ATA device */
-- 
cgit v1.2.3-55-g7522


From de90cd71f68e947d3bd6c3f2ef5731ead010a768 Mon Sep 17 00:00:00 2001
From: Shane Huang
Date: Sun, 18 Nov 2012 04:44:41 +0800
Subject: libata: check SATA_SETTINGS log with HW Feature Ctrl

NCQ capability was used to check availability of SATA Settings page
from Identify Device Data Log, which contains DevSlp timing variables.
It does not work on some HDDs and leads to error messages.
IDENTIFY word 78 bit 5(Hardware Feature Control) should be used.

Quoting SATA spec 3.1:
If Hardware Feature Control is supported, then:
a) IDENTIFY DEVICE data word 78 bit 5 (see 13.2.1.18) shall be
set to one;
b) the SET FEATURES Select Hardware Feature Control subcommand
shall be supported (see 13.3.8);
c) page 08h of the Identify Device Data log (see 13.7.7) shall
be supported;

This patch is not tested on SATA HDD with DevSlp supported.

Reported-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Shane Huang <shane.huang@amd.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 3 +--
 include/linux/ata.h       | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f46fbd3bd3fb..caffe73c1e4a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2330,9 +2330,8 @@ int ata_dev_configure(struct ata_device *dev)
 
 		/* Obtain SATA Settings page from Identify Device Data Log,
 		 * which contains DevSlp timing variables etc.
-		 * Exclude old devices with ata_id_has_ncq()
 		 */
-		if (ata_id_has_ncq(dev->id)) {
+		if (ata_id_has_hw_feature_ctrl(dev->id)) {
 			err_mask = ata_read_log_page(dev,
 						     ATA_LOG_SATA_ID_DEV_DATA,
 						     ATA_LOG_SATA_SETTINGS,
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 408da9502177..18cbb93fdbca 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -593,6 +593,7 @@ static inline int ata_is_data(u8 prot)
 #define ata_id_cdb_intr(id)	(((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
 #define ata_id_has_da(id)	((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
 #define ata_id_has_devslp(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
+#define ata_id_has_hw_feature_ctrl(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 5))
 
 static inline bool ata_id_has_hipm(const u16 *id)
 {
-- 
cgit v1.2.3-55-g7522


From b7db04d9264fca4b00e949da7b3180c50e243fca Mon Sep 17 00:00:00 2001
From: Brian Norris
Date: Fri, 2 Nov 2012 12:29:32 -0700
Subject: libata: implement ata_platform_remove_one()

This relatively simple boiler-plate code is repeated in several platform
drivers. We should implement a common version in libata.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 23 +++++++++++++++++++++++
 include/linux/libata.h    |  4 ++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 583f26d6d455..8e3f4a90c088 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -67,6 +67,7 @@
 #include <linux/cdrom.h>
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
+#include <linux/platform_device.h>
 
 #include "libata.h"
 #include "libata-transport.h"
@@ -6382,6 +6383,26 @@ int ata_pci_device_resume(struct pci_dev *pdev)
 
 #endif /* CONFIG_PCI */
 
+/**
+ *	ata_platform_remove_one - Platform layer callback for device removal
+ *	@pdev: Platform device that was removed
+ *
+ *	Platform layer indicates to libata via this hook that hot-unplug or
+ *	module unload event has occurred.  Detach all ports.  Resource
+ *	release is handled via devres.
+ *
+ *	LOCKING:
+ *	Inherited from platform layer (may sleep).
+ */
+int ata_platform_remove_one(struct platform_device *pdev)
+{
+	struct ata_host *host = platform_get_drvdata(pdev);
+
+	ata_host_detach(host);
+
+	return 0;
+}
+
 static int __init ata_parse_force_one(char **cur,
 				      struct ata_force_ent *force_ent,
 				      const char **reason)
@@ -6877,6 +6898,8 @@ EXPORT_SYMBOL_GPL(ata_pci_device_resume);
 #endif /* CONFIG_PM */
 #endif /* CONFIG_PCI */
 
+EXPORT_SYMBOL_GPL(ata_platform_remove_one);
+
 EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
 EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
 EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e931c9ad1078..83ba0ab2c915 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1115,6 +1115,10 @@ extern int ata_pci_device_resume(struct pci_dev *pdev);
 #endif /* CONFIG_PM */
 #endif /* CONFIG_PCI */
 
+struct platform_device;
+
+extern int ata_platform_remove_one(struct platform_device *pdev);
+
 /*
  * ACPI - drivers/ata/libata-acpi.c
  */
-- 
cgit v1.2.3-55-g7522


From 156f34d26a74c6ea060fb43d898f17509ed8e18a Mon Sep 17 00:00:00 2001
From: Brian Norris
Date: Fri, 2 Nov 2012 00:46:24 -0700
Subject: pata_platform: remove unused remove function

All users of __pata_platform_remove() have been converted to utilize the
common ata_platform_remove_one().

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/pata_platform.c  | 17 -----------------
 include/linux/ata_platform.h |  2 --
 2 files changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 449aa29931bc..f4372d0c7ce6 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -178,23 +178,6 @@ int __devinit __pata_platform_probe(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__pata_platform_probe);
 
-/**
- *	__pata_platform_remove		-	unplug a platform interface
- *	@dev: device
- *
- *	A platform bus ATA device has been unplugged. Perform the needed
- *	cleanup. Also called on module unload for any active devices.
- */
-int __pata_platform_remove(struct device *dev)
-{
-	struct ata_host *host = dev_get_drvdata(dev);
-
-	ata_host_detach(host);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__pata_platform_remove);
-
 static int __devinit pata_platform_probe(struct platform_device *pdev)
 {
 	struct resource *io_res;
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index b856a2a590d9..fe9989636b62 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -22,8 +22,6 @@ extern int __devinit __pata_platform_probe(struct device *dev,
 					   unsigned int ioport_shift,
 					   int __pio_mask);
 
-extern int __devexit __pata_platform_remove(struct device *dev);
-
 /*
  * Marvell SATA private data
  */
-- 
cgit v1.2.3-55-g7522


From 3ad909bc8f2ea32fd7d24266c61cd4605feecec8 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Fri, 30 Nov 2012 16:30:43 +0100
Subject: ARM: 7588/1: amba: create a resource parent registrator

This creates amba_apb_device_add_res() and
amba_ahb_device_add_res() respectively, to add devices with
another parent than iomem_resource. This is needed to specify
that a device is contained in a specific IO range.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/amba/bus.c       | 32 ++++++++++++++++++++++++++++----
 include/linux/amba/bus.h | 10 ++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index e8eb91bd0d28..a2fc56d2e681 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -546,7 +546,8 @@ EXPORT_SYMBOL_GPL(amba_device_add);
 static struct amba_device *
 amba_aphb_device_add(struct device *parent, const char *name,
 		     resource_size_t base, size_t size, int irq1, int irq2,
-		     void *pdata, unsigned int periphid, u64 dma_mask)
+		     void *pdata, unsigned int periphid, u64 dma_mask,
+		     struct resource *resbase)
 {
 	struct amba_device *dev;
 	int ret;
@@ -563,7 +564,7 @@ amba_aphb_device_add(struct device *parent, const char *name,
 	dev->dev.platform_data = pdata;
 	dev->dev.parent = parent;
 
-	ret = amba_device_add(dev, &iomem_resource);
+	ret = amba_device_add(dev, resbase);
 	if (ret) {
 		amba_device_put(dev);
 		return ERR_PTR(ret);
@@ -578,7 +579,7 @@ amba_apb_device_add(struct device *parent, const char *name,
 		    void *pdata, unsigned int periphid)
 {
 	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, 0);
+				    periphid, 0, &iomem_resource);
 }
 EXPORT_SYMBOL_GPL(amba_apb_device_add);
 
@@ -588,10 +589,33 @@ amba_ahb_device_add(struct device *parent, const char *name,
 		    void *pdata, unsigned int periphid)
 {
 	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, ~0ULL);
+				    periphid, ~0ULL, &iomem_resource);
 }
 EXPORT_SYMBOL_GPL(amba_ahb_device_add);
 
+struct amba_device *
+amba_apb_device_add_res(struct device *parent, const char *name,
+			resource_size_t base, size_t size, int irq1,
+			int irq2, void *pdata, unsigned int periphid,
+			struct resource *resbase)
+{
+	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
+				    periphid, 0, resbase);
+}
+EXPORT_SYMBOL_GPL(amba_apb_device_add_res);
+
+struct amba_device *
+amba_ahb_device_add_res(struct device *parent, const char *name,
+			resource_size_t base, size_t size, int irq1,
+			int irq2, void *pdata, unsigned int periphid,
+			struct resource *resbase)
+{
+	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
+				    periphid, ~0ULL, resbase);
+}
+EXPORT_SYMBOL_GPL(amba_ahb_device_add_res);
+
+
 static void amba_device_initialize(struct amba_device *dev, const char *name)
 {
 	device_initialize(&dev->dev);
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index d36417158d8f..43ec7e247a80 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -71,6 +71,16 @@ struct amba_device *amba_ahb_device_add(struct device *parent, const char *name,
 					resource_size_t base, size_t size,
 					int irq1, int irq2, void *pdata,
 					unsigned int periphid);
+struct amba_device *
+amba_apb_device_add_res(struct device *parent, const char *name,
+			resource_size_t base, size_t size, int irq1,
+			int irq2, void *pdata, unsigned int periphid,
+			struct resource *resbase);
+struct amba_device *
+amba_ahb_device_add_res(struct device *parent, const char *name,
+			resource_size_t base, size_t size, int irq1,
+			int irq2, void *pdata, unsigned int periphid,
+			struct resource *resbase);
 void amba_device_unregister(struct amba_device *);
 struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
 int amba_request_regions(struct amba_device *, const char *);
-- 
cgit v1.2.3-55-g7522


From 64b37b2a63eb2f80b65c7185f0013f8ffc637ae3 Mon Sep 17 00:00:00 2001
From: Matthieu CASTET
Date: Tue, 6 Nov 2012 11:51:44 +0100
Subject: mtd: nand: add NAND_BUSWIDTH_AUTO to autodetect bus width

The driver call nand_scan_ident in 8 bit mode, then
readid or onfi detection are done (and detect bus width).
The driver should update its bus width before calling nand_scan_tail.

This work because readid and onfi are read work 8 byte mode.

Note that nand_scan_ident send command (NAND_CMD_RESET, NAND_CMD_READID, NAND_CMD_PARAM), address and read data
The ONFI specificication is not very clear for x16 device if high byte of address should be driven to 0,
but according to [1] it should be ok to not drive it during autodetection.

[1]
3.3.2. Target Initialization

[...]
The Read ID and Read Parameter Page commands only use the lower 8-bits of the data bus.
The host shall not issue commands that use a word data width on x16 devices until the host
determines the device supports a 16-bit data bus width in the parameter page.

Signed-off-by: Matthieu CASTET <matthieu.castet@parrot.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/nand_base.c | 14 +++++++++-----
 include/linux/mtd/nand.h     |  7 +++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6f58e1633e2f..5851c51ac2df 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3250,11 +3250,15 @@ ident_done:
 			break;
 	}
 
-	/*
-	 * Check, if buswidth is correct. Hardware drivers should set
-	 * chip correct!
-	 */
-	if (busw != (chip->options & NAND_BUSWIDTH_16)) {
+	if (chip->options & NAND_BUSWIDTH_AUTO) {
+		WARN_ON(chip->options & NAND_BUSWIDTH_16);
+		chip->options |= busw;
+		nand_set_defaults(chip, busw);
+	} else if (busw != (chip->options & NAND_BUSWIDTH_16)) {
+		/*
+		 * Check, if buswidth is correct. Hardware drivers should set
+		 * chip correct!
+		 */
 		pr_info("NAND device: Manufacturer ID:"
 			" 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id,
 			*dev_id, nand_manuf_ids[maf_idx].name, mtd->name);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9d8a6048aacd..7ccb3c59ed60 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -219,6 +219,13 @@ typedef enum {
 #define NAND_OWN_BUFFERS	0x00020000
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
+/*
+ * Autodetect nand buswidth with readid/onfi.
+ * This suppose the driver will configure the hardware in 8 bits mode
+ * when calling nand_scan_ident, and update its configuration
+ * before calling nand_scan_tail.
+ */
+#define NAND_BUSWIDTH_AUTO      0x00080000
 
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
-- 
cgit v1.2.3-55-g7522


From 72b15b6ae97796c5fac687addde5dbfab872cf94 Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna
Date: Mon, 19 Nov 2012 19:05:50 -0600
Subject: iommu/omap: Migrate to hwmod framework

Use hwmod data and device attributes to build and register an
omap device for iommu driver.

 - Update the naming convention in isp module.
 - Remove unneeded check for number of resources, as this is now
   handled by omap_device and prevents driver from loading.
 - Now unused, remove platform device and resource data, handling
   of sysconfig register for softreset purposes, use default
   latency structure.
 - Use hwmod API for reset handling.

Signed-off-by: Omar Ramirez Luna <omar.luna@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 arch/arm/mach-omap2/devices.c            |   2 +-
 arch/arm/mach-omap2/omap-iommu.c         | 168 +++++++------------------------
 drivers/iommu/omap-iommu.c               |  23 ++++-
 drivers/iommu/omap-iommu2.c              |  19 ----
 include/linux/platform_data/iommu-omap.h |   8 +-
 5 files changed, 64 insertions(+), 156 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index c15f5a97b51c..787a996ec4eb 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -214,7 +214,7 @@ static struct platform_device omap3isp_device = {
 };
 
 static struct omap_iommu_arch_data omap3_isp_iommu = {
-	.name = "isp",
+	.name = "mmu_isp",
 };
 
 int omap3_init_camera(struct isp_platform_data *pdata)
diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
index a6a4ff8744b7..02726a647b1d 100644
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -12,153 +12,61 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
 
 #include <linux/platform_data/iommu-omap.h>
+#include <plat/omap_hwmod.h>
+#include <plat/omap_device.h>
 
-#include "soc.h"
-#include "common.h"
-
-struct iommu_device {
-	resource_size_t base;
-	int irq;
-	struct iommu_platform_data pdata;
-	struct resource res[2];
-};
-static struct iommu_device *devices;
-static int num_iommu_devices;
-
-#ifdef CONFIG_ARCH_OMAP3
-static struct iommu_device omap3_devices[] = {
-	{
-		.base = 0x480bd400,
-		.irq = 24 + OMAP_INTC_START,
-		.pdata = {
-			.name = "isp",
-			.nr_tlb_entries = 8,
-			.clk_name = "cam_ick",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-#if defined(CONFIG_OMAP_IOMMU_IVA2)
-	{
-		.base = 0x5d000000,
-		.irq = 28 + OMAP_INTC_START,
-		.pdata = {
-			.name = "iva2",
-			.nr_tlb_entries = 32,
-			.clk_name = "iva2_ck",
-			.da_start = 0x11000000,
-			.da_end = 0xFFFFF000,
-		},
-	},
-#endif
-};
-#define NR_OMAP3_IOMMU_DEVICES ARRAY_SIZE(omap3_devices)
-static struct platform_device *omap3_iommu_pdev[NR_OMAP3_IOMMU_DEVICES];
-#else
-#define omap3_devices		NULL
-#define NR_OMAP3_IOMMU_DEVICES	0
-#define omap3_iommu_pdev	NULL
-#endif
-
-#ifdef CONFIG_ARCH_OMAP4
-static struct iommu_device omap4_devices[] = {
-	{
-		.base = OMAP4_MMU1_BASE,
-		.irq = 100 + OMAP44XX_IRQ_GIC_START,
-		.pdata = {
-			.name = "ducati",
-			.nr_tlb_entries = 32,
-			.clk_name = "ipu_fck",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-	{
-		.base = OMAP4_MMU2_BASE,
-		.irq = 28 + OMAP44XX_IRQ_GIC_START,
-		.pdata = {
-			.name = "tesla",
-			.nr_tlb_entries = 32,
-			.clk_name = "dsp_fck",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-};
-#define NR_OMAP4_IOMMU_DEVICES ARRAY_SIZE(omap4_devices)
-static struct platform_device *omap4_iommu_pdev[NR_OMAP4_IOMMU_DEVICES];
-#else
-#define omap4_devices		NULL
-#define NR_OMAP4_IOMMU_DEVICES	0
-#define omap4_iommu_pdev	NULL
-#endif
-
-static struct platform_device **omap_iommu_pdev;
-
-static int __init omap_iommu_init(void)
+static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused)
 {
-	int i, err;
-	struct resource res[] = {
-		{ .flags = IORESOURCE_MEM },
-		{ .flags = IORESOURCE_IRQ },
-	};
+	struct platform_device *pdev;
+	struct iommu_platform_data *pdata;
+	struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr;
+	static int i;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->name = oh->name;
+	pdata->clk_name = oh->main_clk;
+	pdata->nr_tlb_entries = a->nr_tlb_entries;
+	pdata->da_start = a->da_start;
+	pdata->da_end = a->da_end;
+
+	if (oh->rst_lines_cnt == 1) {
+		pdata->reset_name = oh->rst_lines->name;
+		pdata->assert_reset = omap_device_assert_hardreset;
+		pdata->deassert_reset = omap_device_deassert_hardreset;
+	}
 
-	if (cpu_is_omap34xx()) {
-		devices = omap3_devices;
-		omap_iommu_pdev = omap3_iommu_pdev;
-		num_iommu_devices = NR_OMAP3_IOMMU_DEVICES;
-	} else if (cpu_is_omap44xx()) {
-		devices = omap4_devices;
-		omap_iommu_pdev = omap4_iommu_pdev;
-		num_iommu_devices = NR_OMAP4_IOMMU_DEVICES;
-	} else
-		return -ENODEV;
+	pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata),
+				NULL, 0, 0);
 
-	for (i = 0; i < num_iommu_devices; i++) {
-		struct platform_device *pdev;
-		const struct iommu_device *d = &devices[i];
+	kfree(pdata);
 
-		pdev = platform_device_alloc("omap-iommu", i);
-		if (!pdev) {
-			err = -ENOMEM;
-			goto err_out;
-		}
+	if (IS_ERR(pdev)) {
+		pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev));
+		return PTR_ERR(pdev);
+	}
 
-		res[0].start = d->base;
-		res[0].end = d->base + MMU_REG_SIZE - 1;
-		res[1].start = res[1].end = d->irq;
+	i++;
 
-		err = platform_device_add_resources(pdev, res,
-						    ARRAY_SIZE(res));
-		if (err)
-			goto err_out;
-		err = platform_device_add_data(pdev, &d->pdata,
-					       sizeof(d->pdata));
-		if (err)
-			goto err_out;
-		err = platform_device_add(pdev);
-		if (err)
-			goto err_out;
-		omap_iommu_pdev[i] = pdev;
-	}
 	return 0;
+}
 
-err_out:
-	while (i--)
-		platform_device_put(omap_iommu_pdev[i]);
-	return err;
+static int __init omap_iommu_init(void)
+{
+	return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL);
 }
 /* must be ready before omap3isp is probed */
 subsys_initcall(omap_iommu_init);
 
 static void __exit omap_iommu_exit(void)
 {
-	int i;
-
-	for (i = 0; i < num_iommu_devices; i++)
-		platform_device_unregister(omap_iommu_pdev[i]);
+	/* Do nothing */
 }
 module_exit(omap_iommu_exit);
 
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index f8082da6179b..af9b4f31f594 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -143,13 +143,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_arch_version);
 static int iommu_enable(struct omap_iommu *obj)
 {
 	int err;
+	struct platform_device *pdev = to_platform_device(obj->dev);
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	if (!obj)
+	if (!obj || !pdata)
 		return -EINVAL;
 
 	if (!arch_iommu)
 		return -ENODEV;
 
+	if (pdata->deassert_reset) {
+		err = pdata->deassert_reset(pdev, pdata->reset_name);
+		if (err) {
+			dev_err(obj->dev, "deassert_reset failed: %d\n", err);
+			return err;
+		}
+	}
+
 	clk_enable(obj->clk);
 
 	err = arch_iommu->enable(obj);
@@ -159,12 +169,18 @@ static int iommu_enable(struct omap_iommu *obj)
 
 static void iommu_disable(struct omap_iommu *obj)
 {
-	if (!obj)
+	struct platform_device *pdev = to_platform_device(obj->dev);
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!obj || !pdata)
 		return;
 
 	arch_iommu->disable(obj);
 
 	clk_disable(obj->clk);
+
+	if (pdata->assert_reset)
+		pdata->assert_reset(pdev, pdata->reset_name);
 }
 
 /*
@@ -926,9 +942,6 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	if (pdev->num_resources != 2)
-		return -EINVAL;
-
 	obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
index c02020292377..4a3a1c7a38c1 100644
--- a/drivers/iommu/omap-iommu2.c
+++ b/drivers/iommu/omap-iommu2.c
@@ -35,12 +35,8 @@
 #define MMU_SYS_IDLE_SMART	(2 << MMU_SYS_IDLE_SHIFT)
 #define MMU_SYS_IDLE_MASK	(3 << MMU_SYS_IDLE_SHIFT)
 
-#define MMU_SYS_SOFTRESET	(1 << 1)
 #define MMU_SYS_AUTOIDLE	1
 
-/* SYSSTATUS */
-#define MMU_SYS_RESETDONE	1
-
 /* IRQSTATUS & IRQENABLE */
 #define MMU_IRQ_MULTIHITFAULT	(1 << 4)
 #define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
@@ -97,7 +93,6 @@ static void __iommu_set_twl(struct omap_iommu *obj, bool on)
 static int omap2_iommu_enable(struct omap_iommu *obj)
 {
 	u32 l, pa;
-	unsigned long timeout;
 
 	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
 		return -EINVAL;
@@ -106,20 +101,6 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 	if (!IS_ALIGNED(pa, SZ_16K))
 		return -EINVAL;
 
-	iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG);
-
-	timeout = jiffies + msecs_to_jiffies(20);
-	do {
-		l = iommu_read_reg(obj, MMU_SYSSTATUS);
-		if (l & MMU_SYS_RESETDONE)
-			break;
-	} while (!time_after(jiffies, timeout));
-
-	if (!(l & MMU_SYS_RESETDONE)) {
-		dev_err(obj->dev, "can't take mmu out of reset\n");
-		return -ENODEV;
-	}
-
 	l = iommu_read_reg(obj, MMU_REVISION);
 	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
 		 (l >> 4) & 0xf, l & 0xf);
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index c677b9f2fefa..ef2060d7eeb8 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/platform_device.h>
+
 #define MMU_REG_SIZE		256
 
 /**
@@ -43,7 +45,11 @@ struct omap_mmu_dev_attr {
 struct iommu_platform_data {
 	const char *name;
 	const char *clk_name;
-	const int nr_tlb_entries;
+	const char *reset_name;
+	int nr_tlb_entries;
 	u32 da_start;
 	u32 da_end;
+
+	int (*assert_reset)(struct platform_device *pdev, const char *name);
+	int (*deassert_reset)(struct platform_device *pdev, const char *name);
 };
-- 
cgit v1.2.3-55-g7522


From ebf7cda0f92effd8169b831fae81e9437dce1fef Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna
Date: Mon, 19 Nov 2012 19:05:51 -0600
Subject: iommu/omap: Adapt to runtime pm

Use runtime PM functionality interfaced with hwmod enable/idle
functions, to replace direct clock operations and sysconfig
handling.

Due to reset sequence, pm_runtime_[get|put]_sync must be used, to
avoid possible operations with the module under reset. Because of
this and given that the driver uses spin_locks to protect their
critical sections, we must use pm_runtime_irq_safe in order for the
runtime ops to be happy, otherwise might_sleep_if checks in runtime
framework will complain.

The remaining pm_runtime out of iommu_enable and iommu_disable
corresponds to paths that can be accessed through debugfs, some of
them doesn't work if the module is not enabled first, but in future
if the mmu is idled withouth freeing, these are needed to debug.

Signed-off-by: Omar Ramirez Luna <omar.luna@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 arch/arm/mach-omap2/omap-iommu.c         |  1 -
 drivers/iommu/omap-iommu.c               | 40 +++++++++++++++-----------------
 drivers/iommu/omap-iommu.h               |  3 ---
 drivers/iommu/omap-iommu2.c              | 17 --------------
 include/linux/platform_data/iommu-omap.h |  1 -
 5 files changed, 19 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
index 02726a647b1d..7642fc4672c1 100644
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -31,7 +31,6 @@ static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused)
 		return -ENOMEM;
 
 	pdata->name = oh->name;
-	pdata->clk_name = oh->main_clk;
 	pdata->nr_tlb_entries = a->nr_tlb_entries;
 	pdata->da_start = a->da_start;
 	pdata->da_end = a->da_end;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index af9b4f31f594..18108c1405e2 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -16,13 +16,13 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/iommu.h>
 #include <linux/omap-iommu.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/pm_runtime.h>
 
 #include <asm/cacheflush.h>
 
@@ -160,7 +160,7 @@ static int iommu_enable(struct omap_iommu *obj)
 		}
 	}
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	err = arch_iommu->enable(obj);
 
@@ -177,7 +177,7 @@ static void iommu_disable(struct omap_iommu *obj)
 
 	arch_iommu->disable(obj);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	if (pdata->assert_reset)
 		pdata->assert_reset(pdev, pdata->reset_name);
@@ -303,7 +303,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 	if (!obj || !obj->nr_tlb_entries || !e)
 		return -EINVAL;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	iotlb_lock_get(obj, &l);
 	if (l.base == obj->nr_tlb_entries) {
@@ -333,7 +333,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 
 	cr = iotlb_alloc_cr(obj, e);
 	if (IS_ERR(cr)) {
-		clk_disable(obj->clk);
+		pm_runtime_put_sync(obj->dev);
 		return PTR_ERR(cr);
 	}
 
@@ -347,7 +347,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 		l.vict = l.base;
 	iotlb_lock_set(obj, &l);
 out:
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 	return err;
 }
 
@@ -377,7 +377,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da)
 	int i;
 	struct cr_regs cr;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) {
 		u32 start;
@@ -396,7 +396,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da)
 			iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
 		}
 	}
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	if (i == obj->nr_tlb_entries)
 		dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
@@ -410,7 +410,7 @@ static void flush_iotlb_all(struct omap_iommu *obj)
 {
 	struct iotlb_lock l;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	l.base = 0;
 	l.vict = 0;
@@ -418,7 +418,7 @@ static void flush_iotlb_all(struct omap_iommu *obj)
 
 	iommu_write_reg(obj, 1, MMU_GFLUSH);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 }
 
 #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
@@ -428,11 +428,11 @@ ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes)
 	if (!obj || !buf)
 		return -EINVAL;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	bytes = arch_iommu->dump_ctx(obj, buf, bytes);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	return bytes;
 }
@@ -446,7 +446,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
 	struct cr_regs tmp;
 	struct cr_regs *p = crs;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 	iotlb_lock_get(obj, &saved);
 
 	for_each_iotlb_cr(obj, num, i, tmp) {
@@ -456,7 +456,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
 	}
 
 	iotlb_lock_set(obj, &saved);
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	return  p - crs;
 }
@@ -946,10 +946,6 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev)
 	if (!obj)
 		return -ENOMEM;
 
-	obj->clk = clk_get(&pdev->dev, pdata->clk_name);
-	if (IS_ERR(obj->clk))
-		goto err_clk;
-
 	obj->nr_tlb_entries = pdata->nr_tlb_entries;
 	obj->name = pdata->name;
 	obj->dev = &pdev->dev;
@@ -992,6 +988,9 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev)
 		goto err_irq;
 	platform_set_drvdata(pdev, obj);
 
+	pm_runtime_irq_safe(obj->dev);
+	pm_runtime_enable(obj->dev);
+
 	dev_info(&pdev->dev, "%s registered\n", obj->name);
 	return 0;
 
@@ -1000,8 +999,6 @@ err_irq:
 err_ioremap:
 	release_mem_region(res->start, resource_size(res));
 err_mem:
-	clk_put(obj->clk);
-err_clk:
 	kfree(obj);
 	return err;
 }
@@ -1022,7 +1019,8 @@ static int __devexit omap_iommu_remove(struct platform_device *pdev)
 	release_mem_region(res->start, resource_size(res));
 	iounmap(obj->regbase);
 
-	clk_put(obj->clk);
+	pm_runtime_disable(obj->dev);
+
 	dev_info(&pdev->dev, "%s removed\n", obj->name);
 	kfree(obj);
 	return 0;
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 2b5f3c04d167..120084206602 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -29,7 +29,6 @@ struct iotlb_entry {
 struct omap_iommu {
 	const char	*name;
 	struct module	*owner;
-	struct clk	*clk;
 	void __iomem	*regbase;
 	struct device	*dev;
 	void		*isr_priv;
@@ -116,8 +115,6 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
  * MMU Register offsets
  */
 #define MMU_REVISION		0x00
-#define MMU_SYSCONFIG		0x10
-#define MMU_SYSSTATUS		0x14
 #define MMU_IRQSTATUS		0x18
 #define MMU_IRQENABLE		0x1c
 #define MMU_WALKING_ST		0x40
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
index 4a3a1c7a38c1..d745094a69dd 100644
--- a/drivers/iommu/omap-iommu2.c
+++ b/drivers/iommu/omap-iommu2.c
@@ -28,15 +28,6 @@
  */
 #define IOMMU_ARCH_VERSION	0x00000011
 
-/* SYSCONF */
-#define MMU_SYS_IDLE_SHIFT	3
-#define MMU_SYS_IDLE_FORCE	(0 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_NONE	(1 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_SMART	(2 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_MASK	(3 << MMU_SYS_IDLE_SHIFT)
-
-#define MMU_SYS_AUTOIDLE	1
-
 /* IRQSTATUS & IRQENABLE */
 #define MMU_IRQ_MULTIHITFAULT	(1 << 4)
 #define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
@@ -105,11 +96,6 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
 		 (l >> 4) & 0xf, l & 0xf);
 
-	l = iommu_read_reg(obj, MMU_SYSCONFIG);
-	l &= ~MMU_SYS_IDLE_MASK;
-	l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE);
-	iommu_write_reg(obj, l, MMU_SYSCONFIG);
-
 	iommu_write_reg(obj, pa, MMU_TTB);
 
 	__iommu_set_twl(obj, true);
@@ -123,7 +109,6 @@ static void omap2_iommu_disable(struct omap_iommu *obj)
 
 	l &= ~MMU_CNTL_MASK;
 	iommu_write_reg(obj, l, MMU_CNTL);
-	iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG);
 
 	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
 }
@@ -252,8 +237,6 @@ omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len)
 	char *p = buf;
 
 	pr_reg(REVISION);
-	pr_reg(SYSCONFIG);
-	pr_reg(SYSSTATUS);
 	pr_reg(IRQSTATUS);
 	pr_reg(IRQENABLE);
 	pr_reg(WALKING_ST);
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index ef2060d7eeb8..5b429c43a297 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -44,7 +44,6 @@ struct omap_mmu_dev_attr {
 
 struct iommu_platform_data {
 	const char *name;
-	const char *clk_name;
 	const char *reset_name;
 	int nr_tlb_entries;
 	u32 da_start;
-- 
cgit v1.2.3-55-g7522


From cc248d4b1ddf05fefc1373d9d7a4dd1df71b6190 Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Mon, 3 Dec 2012 16:11:13 -0500
Subject: svcrpc: don't byte-swap sk_reclen in place

Byte-swapping in place is always a little dubious.

Let's instead define this field to always be big-endian, and do the
swapping on demand where we need it.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 12 +++++++++++-
 net/sunrpc/svcsock.c           | 26 +++++++++++---------------
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 92ad02f0dcc0..613cf42227aa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,11 +26,21 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	u32			sk_reclen;	/* length of record */
+	__be32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
+static inline u32 svc_sock_reclen(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
+}
+
+static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
+}
+
 /*
  * Function prototypes.
  */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03827cef1fa7..d50de2b95036 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -950,8 +950,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			return -EAGAIN;
 		}
 
-		svsk->sk_reclen = ntohl(svsk->sk_reclen);
-		if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) {
+		if (!(svc_sock_final_rec(svsk))) {
 			/* FIXME: technically, a record can be fragmented,
 			 *  and non-terminal fragments will not have the top
 			 *  bit set in the fragment length header.
@@ -961,21 +960,18 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			goto err_delete;
 		}
 
-		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
-		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
-		if (svsk->sk_reclen > serv->sv_max_mesg) {
+		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
+		if (svc_sock_reclen(svsk) > serv->sv_max_mesg) {
 			net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n",
-					       (unsigned long)svsk->sk_reclen);
+					(unsigned long)svc_sock_reclen(svsk));
 			goto err_delete;
 		}
 	}
 
-	if (svsk->sk_reclen < 8)
+	if (svc_sock_reclen(svsk) < 8)
 		goto err_delete; /* client is nuts. */
 
-	len = svsk->sk_reclen;
-
-	return len;
+	return svc_sock_reclen(svsk);
 error:
 	dprintk("RPC: TCP recv_record got %d\n", len);
 	return len;
@@ -1019,7 +1015,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	if (dst->iov_len < src->iov_len)
 		return -EAGAIN; /* whatever; just giving up. */
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
-	xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
+	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
 	return 0;
 }
@@ -1064,12 +1060,12 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto error;
 
 	base = svc_tcp_restore_pages(svsk, rqstp);
-	want = svsk->sk_reclen - base;
+	want = svc_sock_reclen(svsk) - base;
 
 	vec = rqstp->rq_vec;
 
 	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
-						svsk->sk_reclen);
+						svc_sock_reclen(svsk));
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
 
@@ -1082,11 +1078,11 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		if (len < 0 && len != -EAGAIN)
 			goto err_other;
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			svsk->sk_tcplen, svsk->sk_reclen);
+			svsk->sk_tcplen, svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 
-	rqstp->rq_arg.len = svsk->sk_reclen;
+	rqstp->rq_arg.len = svc_sock_reclen(svsk);
 	rqstp->rq_arg.page_base = 0;
 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
 		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
-- 
cgit v1.2.3-55-g7522


From 8af345f58ac9b350bb23c1457c613381d9f00472 Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Mon, 3 Dec 2012 16:45:35 -0500
Subject: svcrpc: track rpc data length separately from sk_tcplen

Keep a separate field, sk_datalen, that tracks only the data contained
in a fragment, not including the fragment header.

For now, this is always just max(0, sk_tcplen - 4), but after we allow
multiple fragments sk_datalen will accumulate the total rpc data size
while sk_tcplen only tracks progress receiving the current fragment.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 11 +++++++++--
 net/sunrpc/svcsock.c           | 19 ++++++++++++-------
 2 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 613cf42227aa..62fd1b756e99 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,8 +26,15 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	__be32			sk_reclen;	/* length of record */
-	u32			sk_tcplen;	/* current read length */
+	/* On-the-wire fragment header: */
+	__be32			sk_reclen;
+	/* As we receive a record, this includes the length received so
+	 * far (including the fragment header): */
+	u32			sk_tcplen;
+	/* Total length of the data (not including fragment headers)
+	 * received so far in the fragments making up this rpc: */
+	u32			sk_datalen;
+
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 1db42b1ffe28..2b09e2306bfa 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -874,9 +874,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return 0;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		if (rqstp->rq_pages[i] != NULL)
@@ -893,9 +893,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		svsk->sk_pages[i] = rqstp->rq_pages[i];
@@ -907,9 +907,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		goto out;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		BUG_ON(svsk->sk_pages[i] == NULL);
@@ -918,6 +918,7 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 	}
 out:
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 }
 
 /*
@@ -1066,8 +1067,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
-	if (len >= 0)
+	if (len >= 0) {
 		svsk->sk_tcplen += len;
+		svsk->sk_datalen += len;
+	}
 	if (len != want) {
 		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
@@ -1100,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	/* Reset TCP read info */
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 	/* If we have more data, signal svc_xprt_enqueue() to try again */
 	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1296,6 +1300,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 
 		svsk->sk_reclen = 0;
 		svsk->sk_tcplen = 0;
+		svsk->sk_datalen = 0;
 		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
 
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
-- 
cgit v1.2.3-55-g7522


From d67b8c616b48df30e2836d797795f2420d109bc9 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel
Date: Tue, 4 Dec 2012 01:13:35 +0000
Subject: netconf: advertise mc_forwarding status

This patch advertise the MC_FORWARDING status for IPv4 and IPv6.
This field is readonly, only multicast engine in the kernel updates it.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h   |  3 +++
 include/net/addrconf.h       |  3 +++
 include/uapi/linux/netconf.h |  1 +
 net/ipv4/devinet.c           | 10 ++++++++--
 net/ipv4/ipmr.c              | 12 ++++++++++++
 net/ipv6/addrconf.c          | 10 ++++++++--
 net/ipv6/ip6mr.c             | 20 ++++++++++++++++++--
 7 files changed, 53 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index d032780d0ce5..a9d828976a77 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -171,6 +171,9 @@ struct in_ifaddr {
 extern int register_inetaddr_notifier(struct notifier_block *nb);
 extern int unregister_inetaddr_notifier(struct notifier_block *nb);
 
+extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
+					struct ipv4_devconf *devconf);
+
 extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
 static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
 {
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9e63e76b20e7..df4ef9453384 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -172,6 +172,9 @@ extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 extern int register_inet6addr_notifier(struct notifier_block *nb);
 extern int unregister_inet6addr_notifier(struct notifier_block *nb);
 
+extern void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
+					 struct ipv6_devconf *devconf);
+
 /**
  * __in6_dev_get - get inet6_dev pointer from netdevice
  * @dev: network device
diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h
index 75dcbc587fb5..64804a798b0c 100644
--- a/include/uapi/linux/netconf.h
+++ b/include/uapi/linux/netconf.h
@@ -13,6 +13,7 @@ enum {
 	NETCONFA_IFINDEX,
 	NETCONFA_FORWARDING,
 	NETCONFA_RP_FILTER,
+	NETCONFA_MC_FORWARDING,
 	__NETCONFA_MAX
 };
 #define NETCONFA_MAX	(__NETCONFA_MAX - 1)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e13183abd7f6..cc06a47f1216 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1453,6 +1453,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (type == -1 || type == NETCONFA_RP_FILTER)
 		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_MC_FORWARDING)
+		size += nla_total_size(4);
 
 	return size;
 }
@@ -1485,6 +1487,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
 		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
+	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
+			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
+		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
 
@@ -1493,8 +1499,8 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
-					struct ipv4_devconf *devconf)
+void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				 struct ipv4_devconf *devconf)
 {
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 58e4160fdcee..0c452e3fdc1b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -65,6 +65,7 @@
 #include <net/checksum.h>
 #include <net/netlink.h>
 #include <net/fib_rules.h>
+#include <linux/netconf.h>
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 #define CONFIG_IP_PIMSM	1
@@ -582,6 +583,9 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 	in_dev = __in_dev_get_rtnl(dev);
 	if (in_dev) {
 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
+		inet_netconf_notify_devconf(dev_net(dev),
+					    NETCONFA_MC_FORWARDING,
+					    dev->ifindex, &in_dev->cnf);
 		ip_rt_multicast_event(in_dev);
 	}
 
@@ -772,6 +776,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 		return -EADDRNOTAVAIL;
 	}
 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
+	inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
+				    &in_dev->cnf);
 	ip_rt_multicast_event(in_dev);
 
 	/* Fill in the VIF structures */
@@ -1185,6 +1191,9 @@ static void mrtsock_destruct(struct sock *sk)
 	ipmr_for_each_table(mrt, net) {
 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
+			inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+						    NETCONFA_IFINDEX_ALL,
+						    net->ipv4.devconf_all);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
 			mroute_clean_tables(mrt);
 		}
@@ -1236,6 +1245,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		if (ret == 0) {
 			rcu_assign_pointer(mrt->mroute_sk, sk);
 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
+			inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+						    NETCONFA_IFINDEX_ALL,
+						    net->ipv4.devconf_all);
 		}
 		rtnl_unlock();
 		return ret;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 22ae75d54017..28e0e627229c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -469,6 +469,8 @@ static int inet6_netconf_msgsize_devconf(int type)
 	/* type -1 is used for ALL */
 	if (type == -1 || type == NETCONFA_FORWARDING)
 		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_MC_FORWARDING)
+		size += nla_total_size(4);
 
 	return size;
 }
@@ -496,6 +498,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
 	    nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
 		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
+	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
+			devconf->mc_forwarding) < 0)
+		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
 
@@ -504,8 +510,8 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
-					 struct ipv6_devconf *devconf)
+void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				  struct ipv6_devconf *devconf)
 {
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 926ea544f499..1c05fe604d37 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -52,6 +52,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/export.h>
 #include <net/ip6_checksum.h>
+#include <linux/netconf.h>
 
 struct mr6_table {
 	struct list_head	list;
@@ -805,8 +806,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 	dev_set_allmulti(dev, -1);
 
 	in6_dev = __in6_dev_get(dev);
-	if (in6_dev)
+	if (in6_dev) {
 		in6_dev->cnf.mc_forwarding--;
+		inet6_netconf_notify_devconf(dev_net(dev),
+					     NETCONFA_MC_FORWARDING,
+					     dev->ifindex, &in6_dev->cnf);
+	}
 
 	if (v->flags & MIFF_REGISTER)
 		unregister_netdevice_queue(dev, head);
@@ -958,8 +963,12 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 	}
 
 	in6_dev = __in6_dev_get(dev);
-	if (in6_dev)
+	if (in6_dev) {
 		in6_dev->cnf.mc_forwarding++;
+		inet6_netconf_notify_devconf(dev_net(dev),
+					     NETCONFA_MC_FORWARDING,
+					     dev->ifindex, &in6_dev->cnf);
+	}
 
 	/*
 	 *	Fill in the VIF structures
@@ -1513,6 +1522,9 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 	if (likely(mrt->mroute6_sk == NULL)) {
 		mrt->mroute6_sk = sk;
 		net->ipv6.devconf_all->mc_forwarding++;
+		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+					     NETCONFA_IFINDEX_ALL,
+					     net->ipv6.devconf_all);
 	}
 	else
 		err = -EADDRINUSE;
@@ -1535,6 +1547,10 @@ int ip6mr_sk_done(struct sock *sk)
 			write_lock_bh(&mrt_lock);
 			mrt->mroute6_sk = NULL;
 			net->ipv6.devconf_all->mc_forwarding--;
+			inet6_netconf_notify_devconf(net,
+						     NETCONFA_MC_FORWARDING,
+						     NETCONFA_IFINDEX_ALL,
+						     net->ipv6.devconf_all);
 			write_unlock_bh(&mrt_lock);
 
 			mroute_clean_tables(mrt);
-- 
cgit v1.2.3-55-g7522


From d71f2f88825b31553881944959962b1871099e1f Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Wed, 5 Dec 2012 11:59:22 +0100
Subject: ACPI / PM: Fix header of acpi_dev_pm_detach() in acpi.h

The header of acpi_dev_pm_detach() in include/linux/acpi.h has an
incorrect return type, which should be void.  Fix that.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28ba643c92c1..79345cd5ac20 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -459,7 +459,7 @@ static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
-int acpi_dev_pm_detach(struct device *dev, bool power_off);
+void acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
 static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
-- 
cgit v1.2.3-55-g7522


From 01f218803757c9ec1152ac2fd39d03c27c452634 Mon Sep 17 00:00:00 2001
From: Michael S. Tsirkin
Date: Wed, 17 Oct 2012 18:06:02 +0200
Subject: kvm: add kvm_set_irq_inatomic

Add an API to inject IRQ from atomic context.
Return EWOULDBLOCK if impossible (e.g. for multicast).
Only MSI is supported ATM.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/irq_comm.c      | 83 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 72 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8e5c7b651655..36c3704bfa7c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -693,6 +693,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
 		int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 2eb58af7ee99..656fa455e154 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -102,6 +102,23 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
+static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+				   struct kvm_lapic_irq *irq)
+{
+	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
+	irq->dest_id = (e->msi.address_lo &
+			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	irq->vector = (e->msi.data &
+			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+	irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+	irq->delivery_mode = e->msi.data & 0x700;
+	irq->level = 1;
+	irq->shorthand = 0;
+	/* TODO Deal with RH bit of MSI message address */
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		struct kvm *kvm, int irq_source_id, int level)
 {
@@ -110,22 +127,26 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	if (!level)
 		return -1;
 
-	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+	kvm_set_msi_irq(e, &irq);
 
-	irq.dest_id = (e->msi.address_lo &
-			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-	irq.vector = (e->msi.data &
-			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
-	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
-	irq.delivery_mode = e->msi.data & 0x700;
-	irq.level = 1;
-	irq.shorthand = 0;
-
-	/* TODO Deal with RH bit of MSI message address */
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
+
+static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			 struct kvm *kvm)
+{
+	struct kvm_lapic_irq irq;
+	int r;
+
+	kvm_set_msi_irq(e, &irq);
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+		return r;
+	else
+		return -EWOULDBLOCK;
+}
+
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;
@@ -178,6 +199,44 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	return ret;
 }
 
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure,
+ * user can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	int ret = -EINVAL;
+	struct kvm_irq_routing_table *irq_rt;
+	struct hlist_node *n;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/*
+	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
+	 * which would need to be retried from thread context;  when same GSI
+	 * is connected to both PIC and IOAPIC, we'd have to report a
+	 * partial failure here.
+	 * Since there's no easy way to do this, we only support injecting MSI
+	 * which is limited to 1:1 GSI mapping.
+	 */
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+				ret = kvm_set_msi_inatomic(e, kvm);
+			else
+				ret = -EWOULDBLOCK;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
+}
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-- 
cgit v1.2.3-55-g7522


From 8fa72d234da9b6b473bbb1f74d533663e4996e6b Mon Sep 17 00:00:00 2001
From: Jeff Moyer
Date: Wed, 5 Dec 2012 20:17:21 +0100
Subject: bdi: add a user-tunable cpu_list for the bdi flusher threads

In realtime environments, it may be desirable to keep the per-bdi
flusher threads from running on certain cpus.  This patch adds a
cpu_list file to /sys/class/bdi/* to enable this.  The default is to tie
the flusher threads to the same numa node as the backing device (though
I could be convinced to make it a mask of all cpus to avoid a change in
behaviour).

Thanks to Jeremy Eder for the original idea.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/backing-dev.h |  4 +++
 mm/backing-dev.c            | 84 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9abc9126..238521a19849 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
+#include <linux/mutex.h>
 
 struct page;
 struct device;
@@ -105,6 +106,9 @@ struct backing_dev_info {
 
 	struct timer_list laptop_mode_wb_timer;
 
+	cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */
+	struct mutex flusher_cpumask_lock;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
 	struct dentry *debug_stats;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
+static ssize_t cpu_list_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	struct bdi_writeback *wb = &bdi->wb;
+	cpumask_var_t newmask;
+	ssize_t ret;
+	struct task_struct *task;
+
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpulist_parse(buf, newmask);
+	if (!ret) {
+		spin_lock_bh(&bdi->wb_lock);
+		task = wb->task;
+		if (task)
+			get_task_struct(task);
+		spin_unlock_bh(&bdi->wb_lock);
+
+		mutex_lock(&bdi->flusher_cpumask_lock);
+		if (task) {
+			ret = set_cpus_allowed_ptr(task, newmask);
+			put_task_struct(task);
+		}
+		if (ret == 0) {
+			cpumask_copy(bdi->flusher_cpumask, newmask);
+			ret = count;
+		}
+		mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	}
+	free_cpumask_var(newmask);
+
+	return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+		struct device_attribute *attr, char *page)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&bdi->flusher_cpumask_lock);
+	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+	mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	return ret;
+}
+
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
 				writeback_inodes_wb(&bdi->wb, 1024,
 						    WB_REASON_FORKER_THREAD);
 			} else {
+				int ret;
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
+				mutex_lock(&bdi->flusher_cpumask_lock);
+				ret = set_cpus_allowed_ptr(task,
+							bdi->flusher_cpumask);
+				mutex_unlock(&bdi->flusher_cpumask_lock);
+				if (ret)
+					printk_once("%s: failed to bind flusher"
+						    " thread %s, error %d\n",
+						    __func__, task->comm, ret);
 				wake_up_process(task);
 			}
 			bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 						dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
+	} else {
+		int node;
+		/*
+		 * Set up a default cpumask for the flusher threads that
+		 * includes all cpus on the same numa node as the device.
+		 * The mask may be overridden via sysfs.
+		 */
+		node = dev_to_node(bdi->dev);
+		if (node != NUMA_NO_NODE)
+			cpumask_copy(bdi->flusher_cpumask,
+				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
+	if (!bdi_cap_flush_forker(bdi)) {
+		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!bdi->flusher_cpumask)
+			return -ENOMEM;
+		cpumask_setall(bdi->flusher_cpumask);
+		mutex_init(&bdi->flusher_cpumask_lock);
+	} else
+		bdi->flusher_cpumask = NULL;
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
+	kfree(bdi->flusher_cpumask);
+
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()
-- 
cgit v1.2.3-55-g7522


From dd5fc854de5fd37adfcef8a366cd21a55aa01d3d Mon Sep 17 00:00:00 2001
From: Matthew Garrett
Date: Wed, 5 Dec 2012 14:33:26 -0700
Subject: EFI: Stash ROMs if they're not in the PCI BAR

EFI provides support for providing PCI ROMs via means other than the ROM
BAR. This support vanishes after we've exited boot services, so add support
for stashing copies of the ROMs in setup_data if they're not otherwise
available.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>---
 arch/x86/boot/compressed/eboot.c | 118 +++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/bootparam.h |   1 +
 arch/x86/include/asm/pci.h       |  12 ++++
 include/linux/efi.h              |  71 +++++++++++++++++++++++
 4 files changed, 202 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c760e073963e..8a54313bc7dc 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -8,6 +8,7 @@
  * ----------------------------------------------------------------------- */
 
 #include <linux/efi.h>
+#include <linux/pci.h>
 #include <asm/efi.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
@@ -243,6 +244,121 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
 	*size = len;
 }
 
+static efi_status_t setup_efi_pci(struct boot_params *params)
+{
+	efi_pci_io_protocol *pci;
+	efi_status_t status;
+	void **pci_handle;
+	efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
+	unsigned long nr_pci, size = 0;
+	int i;
+	struct setup_data *data;
+
+	data = (struct setup_data *)params->hdr.setup_data;
+
+	while (data && data->next)
+		data = (struct setup_data *)data->next;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+				NULL, &size, pci_handle);
+
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+					EFI_LOADER_DATA, size, &pci_handle);
+
+		if (status != EFI_SUCCESS)
+			return status;
+
+		status = efi_call_phys5(sys_table->boottime->locate_handle,
+					EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+					NULL, &size, pci_handle);
+	}
+
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	nr_pci = size / sizeof(void *);
+	for (i = 0; i < nr_pci; i++) {
+		void *h = pci_handle[i];
+		uint64_t attributes;
+		struct pci_setup_rom *rom;
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, &pci_proto, &pci);
+
+		if (status != EFI_SUCCESS)
+			continue;
+
+		if (!pci)
+			continue;
+
+		status = efi_call_phys4(pci->attributes, pci,
+					EfiPciIoAttributeOperationGet, 0,
+					&attributes);
+
+		if (status != EFI_SUCCESS)
+			continue;
+
+		if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM)
+			continue;
+
+		if (!pci->romimage || !pci->romsize)
+			continue;
+
+		size = pci->romsize + sizeof(*rom);
+
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &rom);
+
+		if (status != EFI_SUCCESS)
+			continue;
+
+		rom->data.type = SETUP_PCI;
+		rom->data.len = size - sizeof(struct setup_data);
+		rom->data.next = 0;
+		rom->pcilen = pci->romsize;
+
+		status = efi_call_phys5(pci->pci.read, pci,
+					EfiPciIoWidthUint16, PCI_VENDOR_ID,
+					1, &(rom->vendor));
+
+		if (status != EFI_SUCCESS)
+			goto free_struct;
+
+		status = efi_call_phys5(pci->pci.read, pci,
+					EfiPciIoWidthUint16, PCI_DEVICE_ID,
+					1, &(rom->devid));
+
+		if (status != EFI_SUCCESS)
+			goto free_struct;
+
+		status = efi_call_phys5(pci->get_location, pci,
+					&(rom->segment), &(rom->bus),
+					&(rom->device), &(rom->function));
+
+		if (status != EFI_SUCCESS)
+			goto free_struct;
+
+		memcpy(rom->romdata, pci->romimage, pci->romsize);
+
+		if (data)
+			data->next = (uint64_t)rom;
+		else
+			params->hdr.setup_data = (uint64_t)rom;
+
+		data = (struct setup_data *)rom;
+
+		continue;
+	free_struct:
+		efi_call_phys1(sys_table->boottime->free_pool, rom);
+	}
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, pci_handle);
+	return status;
+}
+
 /*
  * See if we have Graphics Output Protocol
  */
@@ -1026,6 +1142,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
 
 	setup_graphics(boot_params);
 
+	setup_efi_pci(boot_params);
+
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 				EFI_LOADER_DATA, sizeof(*gdt),
 				(void **)&gdt);
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 2ad874cb661c..92862cd90201 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -13,6 +13,7 @@
 #define SETUP_NONE			0
 #define SETUP_E820_EXT			1
 #define SETUP_DTB			2
+#define SETUP_PCI			3
 
 /* extensible setup data list node */
 struct setup_data {
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 6e41b9343928..dba7805176bf 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -171,4 +171,16 @@ cpumask_of_pcibus(const struct pci_bus *bus)
 }
 #endif
 
+struct pci_setup_rom {
+	struct setup_data data;
+	uint16_t vendor;
+	uint16_t devid;
+	uint64_t pcilen;
+	unsigned long segment;
+	unsigned long bus;
+	unsigned long device;
+	unsigned long function;
+	uint8_t romdata[0];
+};
+
 #endif /* _ASM_X86_PCI_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8670eb1eb8cd..8eb1be17c801 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -196,6 +196,77 @@ typedef struct {
 	void *create_event_ex;
 } efi_boot_services_t;
 
+typedef enum {
+	EfiPciIoWidthUint8,
+	EfiPciIoWidthUint16,
+	EfiPciIoWidthUint32,
+	EfiPciIoWidthUint64,
+	EfiPciIoWidthFifoUint8,
+	EfiPciIoWidthFifoUint16,
+	EfiPciIoWidthFifoUint32,
+	EfiPciIoWidthFifoUint64,
+	EfiPciIoWidthFillUint8,
+	EfiPciIoWidthFillUint16,
+	EfiPciIoWidthFillUint32,
+	EfiPciIoWidthFillUint64,
+	EfiPciIoWidthMaximum
+} EFI_PCI_IO_PROTOCOL_WIDTH;
+
+typedef enum {
+	EfiPciIoAttributeOperationGet,
+	EfiPciIoAttributeOperationSet,
+	EfiPciIoAttributeOperationEnable,
+	EfiPciIoAttributeOperationDisable,
+	EfiPciIoAttributeOperationSupported,
+    EfiPciIoAttributeOperationMaximum
+} EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION;
+
+
+typedef struct {
+	void *read;
+	void *write;
+} efi_pci_io_protocol_access_t;
+
+typedef struct {
+	void *poll_mem;
+	void *poll_io;
+	efi_pci_io_protocol_access_t mem;
+	efi_pci_io_protocol_access_t io;
+	efi_pci_io_protocol_access_t pci;
+	void *copy_mem;
+	void *map;
+	void *unmap;
+	void *allocate_buffer;
+	void *free_buffer;
+	void *flush;
+	void *get_location;
+	void *attributes;
+	void *get_bar_attributes;
+	void *set_bar_attributes;
+	uint64_t romsize;
+	void *romimage;
+} efi_pci_io_protocol;
+
+#define EFI_PCI_IO_ATTRIBUTE_ISA_MOTHERBOARD_IO 0x0001
+#define EFI_PCI_IO_ATTRIBUTE_ISA_IO 0x0002
+#define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO 0x0004
+#define EFI_PCI_IO_ATTRIBUTE_VGA_MEMORY 0x0008
+#define EFI_PCI_IO_ATTRIBUTE_VGA_IO 0x0010
+#define EFI_PCI_IO_ATTRIBUTE_IDE_PRIMARY_IO 0x0020
+#define EFI_PCI_IO_ATTRIBUTE_IDE_SECONDARY_IO 0x0040
+#define EFI_PCI_IO_ATTRIBUTE_MEMORY_WRITE_COMBINE 0x0080
+#define EFI_PCI_IO_ATTRIBUTE_IO 0x0100
+#define EFI_PCI_IO_ATTRIBUTE_MEMORY 0x0200
+#define EFI_PCI_IO_ATTRIBUTE_BUS_MASTER 0x0400
+#define EFI_PCI_IO_ATTRIBUTE_MEMORY_CACHED 0x0800
+#define EFI_PCI_IO_ATTRIBUTE_MEMORY_DISABLE 0x1000
+#define EFI_PCI_IO_ATTRIBUTE_EMBEDDED_DEVICE 0x2000
+#define EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM 0x4000
+#define EFI_PCI_IO_ATTRIBUTE_DUAL_ADDRESS_CYCLE 0x8000
+#define EFI_PCI_IO_ATTRIBUTE_ISA_IO_16 0x10000
+#define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO_16 0x20000
+#define EFI_PCI_IO_ATTRIBUTE_VGA_IO_16 0x40000
+
 /*
  * Types and defines for EFI ResetSystem
  */
-- 
cgit v1.2.3-55-g7522


From eca0d4676d8e29c209ddce0c0c1755472ffc70a6 Mon Sep 17 00:00:00 2001
From: Matthew Garrett
Date: Wed, 5 Dec 2012 14:33:27 -0700
Subject: PCI: Add pcibios_add_device

Platforms may want to provide architecture-specific functionality during
PCI enumeration. Add a pcibios_add_device() call that architectures can
override to do so.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>---
 drivers/pci/bus.c   |  5 +++++
 drivers/pci/pci.c   | 13 +++++++++++++
 include/linux/pci.h |  1 +
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 6241fd05bd41..4f22fa16a3ba 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -170,6 +170,11 @@ int pci_bus_add_device(struct pci_dev *dev)
 	int retval;
 
 	pci_fixup_device(pci_fixup_final, dev);
+
+	retval = pcibios_add_device(dev);
+	if (retval)
+		return retval;
+
 	retval = device_add(&dev->dev);
 	if (retval)
 		return retval;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 54858838f098..fa0ddd51a216 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1333,6 +1333,19 @@ void pcim_pin_device(struct pci_dev *pdev)
 		dr->pinned = 1;
 }
 
+/*
+ * pcibios_add_device - provide arch specific hooks when adding device dev
+ * @dev: the PCI device being added
+ *
+ * Permits the platform to provide architecture specific functionality when
+ * devices are added. This is the default implementation. Architecture
+ * implementations can override this.
+ */
+int __weak pcibios_add_device (struct pci_dev *dev)
+{
+	return 0;
+}
+
 /**
  * pcibios_disable_device - disable arch specific PCI resources for device dev
  * @dev: the PCI device to disable
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee2179546c63..195c2593bd31 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1592,6 +1592,7 @@ void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
 int pcibios_set_pcie_reset_state(struct pci_dev *dev,
 				 enum pcie_reset_state state);
+int pcibios_add_device(struct pci_dev *dev);
 
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __init pci_mmcfg_early_init(void);
-- 
cgit v1.2.3-55-g7522


From 84c1b80e32638f881c17390dfe88143e5cd3f583 Mon Sep 17 00:00:00 2001
From: Matthew Garrett
Date: Wed, 5 Dec 2012 14:33:27 -0700
Subject: PCI: Add support for non-BAR ROMs

Platforms may provide their own mechanisms for obtaining ROMs. Add support
for using data provided by the platform in that case.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>---
 drivers/pci/rom.c   | 11 +++++++++--
 include/linux/pci.h |  2 ++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 0b3037ab8b93..3a3828fbc879 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -117,12 +117,18 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
 	loff_t start;
 	void __iomem *rom;
 
+	/*
+	 * Some devices may provide ROMs via a source other than the BAR
+	 */
+	if (pdev->rom && pdev->romlen) {
+		*size = pdev->romlen;
+		return phys_to_virt((phys_addr_t)pdev->rom);
 	/*
 	 * IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy
 	 * memory map if the VGA enable bit of the Bridge Control register is
 	 * set for embedded VGA.
 	 */
-	if (res->flags & IORESOURCE_ROM_SHADOW) {
+	} else if (res->flags & IORESOURCE_ROM_SHADOW) {
 		/* primary video rom always starts here */
 		start = (loff_t)0xC0000;
 		*size = 0x20000; /* cover C000:0 through E000:0 */
@@ -181,7 +187,8 @@ void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom)
 	if (res->flags & (IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY))
 		return;
 
-	iounmap(rom);
+	if (!pdev->rom || !pdev->romlen)
+		iounmap(rom);
 
 	/* Disable again before continuing, leave enabled if pci=rom */
 	if (!(res->flags & (IORESOURCE_ROM_ENABLE | IORESOURCE_ROM_SHADOW)))
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 195c2593bd31..f116b2d859dc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -333,6 +333,8 @@ struct pci_dev {
 	};
 	struct pci_ats	*ats;	/* Address Translation Service */
 #endif
+	void *rom; /* Physical pointer to ROM if it's not from the BAR */
+	size_t romlen; /* Length of ROM if it's not from the BAR */
 };
 
 static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
-- 
cgit v1.2.3-55-g7522


From 161b96c383c442f4d7dabbb8500a5fbd551b344d Mon Sep 17 00:00:00 2001
From: Alexander Shiyan
Date: Wed, 7 Nov 2012 21:30:29 +0400
Subject: spi/clps711x: New SPI master driver

This patch add new driver for CLPS711X SPI master controller.
Due to platform limitations driver supports only 8 bit transfer mode.
Chip select control is handled via GPIO.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig                        |   7 +
 drivers/spi/Makefile                       |   1 +
 drivers/spi/spi-clps711x.c                 | 296 +++++++++++++++++++++++++++++
 include/linux/platform_data/spi-clps711x.h |  21 ++
 4 files changed, 325 insertions(+)
 create mode 100644 drivers/spi/spi-clps711x.c
 create mode 100644 include/linux/platform_data/spi-clps711x.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1acae359cabe..2a13e637e46b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -123,6 +123,13 @@ config SPI_BUTTERFLY
 	  inexpensive battery powered microcontroller evaluation board.
 	  This same cable can be used to flash new firmware.
 
+config SPI_CLPS711X
+	tristate "CLPS711X host SPI controller"
+	depends on ARCH_CLPS711X
+	help
+	  This enables dedicated general purpose SPI/Microwire1-compatible
+	  master mode interface (SSI1) for CLPS711X-based CPUs.
+
 config SPI_COLDFIRE_QSPI
 	tristate "Freescale Coldfire QSPI controller"
 	depends on (M520x || M523x || M5249 || M525x || M527x || M528x || M532x)
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index c48df47e4b0f..25ab4a1cd014 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_SPI_BFIN5XX)		+= spi-bfin5xx.o
 obj-$(CONFIG_SPI_BFIN_SPORT)		+= spi-bfin-sport.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi-bitbang.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi-butterfly.o
+obj-$(CONFIG_SPI_CLPS711X)		+= spi-clps711x.o
 obj-$(CONFIG_SPI_COLDFIRE_QSPI)		+= spi-coldfire-qspi.o
 obj-$(CONFIG_SPI_DAVINCI)		+= spi-davinci.o
 obj-$(CONFIG_SPI_DESIGNWARE)		+= spi-dw.o
diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c
new file mode 100644
index 000000000000..59677eb30f94
--- /dev/null
+++ b/drivers/spi/spi-clps711x.c
@@ -0,0 +1,296 @@
+/*
+ *  CLPS711X SPI bus driver
+ *
+ *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_data/spi-clps711x.h>
+
+#include <mach/hardware.h>
+
+#define DRIVER_NAME	"spi-clps711x"
+
+struct spi_clps711x_data {
+	struct completion	done;
+
+	struct clk		*spi_clk;
+	u32			max_speed_hz;
+
+	u8			*tx_buf;
+	u8			*rx_buf;
+	int			count;
+	int			len;
+
+	int			chipselect[0];
+};
+
+static int spi_clps711x_setup(struct spi_device *spi)
+{
+	struct spi_clps711x_data *hw = spi_master_get_devdata(spi->master);
+
+	if (spi->bits_per_word != 8) {
+		dev_err(&spi->dev, "Unsupported master bus width %i\n",
+			spi->bits_per_word);
+		return -EINVAL;
+	}
+
+	/* We are expect that SPI-device is not selected */
+	gpio_direction_output(hw->chipselect[spi->chip_select],
+			      !(spi->mode & SPI_CS_HIGH));
+
+	return 0;
+}
+
+static void spi_clps711x_setup_mode(struct spi_device *spi)
+{
+	/* Setup edge for transfer */
+	if (spi->mode & SPI_CPHA)
+		clps_writew(clps_readw(SYSCON3) | SYSCON3_ADCCKNSEN, SYSCON3);
+	else
+		clps_writew(clps_readw(SYSCON3) & ~SYSCON3_ADCCKNSEN, SYSCON3);
+}
+
+static int spi_clps711x_setup_xfer(struct spi_device *spi,
+				   struct spi_transfer *xfer)
+{
+	u32 speed = xfer->speed_hz ? : spi->max_speed_hz;
+	u8 bpw = xfer->bits_per_word ? : spi->bits_per_word;
+	struct spi_clps711x_data *hw = spi_master_get_devdata(spi->master);
+
+	if (bpw != 8) {
+		dev_err(&spi->dev, "Unsupported master bus width %i\n", bpw);
+		return -EINVAL;
+	}
+
+	/* Setup SPI frequency divider */
+	if (!speed || (speed >= hw->max_speed_hz))
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(3), SYSCON1);
+	else if (speed >= (hw->max_speed_hz / 2))
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(2), SYSCON1);
+	else if (speed >= (hw->max_speed_hz / 8))
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(1), SYSCON1);
+	else
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(0), SYSCON1);
+
+	return 0;
+}
+
+static int spi_clps711x_transfer_one_message(struct spi_master *master,
+					     struct spi_message *msg)
+{
+	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+	struct spi_transfer *xfer;
+	int status = 0, cs = hw->chipselect[msg->spi->chip_select];
+	u32 data;
+
+	spi_clps711x_setup_mode(msg->spi);
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (spi_clps711x_setup_xfer(msg->spi, xfer)) {
+			status = -EINVAL;
+			goto out_xfr;
+		}
+
+		gpio_set_value(cs, !!(msg->spi->mode & SPI_CS_HIGH));
+
+		INIT_COMPLETION(hw->done);
+
+		hw->count = 0;
+		hw->len = xfer->len;
+		hw->tx_buf = (u8 *)xfer->tx_buf;
+		hw->rx_buf = (u8 *)xfer->rx_buf;
+
+		/* Initiate transfer */
+		data = hw->tx_buf ? hw->tx_buf[hw->count] : 0;
+		clps_writel(data | SYNCIO_FRMLEN(8) | SYNCIO_TXFRMEN, SYNCIO);
+
+		wait_for_completion(&hw->done);
+
+		if (xfer->delay_usecs)
+			udelay(xfer->delay_usecs);
+
+		if (xfer->cs_change ||
+		    list_is_last(&xfer->transfer_list, &msg->transfers))
+			gpio_set_value(cs, !(msg->spi->mode & SPI_CS_HIGH));
+
+		msg->actual_length += xfer->len;
+	}
+
+out_xfr:
+	msg->status = status;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static irqreturn_t spi_clps711x_isr(int irq, void *dev_id)
+{
+	struct spi_clps711x_data *hw = (struct spi_clps711x_data *)dev_id;
+	u32 data;
+
+	/* Handle RX */
+	data = clps_readb(SYNCIO);
+	if (hw->rx_buf)
+		hw->rx_buf[hw->count] = (u8)data;
+
+	hw->count++;
+
+	/* Handle TX */
+	if (hw->count < hw->len) {
+		data = hw->tx_buf ? hw->tx_buf[hw->count] : 0;
+		clps_writel(data | SYNCIO_FRMLEN(8) | SYNCIO_TXFRMEN, SYNCIO);
+	} else
+		complete(&hw->done);
+
+	return IRQ_HANDLED;
+}
+
+static int __devinit spi_clps711x_probe(struct platform_device *pdev)
+{
+	int i, ret;
+	struct spi_master *master;
+	struct spi_clps711x_data *hw;
+	struct spi_clps711x_pdata *pdata = dev_get_platdata(&pdev->dev);
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data supplied\n");
+		return -EINVAL;
+	}
+
+	if (pdata->num_chipselect < 1) {
+		dev_err(&pdev->dev, "At least one CS must be defined\n");
+		return -EINVAL;
+	}
+
+	master = spi_alloc_master(&pdev->dev,
+				  sizeof(struct spi_clps711x_data) +
+				  sizeof(int) * pdata->num_chipselect);
+	if (!master) {
+		dev_err(&pdev->dev, "SPI allocating memory error\n");
+		return -ENOMEM;
+	}
+
+	master->bus_num = pdev->id;
+	master->mode_bits = SPI_CPHA | SPI_CS_HIGH;
+	master->num_chipselect = pdata->num_chipselect;
+	master->setup = spi_clps711x_setup;
+	master->transfer_one_message = spi_clps711x_transfer_one_message;
+
+	hw = spi_master_get_devdata(master);
+
+	for (i = 0; i < master->num_chipselect; i++) {
+		hw->chipselect[i] = pdata->chipselect[i];
+		if (!gpio_is_valid(hw->chipselect[i])) {
+			dev_err(&pdev->dev, "Invalid CS GPIO %i\n", i);
+			ret = -EINVAL;
+			goto err_out;
+		}
+		if (gpio_request(hw->chipselect[i], DRIVER_NAME)) {
+			dev_err(&pdev->dev, "Can't get CS GPIO %i\n", i);
+			ret = -EINVAL;
+			goto err_out;
+		}
+	}
+
+	hw->spi_clk = devm_clk_get(&pdev->dev, "spi");
+	if (IS_ERR(hw->spi_clk)) {
+		dev_err(&pdev->dev, "Can't get clocks\n");
+		ret = PTR_ERR(hw->spi_clk);
+		goto err_out;
+	}
+	hw->max_speed_hz = clk_get_rate(hw->spi_clk);
+
+	init_completion(&hw->done);
+	platform_set_drvdata(pdev, master);
+
+	/* Disable extended mode due hardware problems */
+	clps_writew(clps_readw(SYSCON3) & ~SYSCON3_ADCCON, SYSCON3);
+
+	/* Clear possible pending interrupt */
+	clps_readl(SYNCIO);
+
+	ret = devm_request_irq(&pdev->dev, IRQ_SSEOTI, spi_clps711x_isr, 0,
+			       dev_name(&pdev->dev), hw);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't request IRQ\n");
+		clk_put(hw->spi_clk);
+		goto clk_out;
+	}
+
+	ret = spi_register_master(master);
+	if (!ret) {
+		dev_info(&pdev->dev,
+			 "SPI bus driver initialized. Master clock %u Hz\n",
+			 hw->max_speed_hz);
+		return 0;
+	}
+
+	dev_err(&pdev->dev, "Failed to register master\n");
+	devm_free_irq(&pdev->dev, IRQ_SSEOTI, hw);
+
+clk_out:
+	devm_clk_put(&pdev->dev, hw->spi_clk);
+
+err_out:
+	while (--i >= 0)
+		if (gpio_is_valid(hw->chipselect[i]))
+			gpio_free(hw->chipselect[i]);
+
+	platform_set_drvdata(pdev, NULL);
+	spi_master_put(master);
+	kfree(master);
+
+	return ret;
+}
+
+static int __devexit spi_clps711x_remove(struct platform_device *pdev)
+{
+	int i;
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+
+	devm_free_irq(&pdev->dev, IRQ_SSEOTI, hw);
+
+	for (i = 0; i < master->num_chipselect; i++)
+		if (gpio_is_valid(hw->chipselect[i]))
+			gpio_free(hw->chipselect[i]);
+
+	devm_clk_put(&pdev->dev, hw->spi_clk);
+	platform_set_drvdata(pdev, NULL);
+	spi_unregister_master(master);
+	kfree(master);
+
+	return 0;
+}
+
+static struct platform_driver clps711x_spi_driver = {
+	.driver	= {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe	= spi_clps711x_probe,
+	.remove	= __devexit_p(spi_clps711x_remove),
+};
+module_platform_driver(clps711x_spi_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alexander Shiyan <shc_work@mail.ru>");
+MODULE_DESCRIPTION("CLPS711X SPI bus driver");
diff --git a/include/linux/platform_data/spi-clps711x.h b/include/linux/platform_data/spi-clps711x.h
new file mode 100644
index 000000000000..301956e63143
--- /dev/null
+++ b/include/linux/platform_data/spi-clps711x.h
@@ -0,0 +1,21 @@
+/*
+ *  CLPS711X SPI bus driver definitions
+ *
+ *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H
+#define ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H
+
+/* Board specific platform_data */
+struct spi_clps711x_pdata {
+	int *chipselect;	/* Array of GPIO-numbers */
+	int num_chipselect;	/* Total count of GPIOs */
+};
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 464ee9f966404786ba4c6be35dc8362ee8e6ba4e Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 20 Nov 2012 12:49:27 -0500
Subject: NFSv4.1: Ensure that the client tracks the server
 target_highest_slotid

Dynamic slot allocation in NFSv4.1 depends on the client being able to
track the server's target value for the highest slotid in the
slot table.  See the reference in Section 2.10.6.1 of RFC5661.

To avoid ordering problems in the case where 2 SEQUENCE replies contain
conflicting updates to this target value, we also introduce a generation
counter, to track whether or not an RPC containing a SEQUENCE operation
was launched before or after the last update.

Also rename the nfs4_slot_table target_max_slots field to
'target_highest_slotid' to avoid confusion with a slot
table size or number of slots.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback_proc.c    |  2 +-
 fs/nfs/nfs4proc.c         | 25 +++++++++++++++++++++++++
 fs/nfs/nfs4state.c        |  7 +++----
 fs/nfs/nfs4xdr.c          |  4 ++--
 include/linux/nfs_fs_sb.h |  5 +++--
 include/linux/nfs_xdr.h   |  2 ++
 6 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 0be08b964f38..0ef047b7d28d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -576,7 +576,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
 	if (args->crsa_target_max_slots == fc_tbl->max_slots)
 		goto out;
 
-	fc_tbl->target_max_slots = args->crsa_target_max_slots;
+	fc_tbl->target_highest_slotid = args->crsa_target_max_slots;
 	nfs41_handle_recall_slot(cps->clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 197ef3e4e1f7..d91abaa522e8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -488,6 +488,28 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 	res->sr_slot = NULL;
 }
 
+/* Update the client's idea of target_highest_slotid */
+static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 target_highest_slotid)
+{
+	if (tbl->target_highest_slotid == target_highest_slotid)
+		return;
+	tbl->target_highest_slotid = target_highest_slotid;
+	tbl->generation++;
+}
+
+static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot,
+		struct nfs4_sequence_res *res)
+{
+	spin_lock(&tbl->slot_tbl_lock);
+	if (tbl->generation != slot->generation)
+		goto out;
+	nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid);
+out:
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
 	struct nfs4_session *session;
@@ -522,6 +544,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
 			nfs4_schedule_lease_recovery(clp);
+		nfs41_update_target_slotid(slot->table, slot, res);
 		break;
 	case -NFS4ERR_DELAY:
 		/* The server detected a resend of the RPC call and
@@ -583,6 +606,7 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 		tbl->highest_used_slotid = slotid;
 	ret = &tbl->slots[slotid];
 	ret->renewal_time = jiffies;
+	ret->generation = tbl->generation;
 
 out:
 	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
@@ -5693,6 +5717,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 		tbl->max_slots = max_slots;
 	}
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	tbl->target_highest_slotid = max_slots - 1;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9495789c425b..842cb8c2f65d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2033,17 +2033,16 @@ static int nfs4_recall_slot(struct nfs_client *clp)
 		return 0;
 	nfs4_begin_drain_session(clp);
 	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS);
+	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS);
         if (!new)
 		return -ENOMEM;
 
 	spin_lock(&fc_tbl->slot_tbl_lock);
-	for (i = 0; i < fc_tbl->target_max_slots; i++)
+	for (i = 0; i <= fc_tbl->target_highest_slotid; i++)
 		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
 	old = fc_tbl->slots;
 	fc_tbl->slots = new;
-	fc_tbl->max_slots = fc_tbl->target_max_slots;
-	fc_tbl->target_max_slots = 0;
+	fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1;
 	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
 	spin_unlock(&fc_tbl->slot_tbl_lock);
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 27b0fec1a6b0..05d34f1fcc19 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5552,8 +5552,8 @@ static int decode_sequence(struct xdr_stream *xdr,
 	}
 	/* highest slot id - currently not processed */
 	dummy = be32_to_cpup(p++);
-	/* target highest slot id - currently not processed */
-	dummy = be32_to_cpup(p++);
+	/* target highest slot id */
+	res->sr_target_highest_slotid = be32_to_cpup(p++);
 	/* result flags */
 	res->sr_status_flags = be32_to_cpup(p);
 	status = 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b0412873d29c..57d406997def 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -217,8 +217,9 @@ struct nfs4_slot_table {
 	u32		max_slots;		/* # slots in table */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
-	u32		target_max_slots;	/* Set by CB_RECALL_SLOT as
-						 * the new max_slots */
+	u32		target_highest_slotid;	/* Server max_slot target */
+	unsigned long	generation;		/* Generation counter for
+						   target_highest_slotid */
 	struct completion complete;
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index deb31bbbb857..08c47db7417f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,6 +188,7 @@ struct nfs4_channel_attrs {
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
+	unsigned long		generation;
 	unsigned long		renewal_time;
 	u32			slot_nr;
 	u32		 	seq_nr;
@@ -202,6 +203,7 @@ struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
+	u32			sr_target_highest_slotid;
 };
 
 struct nfs4_get_lease_time_args {
-- 
cgit v1.2.3-55-g7522


From da0507b7c95ccd4d9c86394eef42fe076032af30 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 20 Nov 2012 18:10:30 -0500
Subject: NFSv4.1: Reset the sequence number for slots that have been
 deallocated

When the server tells us that it is dynamically resizing the session
replay cache, we should reset the sequence number for those slots
that have been deallocated.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c         | 18 ++++++++++++++++++
 fs/nfs/nfs4xdr.c          |  4 ++--
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  1 +
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d91abaa522e8..52435ec44193 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -498,6 +498,22 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
 	tbl->generation++;
 }
 
+static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 highest_slotid)
+{
+	unsigned int max_slotid, i;
+
+	if (tbl->server_highest_slotid == highest_slotid)
+		return;
+	if (tbl->highest_used_slotid > highest_slotid)
+		return;
+	max_slotid = min(tbl->max_slots - 1, highest_slotid);
+	/* Reset the seq_nr for deallocated slots */
+	for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++)
+		tbl->slots[i].seq_nr = 1;
+	tbl->server_highest_slotid = highest_slotid;
+}
+
 static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
 		struct nfs4_slot *slot,
 		struct nfs4_sequence_res *res)
@@ -505,6 +521,7 @@ static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
 	spin_lock(&tbl->slot_tbl_lock);
 	if (tbl->generation != slot->generation)
 		goto out;
+	nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid);
 	nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid);
 out:
 	spin_unlock(&tbl->slot_tbl_lock);
@@ -5718,6 +5735,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 	}
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
 	tbl->target_highest_slotid = max_slots - 1;
+	tbl->server_highest_slotid = max_slots - 1;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 05d34f1fcc19..a67040f51597 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5550,8 +5550,8 @@ static int decode_sequence(struct xdr_stream *xdr,
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
-	/* highest slot id - currently not processed */
-	dummy = be32_to_cpup(p++);
+	/* highest slot id */
+	res->sr_highest_slotid = be32_to_cpup(p++);
 	/* target highest slot id */
 	res->sr_target_highest_slotid = be32_to_cpup(p++);
 	/* result flags */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 57d406997def..646e64bbff4c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -218,6 +218,7 @@ struct nfs4_slot_table {
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
 	u32		target_highest_slotid;	/* Server max_slot target */
+	u32		server_highest_slotid;	/* Server highest slotid */
 	unsigned long	generation;		/* Generation counter for
 						   target_highest_slotid */
 	struct completion complete;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 08c47db7417f..3ddb08fba935 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -203,6 +203,7 @@ struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
+	u32			sr_highest_slotid;
 	u32			sr_target_highest_slotid;
 };
 
-- 
cgit v1.2.3-55-g7522


From 97e548a93de213b149eea025a97d88e28143b445 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 20 Nov 2012 14:45:48 -0500
Subject: NFSv4.1: Support dynamic resizing of the session slot table

Allow the server to control the size of the session slot table
by adjusting the value of sr_target_max_slots in the reply to the
SEQUENCE operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c         | 12 ++++++++++--
 fs/nfs/nfs4state.c        |  6 +++---
 include/linux/nfs_fs_sb.h |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62212231ce62..1792ece8b53c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -492,10 +492,17 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid)
 {
+	unsigned int max_slotid, i;
+
 	if (tbl->target_highest_slotid == target_highest_slotid)
 		return;
 	tbl->target_highest_slotid = target_highest_slotid;
 	tbl->generation++;
+
+	max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid);
+	for (i = tbl->max_slotid + 1; i <= max_slotid; i++)
+		rpc_wake_up_next(&tbl->slot_tbl_waitq);
+	tbl->max_slotid = max_slotid;
 }
 
 void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
@@ -622,8 +629,8 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 	dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		tbl->max_slots);
-	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
-	if (slotid >= tbl->max_slots)
+	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
+	if (slotid > tbl->max_slotid)
 		goto out;
 	__set_bit(slotid, tbl->used_slots);
 	if (slotid > tbl->highest_used_slotid ||
@@ -5744,6 +5751,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
 	tbl->target_highest_slotid = max_slots - 1;
 	tbl->server_highest_slotid = max_slots - 1;
+	tbl->max_slotid = max_slots - 1;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 842cb8c2f65d..1b7fa73c9436 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -254,15 +254,14 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 {
 	struct nfs4_session *ses = clp->cl_session;
 	struct nfs4_slot_table *tbl;
-	int max_slots;
+	unsigned int i;
 
 	if (ses == NULL)
 		return;
 	tbl = &ses->fc_slot_table;
 	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
 		spin_lock(&tbl->slot_tbl_lock);
-		max_slots = tbl->max_slots;
-		while (max_slots--) {
+		for (i = 0; i <= tbl->max_slotid; i++) {
 			if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
 						nfs4_set_task_privileged,
 						NULL) == NULL)
@@ -2043,6 +2042,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
 	old = fc_tbl->slots;
 	fc_tbl->slots = new;
 	fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1;
+	fc_tbl->max_slotid = fc_tbl->target_highest_slotid;
 	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
 	spin_unlock(&fc_tbl->slot_tbl_lock);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 646e64bbff4c..30715508fade 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -215,6 +215,7 @@ struct nfs4_slot_table {
 	spinlock_t	slot_tbl_lock;
 	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
 	u32		max_slots;		/* # slots in table */
+	u32		max_slotid;		/* Max allowed slotid value */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
 	u32		target_highest_slotid;	/* Server max_slot target */
-- 
cgit v1.2.3-55-g7522


From 87dda67e7386ba7d2164391ea58b34e028d8157b Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 20 Nov 2012 19:49:20 -0500
Subject: NFSv4.1: Allow SEQUENCE to resize the slot table on the fly

Instead of an array of slots, use a singly linked list of slots that
can be dynamically appended to or shrunk.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |   4 +-
 fs/nfs/nfs4proc.c       | 174 +++++++++++++++++++++++++++++++-----------------
 fs/nfs/nfs4state.c      |  22 ++----
 include/linux/nfs_xdr.h |   1 +
 4 files changed, 121 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5d4e82b10c3c..856bc496a210 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,10 +258,10 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
 				  bool sync);
 
-extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
-		u32 max_slots, gfp_t gfp_flags);
 extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid);
+extern int nfs4_resize_slot_table(struct nfs4_slot_table *tbl,
+		u32 max_reqs, u32 ivalue);
 
 static inline bool
 is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1792ece8b53c..fc65300172e1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -396,6 +396,27 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
 
 #if defined(CONFIG_NFS_V4_1)
 
+/*
+ * nfs4_shrink_slot_table - free retired slots from the slot table
+ */
+static void nfs4_shrink_slot_table(struct nfs4_slot_table  *tbl, u32 newsize)
+{
+	struct nfs4_slot **p;
+	if (newsize >= tbl->max_slots)
+		return;
+
+	p = &tbl->slots;
+	while (newsize--)
+		p = &(*p)->next;
+	while (*p) {
+		struct nfs4_slot *slot = *p;
+
+		*p = slot->next;
+		kfree(slot);
+		tbl->max_slots--;
+	}
+}
+
 /*
  * nfs4_free_slot - free a slot and efficiently update slot table.
  *
@@ -499,7 +520,7 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
 	tbl->target_highest_slotid = target_highest_slotid;
 	tbl->generation++;
 
-	max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid);
+	max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid);
 	for (i = tbl->max_slotid + 1; i <= max_slotid; i++)
 		rpc_wake_up_next(&tbl->slot_tbl_waitq);
 	tbl->max_slotid = max_slotid;
@@ -516,16 +537,12 @@ void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl,
 		u32 highest_slotid)
 {
-	unsigned int max_slotid, i;
-
 	if (tbl->server_highest_slotid == highest_slotid)
 		return;
 	if (tbl->highest_used_slotid > highest_slotid)
 		return;
-	max_slotid = min(tbl->max_slots - 1, highest_slotid);
-	/* Reset the seq_nr for deallocated slots */
-	for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++)
-		tbl->slots[i].seq_nr = 1;
+	/* Deallocate slots */
+	nfs4_shrink_slot_table(tbl, highest_slotid + 1);
 	tbl->server_highest_slotid = highest_slotid;
 }
 
@@ -612,6 +629,42 @@ static int nfs4_sequence_done(struct rpc_task *task,
 	return nfs41_sequence_done(task, res);
 }
 
+static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table  *tbl,
+		u32 slotid, u32 seq_init, gfp_t gfp_mask)
+{
+	struct nfs4_slot *slot;
+
+	slot = kzalloc(sizeof(*slot), gfp_mask);
+	if (slot) {
+		slot->table = tbl;
+		slot->slot_nr = slotid;
+		slot->seq_nr = seq_init;
+	}
+	return slot;
+}
+
+static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table  *tbl,
+		u32 slotid, u32 seq_init, gfp_t gfp_mask)
+{
+	struct nfs4_slot **p, *slot;
+
+	p = &tbl->slots;
+	for (;;) {
+		if (*p == NULL) {
+			*p = nfs4_new_slot(tbl, tbl->max_slots,
+					seq_init, gfp_mask);
+			if (*p == NULL)
+				break;
+			tbl->max_slots++;
+		}
+		slot = *p;
+		if (slot->slot_nr == slotid)
+			return slot;
+		p = &slot->next;
+	}
+	return NULL;
+}
+
 /*
  * nfs4_alloc_slot - efficiently look for a free slot
  *
@@ -628,15 +681,17 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 
 	dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
-		tbl->max_slots);
+		tbl->max_slotid + 1);
 	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
 	if (slotid > tbl->max_slotid)
 		goto out;
+	ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+	if (ret == NULL)
+		goto out;
 	__set_bit(slotid, tbl->used_slots);
 	if (slotid > tbl->highest_used_slotid ||
 			tbl->highest_used_slotid == NFS4_NO_SLOT)
 		tbl->highest_used_slotid = slotid;
-	ret = &tbl->slots[slotid];
 	ret->renewal_time = jiffies;
 	ret->generation = tbl->generation;
 
@@ -5718,67 +5773,56 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	return status;
 }
 
-struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
-		u32 max_slots, gfp_t gfp_flags)
+static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl,
+		 u32 max_reqs, u32 ivalue)
 {
-	struct nfs4_slot *tbl;
-	u32 i;
-
-	tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags);
-	if (tbl != NULL) {
-		for (i = 0; i < max_slots; i++) {
-			tbl[i].table = table;
-			tbl[i].slot_nr = i;
-		}
-	}
-	return tbl;
+	if (max_reqs <= tbl->max_slots)
+		return 0;
+	if (nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS))
+		return 0;
+	return -ENOMEM;
 }
 
-static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
-		struct nfs4_slot *new,
-		u32 max_slots,
+static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
+		u32 server_highest_slotid,
 		u32 ivalue)
 {
-	struct nfs4_slot *old = NULL;
-	u32 i;
+	struct nfs4_slot **p;
 
-	spin_lock(&tbl->slot_tbl_lock);
-	if (new) {
-		old = tbl->slots;
-		tbl->slots = new;
-		tbl->max_slots = max_slots;
+	nfs4_shrink_slot_table(tbl, server_highest_slotid + 1);
+	p = &tbl->slots;
+	while (*p) {
+		(*p)->seq_nr = ivalue;
+		p = &(*p)->next;
 	}
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	tbl->target_highest_slotid = max_slots - 1;
-	tbl->server_highest_slotid = max_slots - 1;
-	tbl->max_slotid = max_slots - 1;
-	for (i = 0; i < tbl->max_slots; i++)
-		tbl->slots[i].seq_nr = ivalue;
-	spin_unlock(&tbl->slot_tbl_lock);
-	kfree(old);
+	tbl->target_highest_slotid = server_highest_slotid;
+	tbl->server_highest_slotid = server_highest_slotid;
+	tbl->max_slotid = server_highest_slotid;
 }
 
 /*
  * (re)Initialise a slot table
  */
-static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
-				 u32 ivalue)
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl,
+		u32 max_reqs, u32 ivalue)
 {
-	struct nfs4_slot *new = NULL;
-	int ret = -ENOMEM;
+	int ret;
 
 	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
 		max_reqs, tbl->max_slots);
 
-	/* Does the newly negotiated max_reqs match the existing slot table? */
-	if (max_reqs != tbl->max_slots) {
-		new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS);
-		if (!new)
-			goto out;
-	}
-	ret = 0;
+	if (max_reqs > NFS4_MAX_SLOT_TABLE)
+		max_reqs = NFS4_MAX_SLOT_TABLE;
+
+	ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue);
+	if (ret)
+		goto out;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue);
+	spin_unlock(&tbl->slot_tbl_lock);
 
-	nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
 	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
 		tbl, tbl->slots, tbl->max_slots);
 out:
@@ -5786,18 +5830,28 @@ out:
 	return ret;
 }
 
+int nfs4_resize_slot_table(struct nfs4_slot_table *tbl,
+		 u32 max_reqs, u32 ivalue)
+{
+	int ret;
+
+	if (max_reqs > NFS4_MAX_SLOT_TABLE)
+		max_reqs = NFS4_MAX_SLOT_TABLE;
+	ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue);
+	if (ret)
+		return ret;
+	spin_lock(&tbl->slot_tbl_lock);
+	nfs4_shrink_slot_table(tbl, max_reqs);
+	tbl->max_slotid = max_reqs - 1;
+	spin_unlock(&tbl->slot_tbl_lock);
+	return 0;
+}
+
 /* Destroy the slot table */
 static void nfs4_destroy_slot_tables(struct nfs4_session *session)
 {
-	if (session->fc_slot_table.slots != NULL) {
-		kfree(session->fc_slot_table.slots);
-		session->fc_slot_table.slots = NULL;
-	}
-	if (session->bc_slot_table.slots != NULL) {
-		kfree(session->bc_slot_table.slots);
-		session->bc_slot_table.slots = NULL;
-	}
-	return;
+	nfs4_shrink_slot_table(&session->fc_slot_table, 0);
+	nfs4_shrink_slot_table(&session->bc_slot_table, 0);
 }
 
 /*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1b7fa73c9436..c14b2c7ac8a7 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2025,29 +2025,15 @@ out:
 static int nfs4_recall_slot(struct nfs_client *clp)
 {
 	struct nfs4_slot_table *fc_tbl;
-	struct nfs4_slot *new, *old;
-	int i;
+	u32 new_size;
 
 	if (!nfs4_has_session(clp))
 		return 0;
 	nfs4_begin_drain_session(clp);
-	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS);
-        if (!new)
-		return -ENOMEM;
 
-	spin_lock(&fc_tbl->slot_tbl_lock);
-	for (i = 0; i <= fc_tbl->target_highest_slotid; i++)
-		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
-	old = fc_tbl->slots;
-	fc_tbl->slots = new;
-	fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1;
-	fc_tbl->max_slotid = fc_tbl->target_highest_slotid;
-	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
-	spin_unlock(&fc_tbl->slot_tbl_lock);
-
-	kfree(old);
-	return 0;
+	fc_tbl = &clp->cl_session->fc_slot_table;
+	new_size = fc_tbl->server_highest_slotid + 1;
+	return nfs4_resize_slot_table(fc_tbl, new_size, 1);
 }
 
 static int nfs4_bind_conn_to_session(struct nfs_client *clp)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3ddb08fba935..44d256f6021c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,6 +188,7 @@ struct nfs4_channel_attrs {
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
+	struct nfs4_slot	*next;
 	unsigned long		generation;
 	unsigned long		renewal_time;
 	u32			slot_nr;
-- 
cgit v1.2.3-55-g7522


From c34309a45ea491e5f0c0d0af49ccfa018ff35fc1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 26 Nov 2012 14:33:03 -0500
Subject: NFS: Remove unused function slot_idx

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 30715508fade..e707c1b69796 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -225,11 +225,6 @@ struct nfs4_slot_table {
 	struct completion complete;
 };
 
-static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
-{
-	return sp - tbl->slots;
-}
-
 /*
  * Session related parameters
  */
-- 
cgit v1.2.3-55-g7522


From 76e697ba7e8d187f50e385d21a2b2f1709a62c14 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 26 Nov 2012 14:20:49 -0500
Subject: NFSv4.1: Move slot table and session struct definitions to
 nfs4session.h

Clean up. Gather NFSv4.1 slot definitions in fs/nfs/nfs4session.h.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c |   1 +
 fs/nfs/callback_xdr.c            |   1 +
 fs/nfs/internal.h                |  21 --------
 fs/nfs/nfs4_fs.h                 |  12 -----
 fs/nfs/nfs4filelayout.c          |   1 +
 fs/nfs/nfs4session.h             | 101 +++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4state.c               |   1 +
 fs/nfs/nfs4xdr.c                 |   1 +
 fs/nfs/super.c                   |   1 +
 include/linux/nfs_fs_sb.h        |  49 -------------------
 include/linux/nfs_xdr.h          |  11 +----
 11 files changed, 108 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f1027b06a1a9..4fa788c93f46 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -40,6 +40,7 @@
 #include <linux/pagevec.h>
 
 #include "../pnfs.h"
+#include "../nfs4session.h"
 #include "../internal.h"
 #include "blocklayout.h"
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ea6a7b190e6b..59461c957d9d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -16,6 +16,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "internal.h"
+#include "nfs4session.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8965a998b306..9bdbfc3884a9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -18,27 +18,6 @@ struct nfs_string;
  */
 #define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
 
-/*
- * Determine if sessions are in use.
- */
-static inline int nfs4_has_session(const struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4_1
-	if (clp->cl_session)
-		return 1;
-#endif /* CONFIG_NFS_V4_1 */
-	return 0;
-}
-
-static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4_1
-	if (nfs4_has_session(clp))
-		return (clp->cl_session->flags & SESSION4_PERSIST);
-#endif /* CONFIG_NFS_V4_1 */
-	return 0;
-}
-
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
 	if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cd3e3096b60a..322bd0168ebf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -29,11 +29,6 @@ enum nfs4_client_state {
 	NFS4CLNT_BIND_CONN_TO_SESSION,
 };
 
-enum nfs4_session_state {
-	NFS4_SESSION_INITING,
-	NFS4_SESSION_DRAINING,
-};
-
 #define NFS4_RENEW_TIMEOUT		0x01
 #define NFS4_RENEW_DELEGATION_CB	0x02
 
@@ -327,13 +322,6 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
 extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
 extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp);
 
-extern void nfs4_session_drain_complete(struct nfs4_session *session,
-		struct nfs4_slot_table *tbl);
-
-static inline bool nfs4_session_draining(struct nfs4_session *session)
-{
-	return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
-}
 #else
 static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
 {
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index bfb28fa38e74..591a1a7f8f94 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -35,6 +35,7 @@
 
 #include <linux/sunrpc/metrics.h>
 
+#include "nfs4session.h"
 #include "internal.h"
 #include "delegation.h"
 #include "nfs4filelayout.h"
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index cb47b1eb0886..e96323ff1d95 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -7,6 +7,68 @@
 #ifndef __LINUX_FS_NFS_NFS4SESSION_H
 #define __LINUX_FS_NFS_NFS4SESSION_H
 
+/* maximum number of slots to use */
+#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
+#define NFS4_MAX_SLOT_TABLE (256U)
+#define NFS4_NO_SLOT ((u32)-1)
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+
+/* Sessions slot seqid */
+struct nfs4_slot {
+	struct nfs4_slot_table	*table;
+	struct nfs4_slot	*next;
+	unsigned long		generation;
+	unsigned long		renewal_time;
+	u32			slot_nr;
+	u32		 	seq_nr;
+};
+
+/* Sessions */
+#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
+struct nfs4_slot_table {
+	struct nfs4_session *session;		/* Parent session */
+	struct nfs4_slot *slots;		/* seqid per slot */
+	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
+	spinlock_t	slot_tbl_lock;
+	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
+	u32		max_slots;		/* # slots in table */
+	u32		max_slotid;		/* Max allowed slotid value */
+	u32		highest_used_slotid;	/* sent to server on each SEQ.
+						 * op for dynamic resizing */
+	u32		target_highest_slotid;	/* Server max_slot target */
+	u32		server_highest_slotid;	/* Server highest slotid */
+	unsigned long	generation;		/* Generation counter for
+						   target_highest_slotid */
+	struct completion complete;
+};
+
+/*
+ * Session related parameters
+ */
+struct nfs4_session {
+	struct nfs4_sessionid		sess_id;
+	u32				flags;
+	unsigned long			session_state;
+	u32				hash_alg;
+	u32				ssv_len;
+
+	/* The fore and back channel */
+	struct nfs4_channel_attrs	fc_attrs;
+	struct nfs4_slot_table		fc_slot_table;
+	struct nfs4_channel_attrs	bc_attrs;
+	struct nfs4_slot_table		bc_slot_table;
+	struct nfs_client		*clp;
+	/* Create session arguments */
+	unsigned int			fc_target_max_rqst_sz;
+	unsigned int			fc_target_max_resp_sz;
+};
+
+enum nfs4_session_state {
+	NFS4_SESSION_INITING,
+	NFS4_SESSION_DRAINING,
+};
+
 #if defined(CONFIG_NFS_V4_1)
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
@@ -24,6 +86,31 @@ extern void nfs4_destroy_session(struct nfs4_session *session);
 extern int nfs4_init_session(struct nfs_server *server);
 extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
 
+extern void nfs4_session_drain_complete(struct nfs4_session *session,
+		struct nfs4_slot_table *tbl);
+
+static inline bool nfs4_session_draining(struct nfs4_session *session)
+{
+	return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
+}
+
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+	if (clp->cl_session)
+		return 1;
+	return 0;
+}
+
+static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
+{
+	if (nfs4_has_session(clp))
+		return (clp->cl_session->flags & SESSION4_PERSIST);
+	return 0;
+}
+
 #else /* defined(CONFIG_NFS_V4_1) */
 
 static inline int nfs4_init_session(struct nfs_server *server)
@@ -31,5 +118,19 @@ static inline int nfs4_init_session(struct nfs_server *server)
 	return 0;
 }
 
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+	return 0;
+}
+
+static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
+{
+	return 0;
+}
+
 #endif /* defined(CONFIG_NFS_V4_1) */
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
 #endif /* __LINUX_FS_NFS_NFS4SESSION_H */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1077b9698381..1402283d152d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a67040f51597..e786dc7582b1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -56,6 +56,7 @@
 
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 652d3f7176a9..e12cea4b36a5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -64,6 +64,7 @@
 #include "iostat.h"
 #include "internal.h"
 #include "fscache.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "nfs.h"
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e707c1b69796..6c6ed153a9b4 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -198,53 +198,4 @@ struct nfs_server {
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 
-
-/* maximum number of slots to use */
-#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
-#define NFS4_MAX_SLOT_TABLE (256U)
-#define NFS4_NO_SLOT ((u32)-1)
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-
-/* Sessions */
-#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
-struct nfs4_slot_table {
-	struct nfs4_session *session;		/* Parent session */
-	struct nfs4_slot *slots;		/* seqid per slot */
-	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
-	spinlock_t	slot_tbl_lock;
-	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
-	u32		max_slots;		/* # slots in table */
-	u32		max_slotid;		/* Max allowed slotid value */
-	u32		highest_used_slotid;	/* sent to server on each SEQ.
-						 * op for dynamic resizing */
-	u32		target_highest_slotid;	/* Server max_slot target */
-	u32		server_highest_slotid;	/* Server highest slotid */
-	unsigned long	generation;		/* Generation counter for
-						   target_highest_slotid */
-	struct completion complete;
-};
-
-/*
- * Session related parameters
- */
-struct nfs4_session {
-	struct nfs4_sessionid		sess_id;
-	u32				flags;
-	unsigned long			session_state;
-	u32				hash_alg;
-	u32				ssv_len;
-
-	/* The fore and back channel */
-	struct nfs4_channel_attrs	fc_attrs;
-	struct nfs4_slot_table		fc_slot_table;
-	struct nfs4_channel_attrs	bc_attrs;
-	struct nfs4_slot_table		bc_slot_table;
-	struct nfs_client		*clp;
-	/* Create session arguments */
-	unsigned int			fc_target_max_rqst_sz;
-	unsigned int			fc_target_max_resp_sz;
-};
-
-#endif /* CONFIG_NFS_V4 */
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 44d256f6021c..2076149db1a4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -185,16 +185,7 @@ struct nfs4_channel_attrs {
 	u32			max_reqs;
 };
 
-/* nfs41 sessions slot seqid */
-struct nfs4_slot {
-	struct nfs4_slot_table	*table;
-	struct nfs4_slot	*next;
-	unsigned long		generation;
-	unsigned long		renewal_time;
-	u32			slot_nr;
-	u32		 	seq_nr;
-};
-
+struct nfs4_slot;
 struct nfs4_sequence_args {
 	struct nfs4_slot	*sa_slot;
 	u8			sa_cache_this;
-- 
cgit v1.2.3-55-g7522


From 8fe72bac8de784c4059b41a7dd6bb0151a3ae898 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 29 Oct 2012 19:02:20 -0400
Subject: NFSv4: Clean up handling of privileged operations

Privileged rpc calls are those that are run by the state recovery thread,
in cases where we're trying to recover the system after a server reboot
or a network partition. In those cases, we want to fence off all other
rpc calls (see nfs4_begin_drain_session()) so that they don't end up
using stateids or clientids that are in the process of being recovered.

Prior to this patch, we had to set up special callback functions in
order to declare an rpc call as being privileged.
By adding a new field to the sequence arguments, this patch simplifies
things considerably, and allows us to declare the rpc call as privileged
before it is run.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 114 ++++++++++++++++++------------------------------
 include/linux/nfs_xdr.h |   3 +-
 2 files changed, 44 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b1635ce658d..38a709d78594 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -490,11 +490,17 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 {
 	args->sa_slot = NULL;
 	args->sa_cache_this = 0;
+	args->sa_privileged = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
 	res->sr_slot = NULL;
 }
 
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+	args->sa_privileged = 1;
+}
+
 int nfs41_setup_sequence(struct nfs4_session *session,
 				struct nfs4_sequence_args *args,
 				struct nfs4_sequence_res *res,
@@ -514,7 +520,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	spin_lock(&tbl->slot_tbl_lock);
 	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
-	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+	    !args->sa_privileged) {
 		/* The state manager will wait until the slot table is empty */
 		dprintk("%s session is draining\n", __func__);
 		goto out_sleep;
@@ -548,6 +554,9 @@ out_success:
 	rpc_call_start(task);
 	return 0;
 out_sleep:
+	/* Privileged tasks are queued with top priority */
+	if (args->sa_privileged)
+		rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
 	spin_unlock(&tbl->slot_tbl_lock);
 	return -EAGAIN;
@@ -593,12 +602,6 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
 	nfs41_setup_sequence(session, data->seq_args, data->seq_res, task);
 }
 
-static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs41_call_sync_prepare(task, calldata);
-}
-
 static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs41_call_sync_data *data = calldata;
@@ -611,17 +614,11 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
 	.rpc_call_done = nfs41_call_sync_done,
 };
 
-static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
-	.rpc_call_prepare = nfs41_call_priv_sync_prepare,
-	.rpc_call_done = nfs41_call_sync_done,
-};
-
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 				   struct nfs_server *server,
 				   struct rpc_message *msg,
 				   struct nfs4_sequence_args *args,
-				   struct nfs4_sequence_res *res,
-				   int privileged)
+				   struct nfs4_sequence_res *res)
 {
 	int ret;
 	struct rpc_task *task;
@@ -637,8 +634,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 		.callback_data = &data
 	};
 
-	if (privileged)
-		task_setup.callback_ops = &nfs41_call_priv_sync_ops;
 	task = rpc_run_task(&task_setup);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
@@ -656,16 +651,21 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt,
 			    struct nfs4_sequence_args *args,
 			    struct nfs4_sequence_res *res)
 {
-	return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
+	return nfs4_call_sync_sequence(clnt, server, msg, args, res);
 }
 
 #else
-static inline
+static
 void nfs41_init_sequence(struct nfs4_sequence_args *args,
 		struct nfs4_sequence_res *res, int cache_reply)
 {
 }
 
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+}
+
+
 static int nfs4_sequence_done(struct rpc_task *task,
 			       struct nfs4_sequence_res *res)
 {
@@ -1475,13 +1475,6 @@ unlock_no_action:
 	rcu_read_unlock();
 out_no_action:
 	task->tk_action = NULL;
-
-}
-
-static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs4_open_prepare(task, calldata);
 }
 
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -1542,12 +1535,6 @@ static const struct rpc_call_ops nfs4_open_ops = {
 	.rpc_release = nfs4_open_release,
 };
 
-static const struct rpc_call_ops nfs4_recover_open_ops = {
-	.rpc_call_prepare = nfs4_recover_open_prepare,
-	.rpc_call_done = nfs4_open_done,
-	.rpc_release = nfs4_open_release,
-};
-
 static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 {
 	struct inode *dir = data->dir->d_inode;
@@ -1577,7 +1564,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 	data->rpc_status = 0;
 	data->cancelled = 0;
 	if (isrecover)
-		task_setup_data.callback_ops = &nfs4_recover_open_ops;
+		nfs4_set_sequence_privileged(&o_arg->seq_args);
 	task = rpc_run_task(&task_setup_data);
         if (IS_ERR(task))
                 return PTR_ERR(task);
@@ -4558,8 +4545,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 		return;
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
-		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
+		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
 			goto out_release_lock_seqid;
+		}
 		data->arg.open_stateid = &state->stateid;
 		data->arg.new_lock_owner = 1;
 		data->res.open_seqid = data->arg.open_seqid;
@@ -4574,13 +4562,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	nfs_release_seqid(data->arg.open_seqid);
 out_release_lock_seqid:
 	nfs_release_seqid(data->arg.lock_seqid);
-	dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
-}
-
-static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs4_lock_prepare(task, calldata);
+	dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
 }
 
 static void nfs4_lock_done(struct rpc_task *task, void *calldata)
@@ -4635,12 +4617,6 @@ static const struct rpc_call_ops nfs4_lock_ops = {
 	.rpc_release = nfs4_lock_release,
 };
 
-static const struct rpc_call_ops nfs4_recover_lock_ops = {
-	.rpc_call_prepare = nfs4_recover_lock_prepare,
-	.rpc_call_done = nfs4_lock_done,
-	.rpc_release = nfs4_lock_release,
-};
-
 static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
 {
 	switch (error) {
@@ -4683,15 +4659,15 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		return -ENOMEM;
 	if (IS_SETLKW(cmd))
 		data->arg.block = 1;
-	if (recovery_type > NFS_LOCK_NEW) {
-		if (recovery_type == NFS_LOCK_RECLAIM)
-			data->arg.reclaim = NFS_LOCK_RECLAIM;
-		task_setup_data.callback_ops = &nfs4_recover_lock_ops;
-	}
 	nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
+	if (recovery_type > NFS_LOCK_NEW) {
+		if (recovery_type == NFS_LOCK_RECLAIM)
+			data->arg.reclaim = NFS_LOCK_RECLAIM;
+		nfs4_set_sequence_privileged(&data->arg.seq_args);
+	}
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -5432,7 +5408,6 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
 			(struct nfs4_get_lease_time_data *)calldata;
 
 	dprintk("--> %s\n", __func__);
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	/* just setup sequence, do not trigger session recovery
 	   since we're invoked within one */
 	nfs41_setup_sequence(data->clp->cl_session,
@@ -5500,6 +5475,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	int status;
 
 	nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
+	nfs4_set_sequence_privileged(&args.la_seq_args);
 	dprintk("--> %s\n", __func__);
 	task = rpc_run_task(&task_setup);
 
@@ -5775,26 +5751,15 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
 	nfs41_setup_sequence(clp->cl_session, args, res, task);
 }
 
-static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs41_sequence_prepare(task, data);
-}
-
 static const struct rpc_call_ops nfs41_sequence_ops = {
 	.rpc_call_done = nfs41_sequence_call_done,
 	.rpc_call_prepare = nfs41_sequence_prepare,
 	.rpc_release = nfs41_sequence_release,
 };
 
-static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
-	.rpc_call_done = nfs41_sequence_call_done,
-	.rpc_call_prepare = nfs41_sequence_prepare_privileged,
-	.rpc_release = nfs41_sequence_release,
-};
-
-static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
-					     const struct rpc_call_ops *seq_ops)
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+		struct rpc_cred *cred,
+		bool is_privileged)
 {
 	struct nfs4_sequence_data *calldata;
 	struct rpc_message msg = {
@@ -5804,7 +5769,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
-		.callback_ops = seq_ops,
+		.callback_ops = &nfs41_sequence_ops,
 		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
 	};
 
@@ -5816,6 +5781,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
 		return ERR_PTR(-ENOMEM);
 	}
 	nfs41_init_sequence(&calldata->args, &calldata->res, 0);
+	if (is_privileged)
+		nfs4_set_sequence_privileged(&calldata->args);
 	msg.rpc_argp = &calldata->args;
 	msg.rpc_resp = &calldata->res;
 	calldata->clp = clp;
@@ -5831,7 +5798,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
 
 	if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
 		return 0;
-	task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
+	task = _nfs41_proc_sequence(clp, cred, false);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
 	else
@@ -5845,7 +5812,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
 	struct rpc_task *task;
 	int ret;
 
-	task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
+	task = _nfs41_proc_sequence(clp, cred, true);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
 		goto out;
@@ -5874,7 +5841,6 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_reclaim_complete_data *calldata = data;
 
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	nfs41_setup_sequence(calldata->clp->cl_session,
 			&calldata->arg.seq_args,
 			&calldata->res.seq_res,
@@ -5955,6 +5921,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
 	calldata->arg.one_fs = 0;
 
 	nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
+	nfs4_set_sequence_privileged(&calldata->arg.seq_args);
 	msg.rpc_argp = &calldata->arg;
 	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
@@ -6521,7 +6488,9 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 
 	dprintk("NFS call  test_stateid %p\n", stateid);
 	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
-	status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(server->client, server, &msg,
+			&args.seq_args, &res.seq_res);
 	if (status != NFS_OK) {
 		dprintk("NFS reply test_stateid: failed, %d\n", status);
 		return status;
@@ -6568,8 +6537,9 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 
 	dprintk("NFS call  free_stateid %p\n", stateid);
 	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
 	status = nfs4_call_sync_sequence(server->client, server, &msg,
-					 &args.seq_args, &res.seq_res, 1);
+			&args.seq_args, &res.seq_res);
 	dprintk("NFS reply free_stateid: %d\n", status);
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2076149db1a4..baa673edb597 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,7 +188,8 @@ struct nfs4_channel_attrs {
 struct nfs4_slot;
 struct nfs4_sequence_args {
 	struct nfs4_slot	*sa_slot;
-	u8			sa_cache_this;
+	u8			sa_cache_this : 1,
+				sa_privileged : 1;
 };
 
 struct nfs4_sequence_res {
-- 
cgit v1.2.3-55-g7522


From 62ae082d883d167cdaa7895cf2972d85e178228a Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Thu, 29 Nov 2012 17:10:01 -0500
Subject: NFSv4: Reorder the XDR structures to put sequence at the top, not
 bottom

Pre-condition for optimising the slot allocation and reintroducing FIFO
behaviour.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 138 ++++++++++++++++++++++++------------------------
 1 file changed, 69 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index baa673edb597..a55abd499c21 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -205,8 +205,8 @@ struct nfs4_get_lease_time_args {
 };
 
 struct nfs4_get_lease_time_res {
-	struct nfs_fsinfo	       *lr_fsinfo;
 	struct nfs4_sequence_res	lr_seq_res;
+	struct nfs_fsinfo	       *lr_fsinfo;
 };
 
 #define PNFS_LAYOUT_MAXSIZE 4096
@@ -224,23 +224,23 @@ struct pnfs_layout_range {
 };
 
 struct nfs4_layoutget_args {
+	struct nfs4_sequence_args seq_args;
 	__u32 type;
 	struct pnfs_layout_range range;
 	__u64 minlength;
 	__u32 maxcount;
 	struct inode *inode;
 	struct nfs_open_context *ctx;
-	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	struct nfs4_layoutdriver_data layout;
 };
 
 struct nfs4_layoutget_res {
+	struct nfs4_sequence_res seq_res;
 	__u32 return_on_close;
 	struct pnfs_layout_range range;
 	__u32 type;
 	nfs4_stateid stateid;
-	struct nfs4_sequence_res seq_res;
 	struct nfs4_layoutdriver_data *layoutp;
 };
 
@@ -251,38 +251,38 @@ struct nfs4_layoutget {
 };
 
 struct nfs4_getdevicelist_args {
+	struct nfs4_sequence_args seq_args;
 	const struct nfs_fh *fh;
 	u32 layoutclass;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_getdevicelist_res {
-	struct pnfs_devicelist *devlist;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_devicelist *devlist;
 };
 
 struct nfs4_getdeviceinfo_args {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_args seq_args;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_getdeviceinfo_res {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_layoutcommit_args {
+	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	__u64 lastbytewritten;
 	struct inode *inode;
 	const u32 *bitmask;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutcommit_res {
+	struct nfs4_sequence_res seq_res;
 	struct nfs_fattr *fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res seq_res;
 	int status;
 };
 
@@ -296,11 +296,11 @@ struct nfs4_layoutcommit_data {
 };
 
 struct nfs4_layoutreturn_args {
+	struct nfs4_sequence_args seq_args;
 	struct pnfs_layout_hdr *layout;
 	struct inode *inode;
 	nfs4_stateid stateid;
 	__u32   layout_type;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutreturn_res {
@@ -326,6 +326,7 @@ struct stateowner_id {
  * Arguments to the open call.
  */
 struct nfs_openargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *	fh;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
@@ -346,10 +347,10 @@ struct nfs_openargs {
 	const u32 *		bitmask;
 	const u32 *		open_bitmap;
 	__u32			claim;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_openres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fh           fh;
 	struct nfs4_change_info	cinfo;
@@ -364,7 +365,6 @@ struct nfs_openres {
 	__u32			attrset[NFS4_BITMAP_SIZE];
 	struct nfs4_string	*owner;
 	struct nfs4_string	*group_owner;
-	struct nfs4_sequence_res	seq_res;
 	__u32			access_request;
 	__u32			access_supported;
 	__u32			access_result;
@@ -388,20 +388,20 @@ struct nfs_open_confirmres {
  * Arguments to the close call.
  */
 struct nfs_closeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *         fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 	fmode_t			fmode;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_closeres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fattr *	fattr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
@@ -413,6 +413,7 @@ struct nfs_lowner {
 };
 
 struct nfs_lock_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	lock_seqid;
@@ -423,40 +424,39 @@ struct nfs_lock_args {
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
 	unsigned char		new_lock_owner : 1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lock_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	lock_seqid;
 	struct nfs_seqid *	open_seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_locku_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	seqid;
 	nfs4_stateid *		stateid;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_locku_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_lockt_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_lowner	lock_owner;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lockt_res {
-	struct file_lock *	denied; /* LOCK, LOCKT failed */
 	struct nfs4_sequence_res	seq_res;
+	struct file_lock *	denied; /* LOCK, LOCKT failed */
 };
 
 struct nfs_release_lockowner_args {
@@ -464,22 +464,23 @@ struct nfs_release_lockowner_args {
 };
 
 struct nfs4_delegreturnargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
 	const u32 * bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_delegreturnres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr * fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the read call.
  */
 struct nfs_readargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -487,20 +488,20 @@ struct nfs_readargs {
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_readres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	__u32			count;
 	int                     eof;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the write call.
  */
 struct nfs_writeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -510,7 +511,6 @@ struct nfs_writeargs {
 	unsigned int		pgbase;
 	struct page **		pages;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_write_verifier {
@@ -523,65 +523,65 @@ struct nfs_writeverf {
 };
 
 struct nfs_writeres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	struct nfs_writeverf *	verf;
 	__u32			count;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the commit call.
  */
 struct nfs_commitargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh		*fh;
 	__u64			offset;
 	__u32			count;
 	const u32		*bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_commitres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr	*fattr;
 	struct nfs_writeverf	*verf;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Common arguments to the unlink call
  */
 struct nfs_removeargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh	*fh;
 	struct qstr		name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_removeres {
+	struct nfs4_sequence_res 	seq_res;
 	const struct nfs_server *server;
 	struct nfs_fattr	*dir_attr;
 	struct nfs4_change_info	cinfo;
-	struct nfs4_sequence_res 	seq_res;
 };
 
 /*
  * Common arguments to the rename call
  */
 struct nfs_renameargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*old_dir;
 	const struct nfs_fh		*new_dir;
 	const struct qstr		*old_name;
 	const struct qstr		*new_name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_renameres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server		*server;
 	struct nfs4_change_info		old_cinfo;
 	struct nfs_fattr		*old_fattr;
 	struct nfs4_change_info		new_cinfo;
 	struct nfs_fattr		*new_fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -622,20 +622,20 @@ struct nfs_createargs {
 };
 
 struct nfs_setattrargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *                 fh;
 	nfs4_stateid                    stateid;
 	struct iattr *                  iap;
 	const struct nfs_server *	server; /* Needed for name mapping */
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs_setaclargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_setaclres {
@@ -643,27 +643,27 @@ struct nfs_setaclres {
 };
 
 struct nfs_getaclargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 /* getxattr ACL interface flags */
 #define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
+	struct nfs4_sequence_res	seq_res;
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
 	struct page *			acl_scratch;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_setattrres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_linkargs {
@@ -828,21 +828,22 @@ struct nfs3_getaclres {
 typedef u64 clientid4;
 
 struct nfs4_accessargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
 	u32				access;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_accessres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	u32				supported;
 	u32				access;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_create_arg {
+	struct nfs4_sequence_args 	seq_args;
 	u32				ftype;
 	union {
 		struct {
@@ -859,88 +860,88 @@ struct nfs4_create_arg {
 	const struct iattr *		attrs;
 	const struct nfs_fh *		dir_fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_create_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_fsinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fsinfo_res {
-	struct nfs_fsinfo	       *fsinfo;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsinfo	       *fsinfo;
 };
 
 struct nfs4_getattr_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_getattr_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_link_arg {
+	struct nfs4_sequence_args 	seq_args;
 	const struct nfs_fh *		fh;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_link_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		cinfo;
 	struct nfs_fattr *		dir_attr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 
 struct nfs4_lookup_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_lookup_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs_fh *			fh;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_lookup_root_arg {
-	const u32 *			bitmask;
 	struct nfs4_sequence_args	seq_args;
+	const u32 *			bitmask;
 };
 
 struct nfs4_pathconf_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_pathconf_res {
-	struct nfs_pathconf	       *pathconf;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_pathconf	       *pathconf;
 };
 
 struct nfs4_readdir_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	u64				cookie;
 	nfs4_verifier			verifier;
@@ -949,21 +950,20 @@ struct nfs4_readdir_arg {
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
 	int				plus;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readdir_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_verifier			verifier;
 	unsigned int			pgbase;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_readlink {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	unsigned int			pgbase;
 	unsigned int			pglen;   /* zero-copy data */
 	struct page **			pages;   /* zero-copy data */
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readlink_res {
@@ -989,28 +989,28 @@ struct nfs4_setclientid_res {
 };
 
 struct nfs4_statfs_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_statfs_res {
-	struct nfs_fsstat	       *fsstat;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsstat	       *fsstat;
 };
 
 struct nfs4_server_caps_arg {
-	struct nfs_fh		       *fhandle;
 	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh		       *fhandle;
 };
 
 struct nfs4_server_caps_res {
+	struct nfs4_sequence_res	seq_res;
 	u32				attr_bitmask[3];
 	u32				acl_bitmask;
 	u32				has_links;
 	u32				has_symlinks;
 	u32				fh_expire_type;
-	struct nfs4_sequence_res	seq_res;
 };
 
 #define NFS4_PATHNAME_MAXCOMPONENTS 512
@@ -1036,16 +1036,16 @@ struct nfs4_fs_locations {
 };
 
 struct nfs4_fs_locations_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *dir_fh;
 	const struct qstr *name;
 	struct page *page;
 	const u32 *bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fs_locations_res {
-	struct nfs4_fs_locations       *fs_locations;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_fs_locations       *fs_locations;
 };
 
 struct nfs4_secinfo_oid {
@@ -1070,14 +1070,14 @@ struct nfs4_secinfo_flavors {
 };
 
 struct nfs4_secinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*dir_fh;
 	const struct qstr		*name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_secinfo_res {
-	struct nfs4_secinfo_flavors	*flavors;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_secinfo_flavors	*flavors;
 };
 
 #endif /* CONFIG_NFS_V4 */
@@ -1157,9 +1157,9 @@ struct nfs41_create_session_res {
 };
 
 struct nfs41_reclaim_complete_args {
+	struct nfs4_sequence_args	seq_args;
 	/* In the future extend to include curr_fh for use with migration */
 	unsigned char			one_fs:1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs41_reclaim_complete_res {
@@ -1169,28 +1169,28 @@ struct nfs41_reclaim_complete_res {
 #define SECINFO_STYLE_CURRENT_FH 0
 #define SECINFO_STYLE_PARENT 1
 struct nfs41_secinfo_no_name_args {
-	int				style;
 	struct nfs4_sequence_args	seq_args;
+	int				style;
 };
 
 struct nfs41_test_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_test_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 struct nfs41_free_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_free_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 #else
-- 
cgit v1.2.3-55-g7522


From c05eecf636101dd4347b2d8fa457626bf0088e0a Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 30 Nov 2012 23:59:29 -0500
Subject: SUNRPC: Don't allow low priority tasks to pre-empt higher priority
 ones

Currently, the priority queues attempt to be 'fair' to lower priority
tasks by scheduling them after a certain number of higher priority tasks
have run. The problem is that both the transport send queue and
the NFSv4.1 session slot queue have strong ordering requirements.

This patch therefore removes the fairness code in favour of strong
ordering of task priorities.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 -
 net/sunrpc/sched.c           | 44 ++++++++++++++++++++++----------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index dc0c3cc3ada3..b64f8eb0b973 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -192,7 +192,6 @@ struct rpc_wait_queue {
 	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
-	unsigned char		count;			/* # task groups remaining serviced so far */
 	unsigned char		nr;			/* # tasks remaining for cookie */
 	unsigned short		qlen;			/* total # tasks waiting in queue */
 	struct rpc_timer	timer_list;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1aefc9fef866..d17a704aaf5f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -98,6 +98,23 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+	queue->priority = priority;
+}
+
+static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
+{
+	queue->owner = pid;
+	queue->nr = RPC_BATCH_COUNT;
+}
+
+static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+	rpc_set_waitqueue_priority(queue, queue->maxpriority);
+	rpc_set_waitqueue_owner(queue, 0);
+}
+
 /*
  * Add new request to a priority queue.
  */
@@ -109,9 +126,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
 	struct rpc_task *t;
 
 	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	q = &queue->tasks[queue_priority];
 	if (unlikely(queue_priority > queue->maxpriority))
-		q = &queue->tasks[queue->maxpriority];
+		queue_priority = queue->maxpriority;
+	if (queue_priority > queue->priority)
+		rpc_set_waitqueue_priority(queue, queue_priority);
+	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
 			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
@@ -180,24 +199,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
 			task->tk_pid, queue, rpc_qname(queue));
 }
 
-static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
-{
-	queue->priority = priority;
-	queue->count = 1 << (priority * 2);
-}
-
-static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
-static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
-{
-	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
-}
-
 static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 {
 	int i;
@@ -464,8 +465,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		/*
 		 * Check if we need to switch queues.
 		 */
-		if (--queue->count)
-			goto new_owner;
+		goto new_owner;
 	}
 
 	/*
-- 
cgit v1.2.3-55-g7522


From 914daba865cb5c38cd5fdee024ca38029315b38f Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Tue, 9 Oct 2012 00:22:59 +0200
Subject: KVM: Distangle eventfd code from irqchip

The current eventfd code assumes that when we have eventfd, we also have
irqfd for in-kernel interrupt delivery. This is not necessarily true. On
PPC we don't have an in-kernel irqchip yet, but we can still support easily
support eventfd.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 include/linux/kvm_host.h | 12 +++++++++++-
 virt/kvm/eventfd.c       |  6 ++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8e5c7b651655..c823e47c3641 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -900,10 +900,20 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 
 void kvm_eventfd_init(struct kvm *kvm);
+int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
 void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
-int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+#else
+static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
+{
+	return -EINVAL;
+}
+
+static inline void kvm_irqfd_release(struct kvm *kvm) {}
+#endif
 
 #else
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9718e98d6d2a..d7424c8c138a 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
 
 #include "iodev.h"
 
+#ifdef __KVM_HAVE_IOAPIC
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -425,17 +426,21 @@ fail:
 	kfree(irqfd);
 	return ret;
 }
+#endif
 
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
+#ifdef __KVM_HAVE_IOAPIC
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
 	mutex_init(&kvm->irqfds.resampler_lock);
+#endif
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
+#ifdef __KVM_HAVE_IOAPIC
 /*
  * shutdown any irqfd's that match fd+gsi
  */
@@ -555,6 +560,7 @@ static void __exit irqfd_module_exit(void)
 
 module_init(irqfd_module_init);
 module_exit(irqfd_module_exit);
+#endif
 
 /*
  * --------------------------------------------------------------------
-- 
cgit v1.2.3-55-g7522


From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Mon, 3 Dec 2012 16:25:40 +0000
Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping

Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap16()
intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC,
48 for other platforms).

By using these instead of the inline assembler that most architectures
have in their __arch_swabXX() macros, we let the compiler see what's
actually happening. The resulting code should be at least as good, and
much *better* in the cases where it can be combined with a nearby load
or store, using a load-and-byteswap or store-and-byteswap instruction
(e.g. lwbrx/stwbrx on PowerPC, movbe on Atom).

When GCC is sufficiently recent *and* the architecture opts in to using
the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be
used in preference to the __arch_swabXX() macros. An architecture which
does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own
hand-crafted macros.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/Kconfig                   | 19 +++++++++++++++++++
 include/linux/compiler-gcc4.h  | 10 ++++++++++
 include/linux/compiler-intel.h |  7 +++++++
 include/uapi/linux/swab.h      | 12 +++++++++---
 4 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 366ec06a5185..c31416b10586 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	  See Documentation/unaligned-memory-access.txt for more
 	  information on the topic of unaligned memory accesses.
 
+config ARCH_USE_BUILTIN_BSWAP
+       bool
+       help
+	 Modern versions of GCC (since 4.4) have builtin functions
+	 for handling byte-swapping. Using these, instead of the old
+	 inline assembler that the architecture code provides in the
+	 __arch_bswapXX() macros, allows the compiler to see what's
+	 happening and offers more opportunity for optimisation. In
+	 particular, the compiler will be able to combine the byteswap
+	 with a nearby load or store and use load-and-swap or
+	 store-and-swap instructions if the architecture has them. It
+	 should almost *never* result in code which is worse than the
+	 hand-coded assembler in <asm/swab.h>.  But just in case it
+	 does, the use of the builtins is optional.
+
+	 Any architecture with load-and-swap or store-and-swap
+	 instructions should set this. And it shouldn't hurt to set it
+	 on architectures that don't have such instructions.
+
 config HAVE_SYSCALL_WRAPPERS
 	bool
 
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c2b023..dc16a858e77c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -63,3 +63,13 @@
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if __GNUC_MINOR__ >= 4
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index d8e636e5607d..973ce10c40b6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,3 +29,10 @@
 #endif
 
 #define uninitialized_var(x) x
+
+#ifndef __HAVE_BUILTIN_BSWAP16__
+/* icc has this, but it's called _bswap16 */
+#define __HAVE_BUILTIN_BSWAP16__
+#define __builtin_bswap16 _bswap16
+#endif
+
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index e811474724c2..0e011eb91b5d 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -45,7 +45,9 @@
 
 static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
-#ifdef __arch_swab16
+#ifdef __HAVE_BUILTIN_BSWAP16__
+	return __builtin_bswap16(val);
+#elif defined (__arch_swab16)
 	return __arch_swab16(val);
 #else
 	return ___constant_swab16(val);
@@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val)
 
 static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
-#ifdef __arch_swab32
+#ifdef __HAVE_BUILTIN_BSWAP32__
+	return __builtin_bswap32(val);
+#elif defined(__arch_swab32)
 	return __arch_swab32(val);
 #else
 	return ___constant_swab32(val);
@@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val)
 
 static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
-#ifdef __arch_swab64
+#ifdef __HAVE_BUILTIN_BSWAP64__
+	return __builtin_bswap64(val);
+#elif defined (__arch_swab64)
 	return __arch_swab64(val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
-- 
cgit v1.2.3-55-g7522


From d1e7de3007c6e34c5e6d5e1b707b5aba4a1cd57f Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Tue, 4 Dec 2012 15:01:01 +0100
Subject: regulators: add regulator_can_change_voltage() function

Introduce a regulator_can_change_voltage() function for the subsytems or
drivers which might check if applying voltage change is possible and use
special workaround code when the driver is used with fixed regulators or
regulators with disabled ability to change the voltage.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c           | 22 ++++++++++++++++++++++
 include/linux/regulator/consumer.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index e872c8be080e..59e08633372a 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1866,6 +1866,28 @@ int regulator_is_enabled(struct regulator *regulator)
 }
 EXPORT_SYMBOL_GPL(regulator_is_enabled);
 
+/**
+ * regulator_can_change_voltage - check if regulator can change voltage
+ * @regulator: regulator source
+ *
+ * Returns positive if the regulator driver backing the source/client
+ * can change its voltage, false otherwise. Usefull for detecting fixed
+ * or dummy regulators and disabling voltage change logic in the client
+ * driver.
+ */
+int regulator_can_change_voltage(struct regulator *regulator)
+{
+	struct regulator_dev	*rdev = regulator->rdev;
+
+	if (rdev->constraints &&
+	    rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE &&
+	    rdev->desc->n_voltages > 1)
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regulator_can_change_voltage);
+
 /**
  * regulator_count_voltages - count regulator_list_voltage() selectors
  * @regulator: regulator source
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index c43cd3556b1f..5d0f7c10bef1 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -160,6 +160,7 @@ int regulator_bulk_force_disable(int num_consumers,
 void regulator_bulk_free(int num_consumers,
 			 struct regulator_bulk_data *consumers);
 
+int regulator_can_change_voltage(struct regulator *regulator);
 int regulator_count_voltages(struct regulator *regulator);
 int regulator_list_voltage(struct regulator *regulator, unsigned selector);
 int regulator_is_supported_voltage(struct regulator *regulator,
-- 
cgit v1.2.3-55-g7522


From 6d49e352ae9aed3f599041b0c0389aa924815f14 Mon Sep 17 00:00:00 2001
From: Nadia Yvette Chambers
Date: Thu, 6 Dec 2012 10:39:54 +0100
Subject: propagate name change to comments in kernel source

I've legally changed my name with New York State, the US Social Security
Administration, et al. This patch propagates the name change and change
in initials and login to comments in the kernel source as well.

Signed-off-by: Nadia Yvette Chambers <nyc@holomorphy.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 MAINTAINERS                       | 2 +-
 arch/alpha/include/asm/mmzone.h   | 2 +-
 arch/frv/mm/pgalloc.c             | 2 +-
 arch/mn10300/mm/pgtable.c         | 2 +-
 arch/x86/mm/pgtable.c             | 2 +-
 fs/hugetlbfs/inode.c              | 2 +-
 fs/ncpfs/mmap.c                   | 2 +-
 include/linux/hash.h              | 2 +-
 kernel/pid.c                      | 4 ++--
 kernel/profile.c                  | 7 ++++---
 kernel/trace/ftrace.c             | 2 +-
 kernel/trace/trace.c              | 2 +-
 kernel/trace/trace_functions.c    | 2 +-
 kernel/trace/trace_irqsoff.c      | 2 +-
 kernel/trace/trace_sched_wakeup.c | 2 +-
 kernel/wait.c                     | 2 +-
 lib/bitmap.c                      | 2 +-
 mm/hugetlb.c                      | 2 +-
 mm/page_alloc.c                   | 4 ++--
 19 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 322db3ae83cf..e16a3f5a1b79 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3576,7 +3576,7 @@ S:	Maintained
 F:	drivers/input/touchscreen/htcpen.c
 
 HUGETLB FILESYSTEM
-M:	William Irwin <wli@holomorphy.com>
+M:	Nadia Yvette Chambers <nyc@holomorphy.com>
 S:	Maintained
 F:	fs/hugetlbfs/
 
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 445dc42e0334..c5b5d6bac9ed 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -66,7 +66,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
     ((unsigned long)__va(NODE_DATA(kvaddr_to_nid(kaddr))->node_start_pfn  \
 			 << PAGE_SHIFT))
 
-/* XXX: FIXME -- wli */
+/* XXX: FIXME -- nyc */
 #define kern_addr_valid(kaddr)	(0)
 
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 4fb63a36bd52..f6084bc524e8 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -77,7 +77,7 @@ void __set_pmd(pmd_t *pmdptr, unsigned long pmd)
  * checks at dup_mmap(), exec(), and other mmlist addition points
  * could be used. The locking scheme was chosen on the basis of
  * manfred's recommendations and having no core impact whatsoever.
- * -- wli
+ * -- nyc
  */
 DEFINE_SPINLOCK(pgd_lock);
 struct page *pgd_list;
diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c
index 4ebf117c3285..bd9ada693f95 100644
--- a/arch/mn10300/mm/pgtable.c
+++ b/arch/mn10300/mm/pgtable.c
@@ -95,7 +95,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
  * checks at dup_mmap(), exec(), and other mmlist addition points
  * could be used. The locking scheme was chosen on the basis of
  * manfred's recommendations and having no core impact whatsoever.
- * -- wli
+ * -- nyc
  */
 DEFINE_SPINLOCK(pgd_lock);
 struct page *pgd_list;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8573b83a63d0..217eb705fac0 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -137,7 +137,7 @@ static void pgd_dtor(pgd_t *pgd)
  * against pageattr.c; it is the unique case in which a valid change
  * of kernel pagetables can't be lazily synchronized by vmalloc faults.
  * vmalloc faults work because attached pagetables are never freed.
- * -- wli
+ * -- nyc
  */
 
 #ifdef CONFIG_X86_PAE
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c5bc355d8243..c98d0665fa5c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1,7 +1,7 @@
 /*
  * hugetlbpage-backed filesystem.  Based on ramfs.
  *
- * William Irwin, 2002
+ * Nadia Yvette Chambers, 2002
  *
  * Copyright (C) 2002 Linus Torvalds.
  */
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index be20a7e171a0..63d14a99483d 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
 	/*
 	 * If I understand ncp_read_kernel() properly, the above always
 	 * fetches from the network, here the analogue of disk.
-	 * -- wli
+	 * -- nyc
 	 */
 	count_vm_event(PGMAJFAULT);
 	mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
diff --git a/include/linux/hash.h b/include/linux/hash.h
index 24df9e70406f..61c97ae22e01 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_HASH_H
 #define _LINUX_HASH_H
 /* Fast hashing routine for ints,  longs and pointers.
-   (C) 2002 William Lee Irwin III, IBM */
+   (C) 2002 Nadia Yvette Chambers, IBM */
 
 /*
  * Knuth recommends primes in approximately golden ratio to the maximum
diff --git a/kernel/pid.c b/kernel/pid.c
index aebd4f5aaf41..fd996c1ed9f8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -1,8 +1,8 @@
 /*
  * Generic pidhash and scalable, time-bounded PID allocator
  *
- * (C) 2002-2003 William Irwin, IBM
- * (C) 2004 William Irwin, Oracle
+ * (C) 2002-2003 Nadia Yvette Chambers, IBM
+ * (C) 2004 Nadia Yvette Chambers, Oracle
  * (C) 2002-2004 Ingo Molnar, Red Hat
  *
  * pid-structures are backing objects for tasks sharing a given ID to chain
diff --git a/kernel/profile.c b/kernel/profile.c
index 76b8e77773ee..1f391819c42f 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -8,9 +8,10 @@
  *  Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
  *	Red Hat, July 2004
  *  Consolidation of architecture support code for profiling,
- *	William Irwin, Oracle, July 2004
+ *	Nadia Yvette Chambers, Oracle, July 2004
  *  Amortized hit count accounting via per-cpu open-addressed hashtables
- *	to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
+ *	to resolve timer interrupt livelocks, Nadia Yvette Chambers,
+ *	Oracle, 2004
  */
 
 #include <linux/export.h>
@@ -256,7 +257,7 @@ EXPORT_SYMBOL_GPL(unregister_timer_hook);
  * pagetable hash functions, but uses a full hashtable full of finite
  * collision chains, not just pairs of them.
  *
- * -- wli
+ * -- nyc
  */
 static void __profile_flip_buffers(void *unused)
 {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9dcf15d38380..b1a817c14ed5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -10,7 +10,7 @@
  * Based on code in the latency_tracer, that is:
  *
  *  Copyright (C) 2004-2006 Ingo Molnar
- *  Copyright (C) 2004 William Lee Irwin III
+ *  Copyright (C) 2004 Nadia Yvette Chambers
  */
 
 #include <linux/stop_machine.h>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 31e4f55773f1..3cff052715fe 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9,7 +9,7 @@
  *
  * Based on code from the latency_tracer, that is:
  *  Copyright (C) 2004-2006 Ingo Molnar
- *  Copyright (C) 2004 William Lee Irwin III
+ *  Copyright (C) 2004 Nadia Yvette Chambers
  */
 #include <linux/ring_buffer.h>
 #include <generated/utsrelease.h>
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 507a7a9630bf..c7b83d03ff68 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -7,7 +7,7 @@
  * Based on code from the latency_tracer, that is:
  *
  *  Copyright (C) 2004-2006 Ingo Molnar
- *  Copyright (C) 2004 William Lee Irwin III
+ *  Copyright (C) 2004 Nadia Yvette Chambers
  */
 #include <linux/ring_buffer.h>
 #include <linux/debugfs.h>
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index d98ee8283b29..cddb3b40bf7b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -7,7 +7,7 @@
  * From code in the latency_tracer, that is:
  *
  *  Copyright (C) 2004-2006 Ingo Molnar
- *  Copyright (C) 2004 William Lee Irwin III
+ *  Copyright (C) 2004 Nadia Yvette Chambers
  */
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 02170c00c413..45464947a5ff 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -7,7 +7,7 @@
  * Based on code from the latency_tracer, that is:
  *
  *  Copyright (C) 2004-2006 Ingo Molnar
- *  Copyright (C) 2004 William Lee Irwin III
+ *  Copyright (C) 2004 Nadia Yvette Chambers
  */
 #include <linux/module.h>
 #include <linux/fs.h>
diff --git a/kernel/wait.c b/kernel/wait.c
index 7fdd9eaca2c3..6698e0c04ead 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -1,7 +1,7 @@
 /*
  * Generic waiting primitives.
  *
- * (C) 2004 William Irwin, Oracle
+ * (C) 2004 Nadia Yvette Chambers, Oracle
  */
 #include <linux/init.h>
 #include <linux/export.h>
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06fdfa1aeba7..06f7e4fe8d2d 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -353,7 +353,7 @@ again:
 EXPORT_SYMBOL(bitmap_find_next_zero_area);
 
 /*
- * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * Bitmap printing & parsing functions: first version by Nadia Yvette Chambers,
  * second version by Paul Jackson, third by Joe Korty.
  */
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 59a0059b39e2..3b7a20ea3808 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1,6 +1,6 @@
 /*
  * Generic hugetlb support.
- * (C) William Irwin, April 2004
+ * (C) Nadia Yvette Chambers, April 2004
  */
 #include <linux/list.h>
 #include <linux/init.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b74de6702e0..57806b7e118b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -523,7 +523,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  * If a block is freed, and its buddy is also free, then this
  * triggers coalescing into a block of larger size.
  *
- * -- wli
+ * -- nyc
  */
 
 static inline void __free_one_page(struct page *page,
@@ -780,7 +780,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
  * large block of memory acted on by a series of small allocations.
  * This behavior is a critical factor in sglist merging's success.
  *
- * -- wli
+ * -- nyc
  */
 static inline void expand(struct zone *zone, struct page *page,
 	int low, int high, struct free_area *area,
-- 
cgit v1.2.3-55-g7522


From 01331040e6442ad09181bfaacd8bb9687dce2389 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Wed, 5 Dec 2012 16:45:31 +0100
Subject: wireless: fix VHT max AMPDU exponent definition

This is really a 3-bit field, not a single bit,
so declare a mask and shift. Also fix hwsim, it
advertises the maximum possible.

While at it reindent all the defines using tabs
instead of spaces.

Change-Id: I7cd81c0d72f76deb5010aba5bfa3dd312006e898
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c |  2 +-
 include/linux/ieee80211.h             | 54 ++++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 8a61dbd320e6..ff9085502bea 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2223,7 +2223,7 @@ static int __init init_mac80211_hwsim(void)
 				IEEE80211_VHT_CAP_RXSTBC_2 |
 				IEEE80211_VHT_CAP_RXSTBC_3 |
 				IEEE80211_VHT_CAP_RXSTBC_4 |
-				IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT;
+				IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
 			sband->vht_cap.vht_mcs.rx_mcs_map =
 				cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_8 << 0 |
 					    IEEE80211_VHT_MCS_SUPPORT_0_8 << 2 |
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8f690e53dd89..f0859cc73861 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1251,32 +1251,34 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_MCS_NOT_SUPPORTED 3
 
 /* 802.11ac VHT Capabilities */
-#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895                0x00000000
-#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991                0x00000001
-#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454               0x00000002
-#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ              0x00000004
-#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ     0x00000008
-#define IEEE80211_VHT_CAP_RXLDPC                              0x00000010
-#define IEEE80211_VHT_CAP_SHORT_GI_80                         0x00000020
-#define IEEE80211_VHT_CAP_SHORT_GI_160                        0x00000040
-#define IEEE80211_VHT_CAP_TXSTBC                              0x00000080
-#define IEEE80211_VHT_CAP_RXSTBC_1                            0x00000100
-#define IEEE80211_VHT_CAP_RXSTBC_2                            0x00000200
-#define IEEE80211_VHT_CAP_RXSTBC_3                            0x00000300
-#define IEEE80211_VHT_CAP_RXSTBC_4                            0x00000400
-#define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE               0x00000800
-#define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE               0x00001000
-#define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX             0x00006000
-#define IEEE80211_VHT_CAP_SOUNDING_DIMENTION_MAX              0x00030000
-#define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE               0x00080000
-#define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE               0x00100000
-#define IEEE80211_VHT_CAP_VHT_TXOP_PS                         0x00200000
-#define IEEE80211_VHT_CAP_HTC_VHT                             0x00400000
-#define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT          0x00800000
-#define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB   0x08000000
-#define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB     0x0c000000
-#define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN                  0x10000000
-#define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN                  0x20000000
+#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895			0x00000000
+#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991			0x00000001
+#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454			0x00000002
+#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ		0x00000004
+#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ	0x00000008
+#define IEEE80211_VHT_CAP_RXLDPC				0x00000010
+#define IEEE80211_VHT_CAP_SHORT_GI_80				0x00000020
+#define IEEE80211_VHT_CAP_SHORT_GI_160				0x00000040
+#define IEEE80211_VHT_CAP_TXSTBC				0x00000080
+#define IEEE80211_VHT_CAP_RXSTBC_1				0x00000100
+#define IEEE80211_VHT_CAP_RXSTBC_2				0x00000200
+#define IEEE80211_VHT_CAP_RXSTBC_3				0x00000300
+#define IEEE80211_VHT_CAP_RXSTBC_4				0x00000400
+#define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE			0x00000800
+#define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE			0x00001000
+#define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX		0x00006000
+#define IEEE80211_VHT_CAP_SOUNDING_DIMENTION_MAX		0x00030000
+#define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE			0x00080000
+#define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE			0x00100000
+#define IEEE80211_VHT_CAP_VHT_TXOP_PS				0x00200000
+#define IEEE80211_VHT_CAP_HTC_VHT				0x00400000
+#define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT	23
+#define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK	\
+		(7 << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT)
+#define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB	0x08000000
+#define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB	0x0c000000
+#define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN			0x10000000
+#define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN			0x20000000
 
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
-- 
cgit v1.2.3-55-g7522


From 3f3299d5c0268d6cc3f47b446e8aca436e4a5651 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 28 Nov 2012 13:42:38 +0100
Subject: block: Rename queue dead flag

QUEUE_FLAG_DEAD is used to indicate that queuing new requests must
stop. After this flag has been set queue draining starts. However,
during the queue draining phase it is still safe to invoke the
queue's request_fn, so QUEUE_FLAG_DYING is a better name for this
flag.

This patch has been generated by running the following command
over the kernel source tree:

git grep -lEw 'blk_queue_dead|QUEUE_FLAG_DEAD' |
    xargs sed -i.tmp -e 's/blk_queue_dead/blk_queue_dying/g'      \
        -e 's/QUEUE_FLAG_DEAD/QUEUE_FLAG_DYING/g';                \
sed -i.tmp -e "s/QUEUE_FLAG_DYING$(printf \\t)*5/QUEUE_FLAG_DYING$(printf \\t)5/g" \
    include/linux/blkdev.h;                                       \
sed -i.tmp -e 's/ DEAD/ DYING/g' -e 's/dead queue/a dying queue/' \
    -e 's/Dead queue/A dying queue/' block/blk-core.c

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Chanho Min <chanho.min@lge.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c      |  2 +-
 block/blk-core.c        | 26 +++++++++++++-------------
 block/blk-exec.c        |  2 +-
 block/blk-sysfs.c       |  4 ++--
 block/blk-throttle.c    |  2 +-
 block/blk.h             |  2 +-
 drivers/scsi/scsi_lib.c |  2 +-
 include/linux/blkdev.h  |  4 ++--
 8 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d0b770391ad4..5dea4e8dbc55 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
 	return __blkg_lookup_create(blkcg, q, NULL);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
diff --git a/block/blk-core.c b/block/blk-core.c
index ee0e5cafa859..1a95272cca50 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,20 +473,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DEAD, drain all pending requests, destroy and put it.  All
+ * Mark @q DYING, drain all pending requests, destroy and put it.  All
  * future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
 	spinlock_t *lock = q->queue_lock;
 
-	/* mark @q DEAD, no new request or merges will be allowed afterwards */
+	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
 	spin_lock_irq(lock);
 
 	/*
-	 * Dead queue is permanently in bypass mode till released.  Note
+	 * A dying queue is permanently in bypass mode till released.  Note
 	 * that, unlike blk_queue_bypass_start(), we aren't performing
 	 * synchronize_rcu() after entering bypass mode to avoid the delay
 	 * as some drivers create and destroy a lot of queues while
@@ -499,11 +499,11 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-	queue_flag_set(QUEUE_FLAG_DEAD, q);
+	queue_flag_set(QUEUE_FLAG_DYING, q);
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DEAD marking */
+	/* drain all requests queued before DYING marking */
 	blk_drain_queue(q, true);
 
 	/* @q won't process any more request, flush async actions */
@@ -716,7 +716,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue);
 
 bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!blk_queue_dead(q))) {
+	if (likely(!blk_queue_dying(q))) {
 		__blk_get_queue(q);
 		return true;
 	}
@@ -870,7 +870,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue;
 
-	if (unlikely(blk_queue_dead(q)))
+	if (unlikely(blk_queue_dying(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -1050,7 +1050,7 @@ retry:
 	if (rq)
 		return rq;
 
-	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return NULL;
 	}
@@ -1910,7 +1910,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		return -ENODEV;
 	}
@@ -2885,9 +2885,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	trace_block_unplug(q, depth, !from_schedule);
 
 	/*
-	 * Don't mess with dead queue.
+	 * Don't mess with a dying queue.
 	 */
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock(q->queue_lock);
 		return;
 	}
@@ -2996,7 +2996,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		/*
 		 * Short-circuit if @q is dead
 		 */
-		if (unlikely(blk_queue_dead(q))) {
+		if (unlikely(blk_queue_dying(q))) {
 			__blk_end_request_all(rq, -ENODEV);
 			continue;
 		}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 8b6dc5bd4dd0..4aec98df7ba5 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -60,7 +60,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ce6204608822..788147797a79 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
@@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a9664fa0b609..31146225f3d0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
 		/* if %NULL and @q is alive, fall back to root_tg */
 		if (!IS_ERR(blkg))
 			tg = blkg_to_tg(blkg);
-		else if (!blk_queue_dead(q))
+		else if (!blk_queue_dying(q))
 			tg = td_root_tg(td);
 	}
 
diff --git a/block/blk.h b/block/blk.h
index ca51543b248c..2218a8a78292 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			q->flush_queue_delayed = 1;
 			return NULL;
 		}
-		if (unlikely(blk_queue_dead(q)) ||
+		if (unlikely(blk_queue_dying(q)) ||
 		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index da36a3a81a9e..f29a1a9b54d2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1406,7 +1406,7 @@ static int scsi_lld_busy(struct request_queue *q)
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost;
 
-	if (blk_queue_dead(q))
+	if (blk_queue_dying(q))
 		return 0;
 
 	shost = sdev->host;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1756001210d2..aba8246afe72 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -437,7 +437,7 @@ struct request_queue {
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
-#define QUEUE_FLAG_DEAD		5	/* queue being torn down */
+#define QUEUE_FLAG_DYING	5	/* queue being torn down */
 #define QUEUE_FLAG_BYPASS	6	/* act as dumb FIFO queue */
 #define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES     8	/* disable merge attempts */
@@ -521,7 +521,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
-#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
+#define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
-- 
cgit v1.2.3-55-g7522


From c246e80d86736312933646896c4157daf511dadc Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Thu, 6 Dec 2012 14:32:01 +0100
Subject: block: Avoid that request_fn is invoked on a dead queue

A block driver may start cleaning up resources needed by its
request_fn as soon as blk_cleanup_queue() finished, so request_fn
must not be invoked after draining finished. This is important
when blk_run_queue() is invoked without any requests in progress.
As an example, if blk_drain_queue() and scsi_run_queue() run in
parallel, blk_drain_queue() may have finished all requests after
scsi_run_queue() has taken a SCSI device off the starved list but
before that last function has had a chance to run the queue.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Chanho Min <chanho.min@lge.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 31 +++++++++++++++++++++++++++----
 block/blk-exec.c       |  2 +-
 block/blk.h            |  2 ++
 include/linux/blkdev.h |  2 ++
 4 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index a182b586b06a..f52d05ff5d24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -292,6 +292,25 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+/**
+ * __blk_run_queue_uncond - run a queue whether or not it has been stopped
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Invoke request handling on a queue if there are any pending requests.
+ *    May be used to restart request handling after a request has completed.
+ *    This variant runs the queue whether or not the queue has been
+ *    stopped. Must be called with the queue lock held and interrupts
+ *    disabled. See also @blk_run_queue.
+ */
+inline void __blk_run_queue_uncond(struct request_queue *q)
+{
+	if (unlikely(blk_queue_dead(q)))
+		return;
+
+	q->request_fn(q);
+}
+
 /**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
@@ -305,7 +324,7 @@ void __blk_run_queue(struct request_queue *q)
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	q->request_fn(q);
+	__blk_run_queue_uncond(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -477,8 +496,8 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DYING, drain all pending requests, destroy and put it.  All
- * future requests will be failed immediately with -ENODEV.
+ * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
+ * put it.  All future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
@@ -507,9 +526,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DYING marking */
+	/*
+	 * Drain all requests queued before DYING marking. Set DEAD flag to
+	 * prevent that q->request_fn() gets invoked after draining finished.
+	 */
 	spin_lock_irq(lock);
 	__blk_drain_queue(q, true);
+	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
 	/* @q won't process any more request, flush async actions */
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 4aec98df7ba5..1320e74d79b8 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -72,7 +72,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	__blk_run_queue(q);
 	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
-		q->request_fn(q);
+		__blk_run_queue_uncond(q);
 	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk.h b/block/blk.h
index 2218a8a78292..47fdfdd41520 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
+void __blk_run_queue_uncond(struct request_queue *q);
+
 int blk_dev_init(void);
 
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aba8246afe72..8bc46c250ca4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -452,6 +452,7 @@ struct request_queue {
 #define QUEUE_FLAG_ADD_RANDOM  16	/* Contributes to random pool */
 #define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
+#define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -522,6 +523,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
+#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
-- 
cgit v1.2.3-55-g7522


From 24faf6f604efe18236bded4303009fc252913bf0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 28 Nov 2012 13:46:45 +0100
Subject: block: Make blk_cleanup_queue() wait until request_fn finished

Some request_fn implementations, e.g. scsi_request_fn(), unlock
the queue lock internally. This may result in multiple threads
executing request_fn for the same queue simultaneously. Keep
track of the number of active request_fn calls and make sure that
blk_cleanup_queue() waits until all active request_fn invocations
have finished. A block driver may start cleaning up resources
needed by its request_fn as soon as blk_cleanup_queue() finished,
so blk_cleanup_queue() must wait for all outstanding request_fn
invocations to finish.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reported-by: Chanho Min <chanho.min@lge.com>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 10 ++++++++++
 include/linux/blkdev.h |  6 ++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 9fb23537c7ad..473015ee28a2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -309,7 +309,16 @@ inline void __blk_run_queue_uncond(struct request_queue *q)
 	if (unlikely(blk_queue_dead(q)))
 		return;
 
+	/*
+	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
+	 * the queue lock internally. As a result multiple threads may be
+	 * running such a request function concurrently. Keep track of the
+	 * number of active request_fn invocations such that blk_drain_queue()
+	 * can wait until all these request_fn calls have finished.
+	 */
+	q->request_fn_active++;
 	q->request_fn(q);
+	q->request_fn_active--;
 }
 
 /**
@@ -408,6 +417,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 			__blk_run_queue(q);
 
 		drain |= q->nr_rqs_elvpriv;
+		drain |= q->request_fn_active;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bc46c250ca4..c9d233e727f2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -378,6 +378,12 @@ struct request_queue {
 
 	unsigned int		nr_sorted;
 	unsigned int		in_flight[2];
+	/*
+	 * Number of active block driver functions for which blk_drain_queue()
+	 * must wait. Must be incremented around functions that unlock the
+	 * queue_lock internally, e.g. scsi_request_fn().
+	 */
+	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
-- 
cgit v1.2.3-55-g7522


From 80729beb3326fd682543f4f4ea534df47ab48967 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 28 Nov 2012 13:47:36 +0100
Subject: bsg: Remove unused function bsg_goose_queue()

The function bsg_goose_queue() does not have any in-tree callers,
so let's remove it.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c         | 13 -------------
 include/linux/bsg-lib.h |  1 -
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index deee61fbb741..650f427d915b 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -151,19 +151,6 @@ failjob_rls_job:
 	return -ENOMEM;
 }
 
-/*
- * bsg_goose_queue - restart queue in case it was stopped
- * @q: request q to be restarted
- */
-void bsg_goose_queue(struct request_queue *q)
-{
-	if (!q)
-		return;
-
-	blk_run_queue_async(q);
-}
-EXPORT_SYMBOL_GPL(bsg_goose_queue);
-
 /**
  * bsg_request_fn - generic handler for bsg requests
  * @q: request queue to manage
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 4d0fb3df2f4a..a226652a5a6c 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -67,6 +67,5 @@ void bsg_job_done(struct bsg_job *job, int result,
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
-void bsg_goose_queue(struct request_queue *q);
 
 #endif
-- 
cgit v1.2.3-55-g7522


From bc245cc36c5687dd3fbf6d4a1b3c13d41f9cb189 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:45:58 +0100
Subject: ssb/bcma: add common header for watchdog

This adds a common header for watchdog functions, so a watchdog driver
just needs to use this and could provide watchdog functionality for ssb
and bcma based SoCs. Patches for a watchdog driver using this interface
will be send later.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcm47xx_wdt.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 include/linux/bcm47xx_wdt.h

(limited to 'include/linux')

diff --git a/include/linux/bcm47xx_wdt.h b/include/linux/bcm47xx_wdt.h
new file mode 100644
index 000000000000..e5dfc256485b
--- /dev/null
+++ b/include/linux/bcm47xx_wdt.h
@@ -0,0 +1,19 @@
+#ifndef LINUX_BCM47XX_WDT_H_
+#define LINUX_BCM47XX_WDT_H_
+
+#include <linux/types.h>
+
+
+struct bcm47xx_wdt {
+	u32 (*timer_set)(struct bcm47xx_wdt *, u32);
+	u32 (*timer_set_ms)(struct bcm47xx_wdt *, u32);
+	u32 max_timer_ms;
+
+	void *driver_data;
+};
+
+static inline void *bcm47xx_wdt_get_drvdata(struct bcm47xx_wdt *wdt)
+{
+	return wdt->driver_data;
+}
+#endif /* LINUX_BCM47XX_WDT_H_ */
-- 
cgit v1.2.3-55-g7522


From a22a3114a820ca3eadee6af53d90736e5ca68fa1 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:46:01 +0100
Subject: bcma: add methods for watchdog driver

The watchdog driver wants to set the watchdog timeout in ms and not in
ticks, which is depending on the SoC type and the clock.
Calculate the number of ticks per millisecond and provide two functions
for the watchdog driver. Also return the ticks or millisecond the timer
was set to in case the provided value was bigger than the max allowed
value.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_chipcommon.c            | 38 ++++++++++++++++++++++++++++-
 include/linux/bcma/bcma_driver_chipcommon.h |  4 +--
 2 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index 7c132e5eceb0..117222697e1f 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -10,6 +10,7 @@
  */
 
 #include "bcma_private.h"
+#include <linux/bcm47xx_wdt.h>
 #include <linux/export.h>
 #include <linux/bcma/bcma.h>
 
@@ -52,6 +53,39 @@ static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc)
 		return (1 << nb) - 1;
 }
 
+static u32 bcma_chipco_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt,
+					      u32 ticks)
+{
+	struct bcma_drv_cc *cc = bcm47xx_wdt_get_drvdata(wdt);
+
+	return bcma_chipco_watchdog_timer_set(cc, ticks);
+}
+
+static u32 bcma_chipco_watchdog_timer_set_ms_wdt(struct bcm47xx_wdt *wdt,
+						 u32 ms)
+{
+	struct bcma_drv_cc *cc = bcm47xx_wdt_get_drvdata(wdt);
+	u32 ticks;
+
+	ticks = bcma_chipco_watchdog_timer_set(cc, cc->ticks_per_ms * ms);
+	return ticks / cc->ticks_per_ms;
+}
+
+static int bcma_chipco_watchdog_ticks_per_ms(struct bcma_drv_cc *cc)
+{
+	struct bcma_bus *bus = cc->core->bus;
+
+	if (cc->capabilities & BCMA_CC_CAP_PMU) {
+		if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706)
+			/* 4706 CC and PMU watchdogs are clocked at 1/4 of ALP clock */
+			return bcma_chipco_alp_clock(cc) / 4000;
+		else
+			/* based on 32KHz ILP clock */
+			return 32;
+	} else {
+		return bcma_chipco_alp_clock(cc) / 1000;
+	}
+}
 
 void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc)
 {
@@ -100,12 +134,13 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc)
 			((leddc_on << BCMA_CC_GPIOTIMER_ONTIME_SHIFT) |
 			 (leddc_off << BCMA_CC_GPIOTIMER_OFFTIME_SHIFT)));
 	}
+	cc->ticks_per_ms = bcma_chipco_watchdog_ticks_per_ms(cc);
 
 	cc->setup_done = true;
 }
 
 /* Set chip watchdog reset timer to fire in 'ticks' backplane cycles */
-void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks)
+u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks)
 {
 	u32 maxt;
 	enum bcma_clkmode clkmode;
@@ -125,6 +160,7 @@ void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks)
 		/* instant NMI */
 		bcma_cc_write32(cc, BCMA_CC_WATCHDOG, ticks);
 	}
+	return ticks;
 }
 
 void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value)
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 145f3c56227f..2f9b01493a90 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -570,6 +570,7 @@ struct bcma_drv_cc {
 	int nr_serial_ports;
 	struct bcma_serial_port serial_ports[4];
 #endif /* CONFIG_BCMA_DRIVER_MIPS */
+	u32 ticks_per_ms;
 };
 
 /* Register access */
@@ -593,8 +594,7 @@ extern void bcma_chipco_resume(struct bcma_drv_cc *cc);
 
 void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable);
 
-extern void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc,
-					  u32 ticks);
+extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks);
 
 void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value);
 
-- 
cgit v1.2.3-55-g7522


From a4855f39d4eb3f550ca5f4aac79bd999da42dc54 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:46:02 +0100
Subject: bcma: register watchdog driver

Register the watchdog driver to the system if this is a SoC. Using the
watchdog on a non SoC device, like a PCIe card, will make the PCIe
card die when the timeout expired, but starting it again is not
supported by bcma.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/bcma_private.h                 |  2 ++
 drivers/bcma/driver_chipcommon.c            | 22 ++++++++++++++++++++++
 drivers/bcma/main.c                         |  8 ++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |  3 +++
 4 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 169fc58427d3..bcb830ec4bb4 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -84,6 +84,8 @@ extern void __exit bcma_host_pci_exit(void);
 /* driver_pci.c */
 u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
 
+extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
+
 #ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE
 bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
 void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index 117222697e1f..d017f2512275 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -12,6 +12,7 @@
 #include "bcma_private.h"
 #include <linux/bcm47xx_wdt.h>
 #include <linux/export.h>
+#include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 
 static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset,
@@ -87,6 +88,27 @@ static int bcma_chipco_watchdog_ticks_per_ms(struct bcma_drv_cc *cc)
 	}
 }
 
+int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc)
+{
+	struct bcm47xx_wdt wdt = {};
+	struct platform_device *pdev;
+
+	wdt.driver_data = cc;
+	wdt.timer_set = bcma_chipco_watchdog_timer_set_wdt;
+	wdt.timer_set_ms = bcma_chipco_watchdog_timer_set_ms_wdt;
+	wdt.max_timer_ms = bcma_chipco_watchdog_get_max_timer(cc) / cc->ticks_per_ms;
+
+	pdev = platform_device_register_data(NULL, "bcm47xx-wdt",
+					     cc->core->bus->num, &wdt,
+					     sizeof(wdt));
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	cc->watchdog = pdev;
+
+	return 0;
+}
+
 void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc)
 {
 	if (cc->early_setup_done)
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index a9718893000b..debd4f142f93 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -165,6 +165,12 @@ static int bcma_register_cores(struct bcma_bus *bus)
 	}
 #endif
 
+	if (bus->hosttype == BCMA_HOSTTYPE_SOC) {
+		err = bcma_chipco_watchdog_register(&bus->drv_cc);
+		if (err)
+			bcma_err(bus, "Error registering watchdog driver\n");
+	}
+
 	return 0;
 }
 
@@ -177,6 +183,8 @@ static void bcma_unregister_cores(struct bcma_bus *bus)
 		if (core->dev_registered)
 			device_unregister(&core->dev);
 	}
+	if (bus->hosttype == BCMA_HOSTTYPE_SOC)
+		platform_device_unregister(bus->drv_cc.watchdog);
 }
 
 int __devinit bcma_bus_register(struct bcma_bus *bus)
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 2f9b01493a90..e51359180b6f 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -1,6 +1,8 @@
 #ifndef LINUX_BCMA_DRIVER_CC_H_
 #define LINUX_BCMA_DRIVER_CC_H_
 
+#include <linux/platform_device.h>
+
 /** ChipCommon core registers. **/
 #define BCMA_CC_ID			0x0000
 #define  BCMA_CC_ID_ID			0x0000FFFF
@@ -571,6 +573,7 @@ struct bcma_drv_cc {
 	struct bcma_serial_port serial_ports[4];
 #endif /* CONFIG_BCMA_DRIVER_MIPS */
 	u32 ticks_per_ms;
+	struct platform_device *watchdog;
 };
 
 /* Register access */
-- 
cgit v1.2.3-55-g7522


From 7ffbffe37de3979d43c1105e38eb2918bf5d35fe Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:46:05 +0100
Subject: ssb: add methods for watchdog driver

The watchdog driver wants to set the watchdog timeout in ms and not in
ticks, which is depending on the SoC type and the clock.
Calculate the number of ticks per millisecond and provide two functions
for the watchdog driver. Also return the ticks or millisecond the timer
was set to in case the provided value was bigger than the max allowed
value.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_chipcommon.c           | 47 ++++++++++++++++++++++++++++++-
 drivers/ssb/ssb_private.h                 |  5 ++++
 include/linux/ssb/ssb_driver_chipcommon.h |  5 ++--
 3 files changed, 54 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 6e080f6a07a5..95c33a05f434 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -4,6 +4,7 @@
  *
  * Copyright 2005, Broadcom Corporation
  * Copyright 2006, 2007, Michael Buesch <m@bues.ch>
+ * Copyright 2012, Hauke Mehrtens <hauke@hauke-m.de>
  *
  * Licensed under the GNU/GPL. See COPYING for details.
  */
@@ -12,6 +13,7 @@
 #include <linux/ssb/ssb_regs.h>
 #include <linux/export.h>
 #include <linux/pci.h>
+#include <linux/bcm47xx_wdt.h>
 
 #include "ssb_private.h"
 
@@ -306,6 +308,43 @@ static u32 ssb_chipco_watchdog_get_max_timer(struct ssb_chipcommon *cc)
 		return (1 << nb) - 1;
 }
 
+u32 ssb_chipco_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt, u32 ticks)
+{
+	struct ssb_chipcommon *cc = bcm47xx_wdt_get_drvdata(wdt);
+
+	if (cc->dev->bus->bustype != SSB_BUSTYPE_SSB)
+		return 0;
+
+	return ssb_chipco_watchdog_timer_set(cc, ticks);
+}
+
+u32 ssb_chipco_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt, u32 ms)
+{
+	struct ssb_chipcommon *cc = bcm47xx_wdt_get_drvdata(wdt);
+	u32 ticks;
+
+	if (cc->dev->bus->bustype != SSB_BUSTYPE_SSB)
+		return 0;
+
+	ticks = ssb_chipco_watchdog_timer_set(cc, cc->ticks_per_ms * ms);
+	return ticks / cc->ticks_per_ms;
+}
+
+static int ssb_chipco_watchdog_ticks_per_ms(struct ssb_chipcommon *cc)
+{
+	struct ssb_bus *bus = cc->dev->bus;
+
+	if (cc->capabilities & SSB_CHIPCO_CAP_PMU) {
+			/* based on 32KHz ILP clock */
+			return 32;
+	} else {
+		if (cc->dev->id.revision < 18)
+			return ssb_clockspeed(bus) / 1000;
+		else
+			return ssb_chipco_alp_clock(cc) / 1000;
+	}
+}
+
 void ssb_chipcommon_init(struct ssb_chipcommon *cc)
 {
 	if (!cc->dev)
@@ -323,6 +362,11 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc)
 	chipco_powercontrol_init(cc);
 	ssb_chipco_set_clockmode(cc, SSB_CLKMODE_FAST);
 	calc_fast_powerup_delay(cc);
+
+	if (cc->dev->bus->bustype == SSB_BUSTYPE_SSB) {
+		cc->ticks_per_ms = ssb_chipco_watchdog_ticks_per_ms(cc);
+		cc->max_timer_ms = ssb_chipco_watchdog_get_max_timer(cc) / cc->ticks_per_ms;
+	}
 }
 
 void ssb_chipco_suspend(struct ssb_chipcommon *cc)
@@ -421,7 +465,7 @@ void ssb_chipco_timing_init(struct ssb_chipcommon *cc,
 }
 
 /* Set chip watchdog reset timer to fire in 'ticks' backplane cycles */
-void ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc, u32 ticks)
+u32 ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc, u32 ticks)
 {
 	u32 maxt;
 	enum ssb_clkmode clkmode;
@@ -441,6 +485,7 @@ void ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc, u32 ticks)
 		/* instant NMI */
 		chipco_write32(cc, SSB_CHIPCO_WATCHDOG, ticks);
 	}
+	return ticks;
 }
 
 void ssb_chipco_irq_mask(struct ssb_chipcommon *cc, u32 mask, u32 value)
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 98b2915cd30b..03cc40a71510 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -3,6 +3,7 @@
 
 #include <linux/ssb/ssb.h>
 #include <linux/types.h>
+#include <linux/bcm47xx_wdt.h>
 
 
 #define PFX	"ssb: "
@@ -212,4 +213,8 @@ extern u32 ssb_pmu_get_cpu_clock(struct ssb_chipcommon *cc);
 extern u32 ssb_pmu_get_controlclock(struct ssb_chipcommon *cc);
 extern u32 ssb_pmu_get_alp_clock(struct ssb_chipcommon *cc);
 
+extern u32 ssb_chipco_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt,
+					     u32 ticks);
+extern u32 ssb_chipco_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt, u32 ms);
+
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index c2b02a5c86ae..38339fd68a5f 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -591,6 +591,8 @@ struct ssb_chipcommon {
 	/* Fast Powerup Delay constant */
 	u16 fast_pwrup_delay;
 	struct ssb_chipcommon_pmu pmu;
+	u32 ticks_per_ms;
+	u32 max_timer_ms;
 };
 
 static inline bool ssb_chipco_available(struct ssb_chipcommon *cc)
@@ -630,8 +632,7 @@ enum ssb_clkmode {
 extern void ssb_chipco_set_clockmode(struct ssb_chipcommon *cc,
 				     enum ssb_clkmode mode);
 
-extern void ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc,
-					  u32 ticks);
+extern u32 ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc, u32 ticks);
 
 void ssb_chipco_irq_mask(struct ssb_chipcommon *cc, u32 mask, u32 value);
 
-- 
cgit v1.2.3-55-g7522


From 7280b51a29f8e6cc7d449d565182d1e1b6183907 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:46:06 +0100
Subject: ssb: extif: add check for max value before setting watchdog register

Prevent the watchdog register on the extif core to be set to a too
high value.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_extif.c           | 5 +++--
 include/linux/ssb/ssb_driver_extif.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_extif.c b/drivers/ssb/driver_extif.c
index dc47f30e9cf7..0aa4c2a85774 100644
--- a/drivers/ssb/driver_extif.c
+++ b/drivers/ssb/driver_extif.c
@@ -112,9 +112,10 @@ void ssb_extif_get_clockcontrol(struct ssb_extif *extif,
 	*m = extif_read32(extif, SSB_EXTIF_CLOCK_SB);
 }
 
-void ssb_extif_watchdog_timer_set(struct ssb_extif *extif,
-				  u32 ticks)
+void ssb_extif_watchdog_timer_set(struct ssb_extif *extif, u32 ticks)
 {
+	if (ticks > SSB_EXTIF_WATCHDOG_MAX_TIMER)
+		ticks = SSB_EXTIF_WATCHDOG_MAX_TIMER;
 	extif_write32(extif, SSB_EXTIF_WATCHDOG, ticks);
 }
 
diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h
index 2604efa7dc4d..b618188939d2 100644
--- a/include/linux/ssb/ssb_driver_extif.h
+++ b/include/linux/ssb/ssb_driver_extif.h
@@ -152,6 +152,7 @@
 /* watchdog */
 #define SSB_EXTIF_WATCHDOG_CLK		48000000	/* Hz */
 
+#define SSB_EXTIF_WATCHDOG_MAX_TIMER	((1 << 28) - 1)
 
 
 #ifdef CONFIG_SSB_DRIVER_EXTIF
-- 
cgit v1.2.3-55-g7522


From 9f640a6376e54fa9ae834c32cbe92cefeec970dc Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:46:07 +0100
Subject: ssb: extif: add methods for watchdog driver

The watchdog driver wants to set the watchdog timeout in ms and not in
ticks, add a method converting ms to ticks before setting the watchdog
register. Return the ticks or millisecond the timer was set to in case
the provided value was bigger than the max allowed value.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_extif.c           | 21 ++++++++++++++++++++-
 drivers/ssb/ssb_private.h            | 15 +++++++++++++++
 include/linux/ssb/ssb_driver_extif.h |  9 +++++----
 3 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_extif.c b/drivers/ssb/driver_extif.c
index 0aa4c2a85774..553227a3062d 100644
--- a/drivers/ssb/driver_extif.c
+++ b/drivers/ssb/driver_extif.c
@@ -112,11 +112,30 @@ void ssb_extif_get_clockcontrol(struct ssb_extif *extif,
 	*m = extif_read32(extif, SSB_EXTIF_CLOCK_SB);
 }
 
-void ssb_extif_watchdog_timer_set(struct ssb_extif *extif, u32 ticks)
+u32 ssb_extif_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt, u32 ticks)
+{
+	struct ssb_extif *extif = bcm47xx_wdt_get_drvdata(wdt);
+
+	return ssb_extif_watchdog_timer_set(extif, ticks);
+}
+
+u32 ssb_extif_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt, u32 ms)
+{
+	struct ssb_extif *extif = bcm47xx_wdt_get_drvdata(wdt);
+	u32 ticks = (SSB_EXTIF_WATCHDOG_CLK / 1000) * ms;
+
+	ticks = ssb_extif_watchdog_timer_set(extif, ticks);
+
+	return (ticks * 1000) / SSB_EXTIF_WATCHDOG_CLK;
+}
+
+u32 ssb_extif_watchdog_timer_set(struct ssb_extif *extif, u32 ticks)
 {
 	if (ticks > SSB_EXTIF_WATCHDOG_MAX_TIMER)
 		ticks = SSB_EXTIF_WATCHDOG_MAX_TIMER;
 	extif_write32(extif, SSB_EXTIF_WATCHDOG, ticks);
+
+	return ticks;
 }
 
 u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 03cc40a71510..50ea02877777 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -217,4 +217,19 @@ extern u32 ssb_chipco_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt,
 					     u32 ticks);
 extern u32 ssb_chipco_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt, u32 ms);
 
+#ifdef CONFIG_SSB_DRIVER_EXTIF
+extern u32 ssb_extif_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt, u32 ticks);
+extern u32 ssb_extif_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt, u32 ms);
+#else
+static inline u32 ssb_extif_watchdog_timer_set_wdt(struct bcm47xx_wdt *wdt,
+						   u32 ticks)
+{
+	return 0;
+}
+static inline u32 ssb_extif_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt,
+						  u32 ms)
+{
+	return 0;
+}
+#endif
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h
index b618188939d2..99511d0e931d 100644
--- a/include/linux/ssb/ssb_driver_extif.h
+++ b/include/linux/ssb/ssb_driver_extif.h
@@ -153,6 +153,8 @@
 #define SSB_EXTIF_WATCHDOG_CLK		48000000	/* Hz */
 
 #define SSB_EXTIF_WATCHDOG_MAX_TIMER	((1 << 28) - 1)
+#define SSB_EXTIF_WATCHDOG_MAX_TIMER_MS	(SSB_EXTIF_WATCHDOG_MAX_TIMER \
+					 / (SSB_EXTIF_WATCHDOG_CLK / 1000))
 
 
 #ifdef CONFIG_SSB_DRIVER_EXTIF
@@ -172,8 +174,7 @@ extern void ssb_extif_get_clockcontrol(struct ssb_extif *extif,
 extern void ssb_extif_timing_init(struct ssb_extif *extif,
 				  unsigned long ns);
 
-extern void ssb_extif_watchdog_timer_set(struct ssb_extif *extif,
-					 u32 ticks);
+extern u32 ssb_extif_watchdog_timer_set(struct ssb_extif *extif, u32 ticks);
 
 /* Extif GPIO pin access */
 u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask);
@@ -211,9 +212,9 @@ void ssb_extif_timing_init(struct ssb_extif *extif, unsigned long ns)
 }
 
 static inline
-void ssb_extif_watchdog_timer_set(struct ssb_extif *extif,
-				  u32 ticks)
+u32 ssb_extif_watchdog_timer_set(struct ssb_extif *extif, u32 ticks)
 {
+	return 0;
 }
 
 static inline u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
-- 
cgit v1.2.3-55-g7522


From bde327eff8a722df1198df9b14464f84f1adfb65 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens
Date: Wed, 5 Dec 2012 18:46:08 +0100
Subject: ssb: register watchdog driver

Register the watchdog driver to the system if it is a SoC. Using the
watchdog on a non SoC device, like a PCI card, will make the PCI
card die when the timeout expired, but starting it again is not
supported by ssb.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/embedded.c    | 35 +++++++++++++++++++++++++++++++++++
 drivers/ssb/main.c        |  8 ++++++++
 drivers/ssb/ssb_private.h | 10 ++++++++++
 include/linux/ssb/ssb.h   |  2 ++
 4 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c
index 9ef124f9ee2d..bb18d76f9f2c 100644
--- a/drivers/ssb/embedded.c
+++ b/drivers/ssb/embedded.c
@@ -4,11 +4,13 @@
  *
  * Copyright 2005-2008, Broadcom Corporation
  * Copyright 2006-2008, Michael Buesch <m@bues.ch>
+ * Copyright 2012, Hauke Mehrtens <hauke@hauke-m.de>
  *
  * Licensed under the GNU/GPL. See COPYING for details.
  */
 
 #include <linux/export.h>
+#include <linux/platform_device.h>
 #include <linux/ssb/ssb.h>
 #include <linux/ssb/ssb_embedded.h>
 #include <linux/ssb/ssb_driver_pci.h>
@@ -32,6 +34,39 @@ int ssb_watchdog_timer_set(struct ssb_bus *bus, u32 ticks)
 }
 EXPORT_SYMBOL(ssb_watchdog_timer_set);
 
+int ssb_watchdog_register(struct ssb_bus *bus)
+{
+	struct bcm47xx_wdt wdt = {};
+	struct platform_device *pdev;
+
+	if (ssb_chipco_available(&bus->chipco)) {
+		wdt.driver_data = &bus->chipco;
+		wdt.timer_set = ssb_chipco_watchdog_timer_set_wdt;
+		wdt.timer_set_ms = ssb_chipco_watchdog_timer_set_ms;
+		wdt.max_timer_ms = bus->chipco.max_timer_ms;
+	} else if (ssb_extif_available(&bus->extif)) {
+		wdt.driver_data = &bus->extif;
+		wdt.timer_set = ssb_extif_watchdog_timer_set_wdt;
+		wdt.timer_set_ms = ssb_extif_watchdog_timer_set_ms;
+		wdt.max_timer_ms = SSB_EXTIF_WATCHDOG_MAX_TIMER_MS;
+	} else {
+		return -ENODEV;
+	}
+
+	pdev = platform_device_register_data(NULL, "bcm47xx-wdt",
+					     bus->busnumber, &wdt,
+					     sizeof(wdt));
+	if (IS_ERR(pdev)) {
+		ssb_dprintk(KERN_INFO PFX
+			    "can not register watchdog device, err: %li\n",
+			    PTR_ERR(pdev));
+		return PTR_ERR(pdev);
+	}
+
+	bus->watchdog = pdev;
+	return 0;
+}
+
 u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
 {
 	unsigned long flags;
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index df0f145c22fc..58c7da29a37c 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/ssb/ssb.h>
 #include <linux/ssb/ssb_regs.h>
 #include <linux/ssb/ssb_driver_gige.h>
@@ -433,6 +434,11 @@ static void ssb_devices_unregister(struct ssb_bus *bus)
 		if (sdev->dev)
 			device_unregister(sdev->dev);
 	}
+
+#ifdef CONFIG_SSB_EMBEDDED
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		platform_device_unregister(bus->watchdog);
+#endif
 }
 
 void ssb_bus_unregister(struct ssb_bus *bus)
@@ -561,6 +567,8 @@ static int __devinit ssb_attach_queued_buses(void)
 		if (err)
 			goto error;
 		ssb_pcicore_init(&bus->pcicore);
+		if (bus->bustype == SSB_BUSTYPE_SSB)
+			ssb_watchdog_register(bus);
 		ssb_bus_may_powerdown(bus);
 
 		err = ssb_devices_register(bus);
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 50ea02877777..8942db1d855a 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -232,4 +232,14 @@ static inline u32 ssb_extif_watchdog_timer_set_ms(struct bcm47xx_wdt *wdt,
 	return 0;
 }
 #endif
+
+#ifdef CONFIG_SSB_EMBEDDED
+extern int ssb_watchdog_register(struct ssb_bus *bus);
+#else /* CONFIG_SSB_EMBEDDED */
+static inline int ssb_watchdog_register(struct ssb_bus *bus)
+{
+	return 0;
+}
+#endif /* CONFIG_SSB_EMBEDDED */
+
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index bb674c02f306..1f64e3f1f22b 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -8,6 +8,7 @@
 #include <linux/pci.h>
 #include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 
 #include <linux/ssb/ssb_regs.h>
 
@@ -432,6 +433,7 @@ struct ssb_bus {
 #ifdef CONFIG_SSB_EMBEDDED
 	/* Lock for GPIO register access. */
 	spinlock_t gpio_lock;
+	struct platform_device *watchdog;
 #endif /* EMBEDDED */
 
 	/* Internal-only stuff follows. Do not touch. */
-- 
cgit v1.2.3-55-g7522


From eb044c48446f676ae5c38b30a2e2165742a52f4a Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Thu, 6 Dec 2012 15:35:25 +0100
Subject: ASoC: omap-twl4030: Update the header file to support more boards

The common machine driver will be able to support new boards where the voice
port is also in use.
At the same time add possibility to fine tune the connections from twl4030
on the board: jack detection GPIO, input/output selection

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/platform_data/omap-twl4030.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h
index c7bef788daab..ee60ef79d792 100644
--- a/include/linux/platform_data/omap-twl4030.h
+++ b/include/linux/platform_data/omap-twl4030.h
@@ -25,8 +25,34 @@
 #ifndef _OMAP_TWL4030_H_
 #define _OMAP_TWL4030_H_
 
+/* To select if only one channel is connected in a stereo port */
+#define OMAP_TWL4030_LEFT	(1 << 0)
+#define OMAP_TWL4030_RIGHT	(1 << 1)
+
 struct omap_tw4030_pdata {
 	const char *card_name;
+	/* Voice port is connected to McBSP3 */
+	bool voice_connected;
+
+	/* The driver will parse the connection flags if this flag is set */
+	bool	custom_routing;
+	/* Flags to indicate connected audio ports. */
+	u8	has_hs;
+	u8	has_hf;
+	u8	has_predriv;
+	u8	has_carkit;
+	bool	has_ear;
+
+	bool	has_mainmic;
+	bool	has_submic;
+	bool	has_hsmic;
+	bool	has_carkitmic;
+	bool	has_digimic0;
+	bool	has_digimic1;
+	u8	has_linein;
+
+	/* Jack detect GPIO or  <= 0 if it is not implemented */
+	int jack_detect;
 };
 
 #endif /* _OMAP_TWL4030_H_ */
-- 
cgit v1.2.3-55-g7522


From d0c6c482f6d8c2145717ab0266c87e3e792527ef Mon Sep 17 00:00:00 2001
From: Daniel Mack
Date: Wed, 5 Dec 2012 18:20:36 +0100
Subject: ASoC: McASP: remove unused variables

codec_fmt and sample_rate variables are unused in both snd_platform_data
and davinci_audio_dev, so drop them.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/platform_data/davinci_asp.h | 1 -
 sound/soc/davinci/davinci-mcasp.c         | 1 -
 sound/soc/davinci/davinci-mcasp.h         | 2 --
 3 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h
index f3d6e4f20962..8db5ae03b6e3 100644
--- a/include/linux/platform_data/davinci_asp.h
+++ b/include/linux/platform_data/davinci_asp.h
@@ -23,7 +23,6 @@ struct snd_platform_data {
 	u32 rx_dma_offset;
 	int asp_chan_q;	/* event queue number for ASP channel */
 	int ram_chan_q;	/* event queue number for RAM channel */
-	unsigned int codec_fmt;
 	/*
 	 * Allowing this is more efficient and eliminates left and right swaps
 	 * caused by underruns, but will swap the left and right channels
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 571559501b0b..7c248371f7ec 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -1158,7 +1158,6 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 	dev->tdm_slots = pdata->tdm_slots;
 	dev->num_serializer = pdata->num_serializer;
 	dev->serial_dir = pdata->serial_dir;
-	dev->codec_fmt = pdata->codec_fmt;
 	dev->version = pdata->version;
 	dev->txnumevt = pdata->txnumevt;
 	dev->rxnumevt = pdata->rxnumevt;
diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h
index 156f15f55744..a42b5d96739f 100644
--- a/sound/soc/davinci/davinci-mcasp.h
+++ b/sound/soc/davinci/davinci-mcasp.h
@@ -40,9 +40,7 @@ enum {
 struct davinci_audio_dev {
 	struct davinci_pcm_dma_params dma_params[2];
 	void __iomem *base;
-	int sample_rate;
 	struct device *dev;
-	unsigned int codec_fmt;
 
 	/* McASP specific data */
 	int	tdm_slots;
-- 
cgit v1.2.3-55-g7522


From 805f864ebefc39065b6b0cf2548f13c2fbf888d9 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto
Date: Thu, 6 Dec 2012 01:10:28 -0800
Subject: gpio: pcf857x: use client->irq for gpio_to_irq()

6e20a0a429bd4dc07d6de16d9c247270e04e4aa0
(gpio: pcf857x: enable gpio_to_irq() support)
added gpio_to_irq() support on pcf857x driver,
but it used pdata->irq.
This patch modifies driver to use client->irq instead of it.
It modifies kzm9g board platform settings,
and device probe information too.
This patch is tested on kzm9g board

Reported-by: Christian Engelmayer <christian.engelmayer@frequentis.com>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-shmobile/board-kzm9g.c |  2 +-
 drivers/gpio/gpio-pcf857x.c          | 29 +++++++++++------------------
 include/linux/i2c/pcf857x.h          |  3 ---
 3 files changed, 12 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index 0a43f3189c21..7a05de794a8c 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -548,7 +548,6 @@ static struct platform_device fsi_ak4648_device = {
 /* I2C */
 static struct pcf857x_platform_data pcf8575_pdata = {
 	.gpio_base	= GPIO_PCF8575_BASE,
-	.irq		= intcs_evt2irq(0x3260), /* IRQ19 */
 };
 
 static struct i2c_board_info i2c0_devices[] = {
@@ -570,6 +569,7 @@ static struct i2c_board_info i2c1_devices[] = {
 static struct i2c_board_info i2c3_devices[] = {
 	{
 		I2C_BOARD_INFO("pcf8575", 0x20),
+		.irq		= intcs_evt2irq(0x3260), /* IRQ19 */
 		.platform_data = &pcf8575_pdata,
 	},
 };
diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index 16af35cd2b10..a19b7457a726 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -223,11 +223,11 @@ static void pcf857x_irq_domain_cleanup(struct pcf857x *gpio)
 
 static int pcf857x_irq_domain_init(struct pcf857x *gpio,
 				   struct pcf857x_platform_data *pdata,
-				   struct device *dev)
+				   struct i2c_client *client)
 {
 	int status;
 
-	gpio->irq_domain = irq_domain_add_linear(dev->of_node,
+	gpio->irq_domain = irq_domain_add_linear(client->dev.of_node,
 						 gpio->chip.ngpio,
 						 &pcf857x_irq_domain_ops,
 						 NULL);
@@ -235,15 +235,15 @@ static int pcf857x_irq_domain_init(struct pcf857x *gpio,
 		goto fail;
 
 	/* enable real irq */
-	status = request_irq(pdata->irq, pcf857x_irq_demux, 0,
-			     dev_name(dev), gpio);
+	status = request_irq(client->irq, pcf857x_irq_demux, 0,
+			     dev_name(&client->dev), gpio);
 	if (status)
 		goto fail;
 
 	/* enable gpio_to_irq() */
 	INIT_WORK(&gpio->work, pcf857x_irq_demux_work);
 	gpio->chip.to_irq	= pcf857x_to_irq;
-	gpio->irq		= pdata->irq;
+	gpio->irq		= client->irq;
 
 	return 0;
 
@@ -285,8 +285,8 @@ static int pcf857x_probe(struct i2c_client *client,
 	gpio->chip.ngpio		= id->driver_data;
 
 	/* enable gpio_to_irq() if platform has settings */
-	if (pdata && pdata->irq) {
-		status = pcf857x_irq_domain_init(gpio, pdata, &client->dev);
+	if (pdata && client->irq) {
+		status = pcf857x_irq_domain_init(gpio, pdata, client);
 		if (status < 0) {
 			dev_err(&client->dev, "irq_domain init failed\n");
 			goto fail;
@@ -368,15 +368,6 @@ static int pcf857x_probe(struct i2c_client *client,
 	if (status < 0)
 		goto fail;
 
-	/* NOTE: these chips can issue "some pin-changed" IRQs, which we
-	 * don't yet even try to use.  Among other issues, the relevant
-	 * genirq state isn't available to modular drivers; and most irq
-	 * methods can't be called from sleeping contexts.
-	 */
-
-	dev_info(&client->dev, "%s\n",
-			client->irq ? " (irq ignored)" : "");
-
 	/* Let platform code set up the GPIOs and their users.
 	 * Now is the first time anyone could use them.
 	 */
@@ -388,13 +379,15 @@ static int pcf857x_probe(struct i2c_client *client,
 			dev_warn(&client->dev, "setup --> %d\n", status);
 	}
 
+	dev_info(&client->dev, "probed\n");
+
 	return 0;
 
 fail:
 	dev_dbg(&client->dev, "probe error %d for '%s'\n",
 			status, client->name);
 
-	if (pdata && pdata->irq)
+	if (pdata && client->irq)
 		pcf857x_irq_domain_cleanup(gpio);
 
 	kfree(gpio);
@@ -418,7 +411,7 @@ static int pcf857x_remove(struct i2c_client *client)
 		}
 	}
 
-	if (pdata && pdata->irq)
+	if (pdata && client->irq)
 		pcf857x_irq_domain_cleanup(gpio);
 
 	status = gpiochip_remove(&gpio->chip);
diff --git a/include/linux/i2c/pcf857x.h b/include/linux/i2c/pcf857x.h
index 781e6bd06c34..0767a2a6b2f1 100644
--- a/include/linux/i2c/pcf857x.h
+++ b/include/linux/i2c/pcf857x.h
@@ -10,7 +10,6 @@
  * @setup: optional callback issued once the GPIOs are valid
  * @teardown: optional callback issued before the GPIOs are invalidated
  * @context: optional parameter passed to setup() and teardown()
- * @irq: optional interrupt number
  *
  * In addition to the I2C_BOARD_INFO() state appropriate to each chip,
  * the i2c_board_info used with the pcf875x driver must provide its
@@ -40,8 +39,6 @@ struct pcf857x_platform_data {
 					int gpio, unsigned ngpio,
 					void *context);
 	void		*context;
-
-	int		irq;
 };
 
 #endif /* __LINUX_PCF857X_H */
-- 
cgit v1.2.3-55-g7522


From 4529eefad087f97b33c0f31984d924b1f15d7bae Mon Sep 17 00:00:00 2001
From: Lamarque V. Souza
Date: Thu, 6 Dec 2012 12:39:55 -0200
Subject: HID: hidp: fallback to input session properly if hid is blacklisted

This patch against kernel 3.7.0-rc8 fixes a kernel oops when turning on the
bluetooth mouse with id 0458:0058 [1].

The mouse in question supports both input and hid sessions, however it is
blacklisted in drivers/hid/hid-core.c so the input session is one that should
be used. Long ago (around kernel 3.0.0) some changes in the bluetooth
subsystem made the kernel do not fallback to input session when hid session is
not supported or blacklisted. This patch restore that behaviour by making the
kernel try the input session if hid_add_device returns ENODEV.

The patch exports hid_ignore() from hid-core.c so that it can be used in the
bluetooth subsystem.

[1] https://bugzilla.kernel.org/show_bug.cgi?id=39882

Signed-off-by: Lamarque V. Souza <lamarque@gmail.com>
Acked-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c    | 11 ++++++++---
 include/linux/hid.h       |  1 +
 net/bluetooth/hidp/core.c |  9 ++++++++-
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index a7550bb30836..2629d5240d63 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2145,8 +2145,13 @@ static const struct hid_device_id hid_mouse_ignore_list[] = {
 	{ }
 };
 
-static bool hid_ignore(struct hid_device *hdev)
+bool hid_ignore(struct hid_device *hdev)
 {
+	if (hdev->quirks & HID_QUIRK_NO_IGNORE)
+		return false;
+	if (hdev->quirks & HID_QUIRK_IGNORE)
+		return true;
+
 	switch (hdev->vendor) {
 	case USB_VENDOR_ID_CODEMERCS:
 		/* ignore all Code Mercenaries IOWarrior devices */
@@ -2201,6 +2206,7 @@ static bool hid_ignore(struct hid_device *hdev)
 
 	return !!hid_match_id(hdev, hid_ignore_list);
 }
+EXPORT_SYMBOL_GPL(hid_ignore);
 
 int hid_add_device(struct hid_device *hdev)
 {
@@ -2212,8 +2218,7 @@ int hid_add_device(struct hid_device *hdev)
 
 	/* we need to kill them here, otherwise they will stay allocated to
 	 * wait for coming driver */
-	if (!(hdev->quirks & HID_QUIRK_NO_IGNORE)
-            && (hid_ignore(hdev) || (hdev->quirks & HID_QUIRK_IGNORE)))
+	if (hid_ignore(hdev))
 		return -ENODEV;
 
 	/*
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 7e1f37db7582..abce7eb4f258 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -721,6 +721,7 @@ struct hid_ll_driver {
 
 extern int hid_debug;
 
+extern bool hid_ignore(struct hid_device *);
 extern int hid_add_device(struct hid_device *);
 extern void hid_destroy_device(struct hid_device *);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index ccd985da6518..1f4a32bb2971 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -941,6 +941,13 @@ static int hidp_setup_hid(struct hidp_session *session,
 	hid->hid_get_raw_report = hidp_get_raw_report;
 	hid->hid_output_raw_report = hidp_output_raw_report;
 
+	/* True if device is blacklisted in drivers/hid/hid-core.c */
+	if (hid_ignore(hid)) {
+		hid_destroy_device(session->hid);
+		session->hid = NULL;
+		return -ENODEV;
+	}
+
 	return 0;
 
 fault:
@@ -1013,7 +1020,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 
 	if (req->rd_size > 0) {
 		err = hidp_setup_hid(session, req);
-		if (err)
+		if (err && err != -ENODEV)
 			goto purge;
 	}
 
-- 
cgit v1.2.3-55-g7522


From 6ded7cd605502eff7341bbd912ebc90c3674c764 Mon Sep 17 00:00:00 2001
From: trem
Date: Tue, 4 Dec 2012 08:52:10 +0000
Subject: net: phy: smsc: force all capable mode if the phy is started in
 powerdown mode

A SMSC PHY in power down mode can't be used.
If a SMSC PHY is in this mode in the config_init
stage, the mode "all capable" is set. So the PHY
could then be used.

Signed-off-by: Philippe Reynes <tremyfr@yahoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/smsc.c  | 26 +++++++++++++++++++++++++-
 include/linux/smscphy.h |  5 +++++
 2 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 16dceed29d8c..ef883e3048c0 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -43,7 +43,31 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
 
 static int smsc_phy_config_init(struct phy_device *phydev)
 {
-	int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
+	int rc = phy_read(phydev, MII_LAN83C185_SPECIAL_MODES);
+	if (rc < 0)
+		return rc;
+
+	/* If the SMSC PHY is in power down mode, then set it
+	 * in all capable mode before using it.
+	 */
+	if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) {
+		int timeout = 50000;
+
+		/* set "all capable" mode and reset the phy */
+		rc |= MII_LAN83C185_MODE_ALL;
+		phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc);
+		phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+		/* wait end of reset (max 500 ms) */
+		do {
+			udelay(10);
+			if (timeout-- == 0)
+				return -1;
+			rc = phy_read(phydev, MII_BMCR);
+		} while (rc & BMCR_RESET);
+	}
+
+	rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
 	if (rc < 0)
 		return rc;
 
diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h
index ce718cbce435..f4bf16e16e16 100644
--- a/include/linux/smscphy.h
+++ b/include/linux/smscphy.h
@@ -4,6 +4,7 @@
 #define MII_LAN83C185_ISF 29 /* Interrupt Source Flags */
 #define MII_LAN83C185_IM  30 /* Interrupt Mask */
 #define MII_LAN83C185_CTRL_STATUS 17 /* Mode/Status Register */
+#define MII_LAN83C185_SPECIAL_MODES 18 /* Special Modes Register */
 
 #define MII_LAN83C185_ISF_INT1 (1<<1) /* Auto-Negotiation Page Received */
 #define MII_LAN83C185_ISF_INT2 (1<<2) /* Parallel Detection Fault */
@@ -22,4 +23,8 @@
 #define MII_LAN83C185_EDPWRDOWN (1 << 13) /* EDPWRDOWN */
 #define MII_LAN83C185_ENERGYON  (1 << 1)  /* ENERGYON */
 
+#define MII_LAN83C185_MODE_MASK      0xE0
+#define MII_LAN83C185_MODE_POWERDOWN 0xC0 /* Power Down mode */
+#define MII_LAN83C185_MODE_ALL       0xE0 /* All capable mode */
+
 #endif /* __LINUX_SMSCPHY_H__ */
-- 
cgit v1.2.3-55-g7522


From 6a674e9c75b17e7a88ff15b3c2e269eed54f7cfb Mon Sep 17 00:00:00 2001
From: Joseph Gasparakis
Date: Fri, 7 Dec 2012 14:14:14 +0000
Subject: net: Add support for hardware-offloaded encapsulation

This patch adds support in the kernel for offloading in the NIC Tx and Rx
checksumming for encapsulated packets (such as VXLAN and IP GRE).

For Tx encapsulation offload, the driver will need to set the right bits
in netdev->hw_enc_features. The protocol driver will have to set the
skb->encapsulation bit and populate the inner headers, so the NIC driver will
use those inner headers to calculate the csum in hardware.

For Rx encapsulation offload, the driver will need to set again the
skb->encapsulation flag and the skb->ip_csum to CHECKSUM_UNNECESSARY.
In that case the protocol driver should push the decapsulated packet up
to the stack, again with CHECKSUM_UNNECESSARY. In ether case, the protocol
driver should set the skb->encapsulation flag back to zero. Finally the
protocol driver should have NETIF_F_RXCSUM flag set in its features.

Signed-off-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h        |  5 +++
 include/linux/ipv6.h      |  5 +++
 include/linux/netdevice.h |  6 +++
 include/linux/skbuff.h    | 95 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/tcp.h       | 10 +++++
 include/linux/udp.h       |  5 +++
 net/core/skbuff.c         |  9 +++++
 7 files changed, 134 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 58b82a22a52b..492bc6513533 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -25,6 +25,11 @@ static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
 	return (struct iphdr *)skb_network_header(skb);
 }
 
+static inline struct iphdr *inner_ip_hdr(const struct sk_buff *skb)
+{
+	return (struct iphdr *)skb_inner_network_header(skb);
+}
+
 static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
 {
 	return (struct iphdr *)skb_transport_header(skb);
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 12729e966dc9..faed1e357dd6 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -67,6 +67,11 @@ static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 	return (struct ipv6hdr *)skb_network_header(skb);
 }
 
+static inline struct ipv6hdr *inner_ipv6_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv6hdr *)skb_inner_network_header(skb);
+}
+
 static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 {
 	return (struct ipv6hdr *)skb_transport_header(skb);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 18c5dc98f6dc..c6a14d4d1396 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1063,6 +1063,12 @@ struct net_device {
 	netdev_features_t	wanted_features;
 	/* mask of features inheritable by VLAN devices */
 	netdev_features_t	vlan_features;
+	/* mask of features inherited by encapsulating devices
+	 * This field indicates what encapsulation offloads
+	 * the hardware is capable of doing, and drivers will
+	 * need to set them appropriately.
+	 */
+	netdev_features_t	hw_enc_features;
 
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f2af494330ab..320e976d5ab8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -376,6 +376,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@mark: Generic packet mark
  *	@dropcount: total number of sk_receive_queue overflows
  *	@vlan_tci: vlan tag control information
+ *	@inner_transport_header: Inner transport layer header (encapsulation)
+ *	@inner_network_header: Network layer header (encapsulation)
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
@@ -471,7 +473,13 @@ struct sk_buff {
 	__u8			wifi_acked:1;
 	__u8			no_fcs:1;
 	__u8			head_frag:1;
-	/* 8/10 bit hole (depending on ndisc_nodetype presence) */
+	/* Encapsulation protocol and NIC drivers should use
+	 * this flag to indicate to each other if the skb contains
+	 * encapsulated packet or not and maybe use the inner packet
+	 * headers if needed
+	 */
+	__u8			encapsulation:1;
+	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #ifdef CONFIG_NET_DMA
@@ -486,6 +494,8 @@ struct sk_buff {
 		__u32		avail_size;
 	};
 
+	sk_buff_data_t		inner_transport_header;
+	sk_buff_data_t		inner_network_header;
 	sk_buff_data_t		transport_header;
 	sk_buff_data_t		network_header;
 	sk_buff_data_t		mac_header;
@@ -1435,12 +1445,53 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline void skb_reset_inner_headers(struct sk_buff *skb)
+{
+	skb->inner_network_header = skb->network_header;
+	skb->inner_transport_header = skb->transport_header;
+}
+
 static inline void skb_reset_mac_len(struct sk_buff *skb)
 {
 	skb->mac_len = skb->network_header - skb->mac_header;
 }
 
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
+static inline unsigned char *skb_inner_transport_header(const struct sk_buff
+							*skb)
+{
+	return skb->head + skb->inner_transport_header;
+}
+
+static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
+{
+	skb->inner_transport_header = skb->data - skb->head;
+}
+
+static inline void skb_set_inner_transport_header(struct sk_buff *skb,
+						   const int offset)
+{
+	skb_reset_inner_transport_header(skb);
+	skb->inner_transport_header += offset;
+}
+
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return skb->head + skb->inner_network_header;
+}
+
+static inline void skb_reset_inner_network_header(struct sk_buff *skb)
+{
+	skb->inner_network_header = skb->data - skb->head;
+}
+
+static inline void skb_set_inner_network_header(struct sk_buff *skb,
+						const int offset)
+{
+	skb_reset_inner_network_header(skb);
+	skb->inner_network_header += offset;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->head + skb->transport_header;
@@ -1496,6 +1547,38 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 }
 
 #else /* NET_SKBUFF_DATA_USES_OFFSET */
+static inline unsigned char *skb_inner_transport_header(const struct sk_buff
+							*skb)
+{
+	return skb->inner_transport_header;
+}
+
+static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
+{
+	skb->inner_transport_header = skb->data;
+}
+
+static inline void skb_set_inner_transport_header(struct sk_buff *skb,
+						   const int offset)
+{
+	skb->inner_transport_header = skb->data + offset;
+}
+
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return skb->inner_network_header;
+}
+
+static inline void skb_reset_inner_network_header(struct sk_buff *skb)
+{
+	skb->inner_network_header = skb->data;
+}
+
+static inline void skb_set_inner_network_header(struct sk_buff *skb,
+						const int offset)
+{
+	skb->inner_network_header = skb->data + offset;
+}
 
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
@@ -1574,11 +1657,21 @@ static inline u32 skb_network_header_len(const struct sk_buff *skb)
 	return skb->transport_header - skb->network_header;
 }
 
+static inline u32 skb_inner_network_header_len(const struct sk_buff *skb)
+{
+	return skb->inner_transport_header - skb->inner_network_header;
+}
+
 static inline int skb_network_offset(const struct sk_buff *skb)
 {
 	return skb_network_header(skb) - skb->data;
 }
 
+static inline int skb_inner_network_offset(const struct sk_buff *skb)
+{
+	return skb_inner_network_header(skb) - skb->data;
+}
+
 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 {
 	return pskb_may_pull(skb, skb_network_offset(skb) + len);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 60b7aac15e0e..4e1d2283e3cc 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -35,6 +35,16 @@ static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
 	return tcp_hdr(skb)->doff * 4;
 }
 
+static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb)
+{
+	return (struct tcphdr *)skb_inner_transport_header(skb);
+}
+
+static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb)
+{
+	return inner_tcp_hdr(skb)->doff * 4;
+}
+
 static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 {
 	return (tcp_hdr(skb)->doff - 5) * 4;
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0b67d7793520..9d81de123c90 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -27,6 +27,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 	return (struct udphdr *)skb_transport_header(skb);
 }
 
+static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb_inner_transport_header(skb);
+}
+
 #define UDP_HTABLE_SIZE_MIN		(CONFIG_BASE_SMALL ? 128 : 256)
 
 static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 880722e22cc5..ccbabf565732 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -682,11 +682,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
+	new->inner_transport_header = old->inner_transport_header;
+	new->inner_network_header = old->inner_transport_header;
 	skb_dst_copy(new, old);
 	new->rxhash		= old->rxhash;
 	new->ooo_okay		= old->ooo_okay;
 	new->l4_rxhash		= old->l4_rxhash;
 	new->no_fcs		= old->no_fcs;
+	new->encapsulation	= old->encapsulation;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -892,6 +895,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header   += offset;
 	if (skb_mac_header_was_set(new))
 		new->mac_header	      += offset;
+	new->inner_transport_header += offset;
+	new->inner_network_header   += offset;
 #endif
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -1089,6 +1094,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->network_header   += off;
 	if (skb_mac_header_was_set(skb))
 		skb->mac_header += off;
+	skb->inner_transport_header += off;
+	skb->inner_network_header += off;
 	/* Only adjust this if it actually is csum_start rather than csum */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->csum_start += nhead;
@@ -1188,6 +1195,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	n->network_header   += off;
 	if (skb_mac_header_was_set(skb))
 		n->mac_header += off;
+	n->inner_transport_header += off;
+	n->inner_network_header	   += off;
 #endif
 
 	return n;
-- 
cgit v1.2.3-55-g7522


From be2f6f5a7833b99bdee97c4b877dcd2afc6cdd00 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Sun, 9 Dec 2012 15:40:30 +0100
Subject: mfd: wm5102: Add readback of DSP status 3 register

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm5102-tables.c           | 2 ++
 include/linux/mfd/arizona/registers.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 005de63d01d6..8844b2f9de87 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -2336,6 +2336,7 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_DSP1_CLOCKING_1:
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
+	case ARIZONA_DSP1_STATUS_3:
 		return true;
 	default:
 		return false;
@@ -2386,6 +2387,7 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_AOD_IRQ_RAW_STATUS:
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
+	case ARIZONA_DSP1_STATUS_3:
 	case ARIZONA_HEADPHONE_DETECT_2:
 	case ARIZONA_MIC_DETECT_3:
 		return true;
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 7671a287dfee..81bca23c9a92 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -979,6 +979,7 @@
 #define ARIZONA_DSP1_CLOCKING_1                  0x1101
 #define ARIZONA_DSP1_STATUS_1                    0x1104
 #define ARIZONA_DSP1_STATUS_2                    0x1105
+#define ARIZONA_DSP1_STATUS_3                    0x1106
 #define ARIZONA_DSP2_CONTROL_1                   0x1200
 #define ARIZONA_DSP2_CLOCKING_1                  0x1201
 #define ARIZONA_DSP2_STATUS_1                    0x1204
-- 
cgit v1.2.3-55-g7522


From 759f5f3752e03f15727688b5288e360ef90c1306 Mon Sep 17 00:00:00 2001
From: Vivien Didelot
Date: Fri, 7 Dec 2012 21:36:34 -0500
Subject: gpio: add TS-5500 DIO blocks support

Technologic Systems TS-5500 provides digital I/O lines exposed through
pin blocks. On this platform, there are three of them, named DIO1, DIO2
and LCD port, that may be used as a DIO block.

The TS-5500 pin blocks are described in the product's wiki:
http://wiki.embeddedarm.com/wiki/TS-5500#Digital_I.2FO

This driver is not limited to the TS-5500 blocks. It can be extended to
support similar boards pin blocks, such as on the TS-5600.

This patch is the V2 of the previous https://lkml.org/lkml/2012/9/25/671
with corrections suggested by Linus Walleij.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: Jerome Oufella <jerome.oufella@savoirfairelinux.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/Kconfig                      |   8 +
 drivers/gpio/Makefile                     |   1 +
 drivers/gpio/gpio-ts5500.c                | 466 ++++++++++++++++++++++++++++++
 include/linux/platform_data/gpio-ts5500.h |  27 ++
 4 files changed, 502 insertions(+)
 create mode 100644 drivers/gpio/gpio-ts5500.c
 create mode 100644 include/linux/platform_data/gpio-ts5500.h

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 46e96f3fc3f8..39d9323abbda 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -211,6 +211,14 @@ config GPIO_STA2X11
 	  Say yes here to support the STA2x11/ConneXt GPIO device.
 	  The GPIO module has 128 GPIO pins with alternate functions.
 
+config GPIO_TS5500
+	tristate "TS-5500 DIO blocks and compatibles"
+	help
+	  This driver supports Digital I/O exposed by pin blocks found on some
+	  Technologic Systems platforms. It includes, but is not limited to, 3
+	  blocks of the TS-5500: DIO1, DIO2 and the LCD port, and the TS-5600
+	  LCD port.
+
 config GPIO_VT8500
 	bool "VIA/Wondermedia SoC GPIO Support"
 	depends on ARCH_VT8500
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index a268d99f4e43..a41c2503bd40 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_ARCH_DAVINCI_TNETV107X) += gpio-tnetv107x.o
 obj-$(CONFIG_GPIO_TPS6586X)	+= gpio-tps6586x.o
 obj-$(CONFIG_GPIO_TPS65910)	+= gpio-tps65910.o
 obj-$(CONFIG_GPIO_TPS65912)	+= gpio-tps65912.o
+obj-$(CONFIG_GPIO_TS5500)	+= gpio-ts5500.o
 obj-$(CONFIG_GPIO_TWL4030)	+= gpio-twl4030.o
 obj-$(CONFIG_GPIO_TWL6040)	+= gpio-twl6040.o
 obj-$(CONFIG_GPIO_UCB1400)	+= gpio-ucb1400.o
diff --git a/drivers/gpio/gpio-ts5500.c b/drivers/gpio/gpio-ts5500.c
new file mode 100644
index 000000000000..0634ceea3c24
--- /dev/null
+++ b/drivers/gpio/gpio-ts5500.c
@@ -0,0 +1,466 @@
+/*
+ * Digital I/O driver for Technologic Systems TS-5500
+ *
+ * Copyright (c) 2012 Savoir-faire Linux Inc.
+ *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * Technologic Systems platforms have pin blocks, exposing several Digital
+ * Input/Output lines (DIO). This driver aims to support single pin blocks.
+ * In that sense, the support is not limited to the TS-5500 blocks.
+ * Actually, the following platforms have DIO support:
+ *
+ * TS-5500:
+ *   Documentation: http://wiki.embeddedarm.com/wiki/TS-5500
+ *   Blocks: DIO1, DIO2 and LCD port.
+ *
+ * TS-5600:
+ *   Documentation: http://wiki.embeddedarm.com/wiki/TS-5600
+ *   Blocks: LCD port (identical to TS-5500 LCD).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_data/gpio-ts5500.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* List of supported Technologic Systems platforms DIO blocks */
+enum ts5500_blocks { TS5500_DIO1, TS5500_DIO2, TS5500_LCD, TS5600_LCD };
+
+struct ts5500_priv {
+	const struct ts5500_dio *pinout;
+	struct gpio_chip gpio_chip;
+	spinlock_t lock;
+	bool strap;
+	u8 hwirq;
+};
+
+/*
+ * Hex 7D is used to control several blocks (e.g. DIO2 and LCD port).
+ * This flag ensures that the region has been requested by this driver.
+ */
+static bool hex7d_reserved;
+
+/*
+ * This structure is used to describe capabilities of DIO lines,
+ * such as available directions and connected interrupt (if any).
+ */
+struct ts5500_dio {
+	const u8 value_addr;
+	const u8 value_mask;
+	const u8 control_addr;
+	const u8 control_mask;
+	const bool no_input;
+	const bool no_output;
+	const u8 irq;
+};
+
+#define TS5500_DIO_IN_OUT(vaddr, vbit, caddr, cbit)	\
+	{						\
+		.value_addr = vaddr,			\
+		.value_mask = BIT(vbit),		\
+		.control_addr = caddr,			\
+		.control_mask = BIT(cbit),		\
+	}
+
+#define TS5500_DIO_IN(addr, bit)		\
+	{					\
+		.value_addr = addr,		\
+		.value_mask = BIT(bit),		\
+		.no_output = true,		\
+	}
+
+#define TS5500_DIO_IN_IRQ(addr, bit, _irq)	\
+	{					\
+		.value_addr = addr,		\
+		.value_mask = BIT(bit),		\
+		.no_output = true,		\
+		.irq = _irq,			\
+	}
+
+#define TS5500_DIO_OUT(addr, bit)		\
+	{					\
+		.value_addr = addr,		\
+		.value_mask = BIT(bit),		\
+		.no_input = true,		\
+	}
+
+/*
+ * Input/Output DIO lines are programmed in groups of 4. Their values are
+ * available through 4 consecutive bits in a value port, whereas the direction
+ * of these 4 lines is driven by only 1 bit in a control port.
+ */
+#define TS5500_DIO_GROUP(vaddr, vbitfrom, caddr, cbit)		\
+	TS5500_DIO_IN_OUT(vaddr, vbitfrom + 0, caddr, cbit),	\
+	TS5500_DIO_IN_OUT(vaddr, vbitfrom + 1, caddr, cbit),	\
+	TS5500_DIO_IN_OUT(vaddr, vbitfrom + 2, caddr, cbit),	\
+	TS5500_DIO_IN_OUT(vaddr, vbitfrom + 3, caddr, cbit)
+
+/*
+ * TS-5500 DIO1 block
+ *
+ *  value    control  dir    hw
+ *  addr bit addr bit in out irq name     pin offset
+ *
+ *  0x7b  0  0x7a  0  x   x      DIO1_0   1   0
+ *  0x7b  1  0x7a  0  x   x      DIO1_1   3   1
+ *  0x7b  2  0x7a  0  x   x      DIO1_2   5   2
+ *  0x7b  3  0x7a  0  x   x      DIO1_3   7   3
+ *  0x7b  4  0x7a  1  x   x      DIO1_4   9   4
+ *  0x7b  5  0x7a  1  x   x      DIO1_5   11  5
+ *  0x7b  6  0x7a  1  x   x      DIO1_6   13  6
+ *  0x7b  7  0x7a  1  x   x      DIO1_7   15  7
+ *  0x7c  0  0x7a  5  x   x      DIO1_8   4   8
+ *  0x7c  1  0x7a  5  x   x      DIO1_9   6   9
+ *  0x7c  2  0x7a  5  x   x      DIO1_10  8   10
+ *  0x7c  3  0x7a  5  x   x      DIO1_11  10  11
+ *  0x7c  4           x          DIO1_12  12  12
+ *  0x7c  5           x      7   DIO1_13  14  13
+ */
+static const struct ts5500_dio ts5500_dio1[] = {
+	TS5500_DIO_GROUP(0x7b, 0, 0x7a, 0),
+	TS5500_DIO_GROUP(0x7b, 4, 0x7a, 1),
+	TS5500_DIO_GROUP(0x7c, 0, 0x7a, 5),
+	TS5500_DIO_IN(0x7c, 4),
+	TS5500_DIO_IN_IRQ(0x7c, 5, 7),
+};
+
+/*
+ * TS-5500 DIO2 block
+ *
+ *  value    control  dir    hw
+ *  addr bit addr bit in out irq name     pin offset
+ *
+ *  0x7e  0  0x7d  0  x   x      DIO2_0   1   0
+ *  0x7e  1  0x7d  0  x   x      DIO2_1   3   1
+ *  0x7e  2  0x7d  0  x   x      DIO2_2   5   2
+ *  0x7e  3  0x7d  0  x   x      DIO2_3   7   3
+ *  0x7e  4  0x7d  1  x   x      DIO2_4   9   4
+ *  0x7e  5  0x7d  1  x   x      DIO2_5   11  5
+ *  0x7e  6  0x7d  1  x   x      DIO2_6   13  6
+ *  0x7e  7  0x7d  1  x   x      DIO2_7   15  7
+ *  0x7f  0  0x7d  5  x   x      DIO2_8   4   8
+ *  0x7f  1  0x7d  5  x   x      DIO2_9   6   9
+ *  0x7f  2  0x7d  5  x   x      DIO2_10  8   10
+ *  0x7f  3  0x7d  5  x   x      DIO2_11  10  11
+ *  0x7f  4           x      6   DIO2_13  14  12
+ */
+static const struct ts5500_dio ts5500_dio2[] = {
+	TS5500_DIO_GROUP(0x7e, 0, 0x7d, 0),
+	TS5500_DIO_GROUP(0x7e, 4, 0x7d, 1),
+	TS5500_DIO_GROUP(0x7f, 0, 0x7d, 5),
+	TS5500_DIO_IN_IRQ(0x7f, 4, 6),
+};
+
+/*
+ * TS-5500 LCD port used as DIO block
+ * TS-5600 LCD port is identical
+ *
+ *  value    control  dir    hw
+ *  addr bit addr bit in out irq name    pin offset
+ *
+ *  0x72  0  0x7d  2  x   x      LCD_0   8   0
+ *  0x72  1  0x7d  2  x   x      LCD_1   7   1
+ *  0x72  2  0x7d  2  x   x      LCD_2   10  2
+ *  0x72  3  0x7d  2  x   x      LCD_3   9   3
+ *  0x72  4  0x7d  3  x   x      LCD_4   12  4
+ *  0x72  5  0x7d  3  x   x      LCD_5   11  5
+ *  0x72  6  0x7d  3  x   x      LCD_6   14  6
+ *  0x72  7  0x7d  3  x   x      LCD_7   13  7
+ *  0x73  0               x      LCD_EN  5   8
+ *  0x73  6           x          LCD_WR  6   9
+ *  0x73  7           x      1   LCD_RS  3   10
+ */
+static const struct ts5500_dio ts5500_lcd[] = {
+	TS5500_DIO_GROUP(0x72, 0, 0x7d, 2),
+	TS5500_DIO_GROUP(0x72, 4, 0x7d, 3),
+	TS5500_DIO_OUT(0x73, 0),
+	TS5500_DIO_IN(0x73, 6),
+	TS5500_DIO_IN_IRQ(0x73, 7, 1),
+};
+
+static inline struct ts5500_priv *ts5500_gc_to_priv(struct gpio_chip *chip)
+{
+	return container_of(chip, struct ts5500_priv, gpio_chip);
+}
+
+static inline void ts5500_set_mask(u8 mask, u8 addr)
+{
+	u8 val = inb(addr);
+	val |= mask;
+	outb(val, addr);
+}
+
+static inline void ts5500_clear_mask(u8 mask, u8 addr)
+{
+	u8 val = inb(addr);
+	val &= ~mask;
+	outb(val, addr);
+}
+
+static int ts5500_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct ts5500_priv *priv = ts5500_gc_to_priv(chip);
+	const struct ts5500_dio line = priv->pinout[offset];
+	unsigned long flags;
+
+	if (line.no_input)
+		return -ENXIO;
+
+	if (line.no_output)
+		return 0;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	ts5500_clear_mask(line.control_mask, line.control_addr);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int ts5500_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct ts5500_priv *priv = ts5500_gc_to_priv(chip);
+	const struct ts5500_dio line = priv->pinout[offset];
+
+	return !!(inb(line.value_addr) & line.value_mask);
+}
+
+static int ts5500_gpio_output(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct ts5500_priv *priv = ts5500_gc_to_priv(chip);
+	const struct ts5500_dio line = priv->pinout[offset];
+	unsigned long flags;
+
+	if (line.no_output)
+		return -ENXIO;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (!line.no_input)
+		ts5500_set_mask(line.control_mask, line.control_addr);
+
+	if (val)
+		ts5500_set_mask(line.value_mask, line.value_addr);
+	else
+		ts5500_clear_mask(line.value_mask, line.value_addr);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static void ts5500_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct ts5500_priv *priv = ts5500_gc_to_priv(chip);
+	const struct ts5500_dio line = priv->pinout[offset];
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (val)
+		ts5500_set_mask(line.value_mask, line.value_addr);
+	else
+		ts5500_clear_mask(line.value_mask, line.value_addr);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ts5500_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct ts5500_priv *priv = ts5500_gc_to_priv(chip);
+	const struct ts5500_dio *block = priv->pinout;
+	const struct ts5500_dio line = block[offset];
+
+	/* Only one pin is connected to an interrupt */
+	if (line.irq)
+		return line.irq;
+
+	/* As this pin is input-only, we may strap it to another in/out pin */
+	if (priv->strap)
+		return priv->hwirq;
+
+	return -ENXIO;
+}
+
+static int ts5500_enable_irq(struct ts5500_priv *priv)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (priv->hwirq == 7)
+		ts5500_set_mask(BIT(7), 0x7a); /* DIO1_13 on IRQ7 */
+	else if (priv->hwirq == 6)
+		ts5500_set_mask(BIT(7), 0x7d); /* DIO2_13 on IRQ6 */
+	else if (priv->hwirq == 1)
+		ts5500_set_mask(BIT(6), 0x7d); /* LCD_RS on IRQ1 */
+	else
+		ret = -EINVAL;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return ret;
+}
+
+static void ts5500_disable_irq(struct ts5500_priv *priv)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (priv->hwirq == 7)
+		ts5500_clear_mask(BIT(7), 0x7a); /* DIO1_13 on IRQ7 */
+	else if (priv->hwirq == 6)
+		ts5500_clear_mask(BIT(7), 0x7d); /* DIO2_13 on IRQ6 */
+	else if (priv->hwirq == 1)
+		ts5500_clear_mask(BIT(6), 0x7d); /* LCD_RS on IRQ1 */
+	else
+		dev_err(priv->gpio_chip.dev, "invalid hwirq %d\n", priv->hwirq);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int __devinit ts5500_dio_probe(struct platform_device *pdev)
+{
+	enum ts5500_blocks block = platform_get_device_id(pdev)->driver_data;
+	struct ts5500_dio_platform_data *pdata = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	const char *name = dev_name(dev);
+	struct ts5500_priv *priv;
+	struct resource *res;
+	unsigned long flags;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "missing IRQ resource\n");
+		return -EINVAL;
+	}
+
+	priv = devm_kzalloc(dev, sizeof(struct ts5500_priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+	priv->hwirq = res->start;
+	spin_lock_init(&priv->lock);
+
+	priv->gpio_chip.owner = THIS_MODULE;
+	priv->gpio_chip.label = name;
+	priv->gpio_chip.dev = dev;
+	priv->gpio_chip.direction_input = ts5500_gpio_input;
+	priv->gpio_chip.direction_output = ts5500_gpio_output;
+	priv->gpio_chip.get = ts5500_gpio_get;
+	priv->gpio_chip.set = ts5500_gpio_set;
+	priv->gpio_chip.to_irq = ts5500_gpio_to_irq;
+	priv->gpio_chip.base = -1;
+	if (pdata) {
+		priv->gpio_chip.base = pdata->base;
+		priv->strap = pdata->strap;
+	}
+
+	switch (block) {
+	case TS5500_DIO1:
+		priv->pinout = ts5500_dio1;
+		priv->gpio_chip.ngpio = ARRAY_SIZE(ts5500_dio1);
+
+		if (!devm_request_region(dev, 0x7a, 3, name)) {
+			dev_err(dev, "failed to request %s ports\n", name);
+			return -EBUSY;
+		}
+		break;
+	case TS5500_DIO2:
+		priv->pinout = ts5500_dio2;
+		priv->gpio_chip.ngpio = ARRAY_SIZE(ts5500_dio2);
+
+		if (!devm_request_region(dev, 0x7e, 2, name)) {
+			dev_err(dev, "failed to request %s ports\n", name);
+			return -EBUSY;
+		}
+
+		if (hex7d_reserved)
+			break;
+
+		if (!devm_request_region(dev, 0x7d, 1, name)) {
+			dev_err(dev, "failed to request %s 7D\n", name);
+			return -EBUSY;
+		}
+
+		hex7d_reserved = true;
+		break;
+	case TS5500_LCD:
+	case TS5600_LCD:
+		priv->pinout = ts5500_lcd;
+		priv->gpio_chip.ngpio = ARRAY_SIZE(ts5500_lcd);
+
+		if (!devm_request_region(dev, 0x72, 2, name)) {
+			dev_err(dev, "failed to request %s ports\n", name);
+			return -EBUSY;
+		}
+
+		if (!hex7d_reserved) {
+			if (!devm_request_region(dev, 0x7d, 1, name)) {
+				dev_err(dev, "failed to request %s 7D\n", name);
+				return -EBUSY;
+			}
+
+			hex7d_reserved = true;
+		}
+
+		/* Ensure usage of LCD port as DIO */
+		spin_lock_irqsave(&priv->lock, flags);
+		ts5500_clear_mask(BIT(4), 0x7d);
+		spin_unlock_irqrestore(&priv->lock, flags);
+		break;
+	}
+
+	ret = gpiochip_add(&priv->gpio_chip);
+	if (ret) {
+		dev_err(dev, "failed to register the gpio chip\n");
+		return ret;
+	}
+
+	ret = ts5500_enable_irq(priv);
+	if (ret) {
+		dev_err(dev, "invalid interrupt %d\n", priv->hwirq);
+		goto cleanup;
+	}
+
+	return 0;
+cleanup:
+	if (gpiochip_remove(&priv->gpio_chip))
+		dev_err(dev, "failed to remove gpio chip\n");
+	return ret;
+}
+
+static int __devexit ts5500_dio_remove(struct platform_device *pdev)
+{
+	struct ts5500_priv *priv = platform_get_drvdata(pdev);
+
+	ts5500_disable_irq(priv);
+	return gpiochip_remove(&priv->gpio_chip);
+}
+
+static struct platform_device_id ts5500_dio_ids[] = {
+	{ "ts5500-dio1", TS5500_DIO1 },
+	{ "ts5500-dio2", TS5500_DIO2 },
+	{ "ts5500-dio-lcd", TS5500_LCD },
+	{ "ts5600-dio-lcd", TS5600_LCD },
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, ts5500_dio_ids);
+
+static struct platform_driver ts5500_dio_driver = {
+	.driver = {
+		.name = "ts5500-dio",
+		.owner = THIS_MODULE,
+	},
+	.probe = ts5500_dio_probe,
+	.remove = __devexit_p(ts5500_dio_remove),
+	.id_table = ts5500_dio_ids,
+};
+
+module_platform_driver(ts5500_dio_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
+MODULE_DESCRIPTION("Technologic Systems TS-5500 Digital I/O driver");
diff --git a/include/linux/platform_data/gpio-ts5500.h b/include/linux/platform_data/gpio-ts5500.h
new file mode 100644
index 000000000000..b10d11c9bb49
--- /dev/null
+++ b/include/linux/platform_data/gpio-ts5500.h
@@ -0,0 +1,27 @@
+/*
+ * GPIO (DIO) header for Technologic Systems TS-5500
+ *
+ * Copyright (c) 2012 Savoir-faire Linux Inc.
+ *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PDATA_GPIO_TS5500_H
+#define _PDATA_GPIO_TS5500_H
+
+/**
+ * struct ts5500_dio_platform_data - TS-5500 pin block configuration
+ * @base:	The GPIO base number to use.
+ * @strap:	The only pin connected to an interrupt in a block is input-only.
+ *		If you need a bidirectional line which can trigger an IRQ, you
+ *		may strap it with an in/out pin. This flag indicates this case.
+ */
+struct ts5500_dio_platform_data {
+	int base;
+	bool strap;
+};
+
+#endif /* _PDATA_GPIO_TS5500_H */
-- 
cgit v1.2.3-55-g7522


From dbd3fc3345390a989a033427aa915a0dfb62149f Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Mon, 10 Dec 2012 11:24:42 -0700
Subject: PCI: Use phys_addr_t for physical ROM address

Use phys_addr_t rather than "void *" for physical memory address.
This removes casts and fixes a "cast from pointer to integer of different
size" warning on ppc44x_defconfig.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>---
 arch/x86/pci/common.c | 4 ++--
 drivers/pci/rom.c     | 2 +-
 include/linux/pci.h   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index fddb9f66cc47..d07f3bbca5a1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -628,8 +628,8 @@ int pcibios_add_device(struct pci_dev *dev)
 			    (PCI_FUNC(dev->devfn) == rom->function) &&
 			    (dev->vendor == rom->vendor) &&
 			    (dev->device == rom->devid)) {
-				dev->rom = (void *)(unsigned long)(pa_data +
-				      offsetof(struct pci_setup_rom, romdata));
+				dev->rom = pa_data +
+				      offsetof(struct pci_setup_rom, romdata);
 				dev->romlen = rom->pcilen;
 			}
 		}
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 3a3828fbc879..ab886b7ee327 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -122,7 +122,7 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
 	 */
 	if (pdev->rom && pdev->romlen) {
 		*size = pdev->romlen;
-		return phys_to_virt((phys_addr_t)pdev->rom);
+		return phys_to_virt(pdev->rom);
 	/*
 	 * IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy
 	 * memory map if the VGA enable bit of the Bridge Control register is
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f116b2d859dc..957563b7a5e3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -333,7 +333,7 @@ struct pci_dev {
 	};
 	struct pci_ats	*ats;	/* Address Translation Service */
 #endif
-	void *rom; /* Physical pointer to ROM if it's not from the BAR */
+	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
 	size_t romlen; /* Length of ROM if it's not from the BAR */
 };
 
-- 
cgit v1.2.3-55-g7522


From ab5c4f71d8c7add173a2d32e5beefdaaf1b7cbbc Mon Sep 17 00:00:00 2001
From: Gabor Juhos
Date: Mon, 10 Dec 2012 15:30:28 +0100
Subject: ath9k: allow to load EEPROM content via firmware API

The calibration data for devices w/o a separate
EEPROM chip can be specified via the 'eeprom_data'
field of 'ath9k_platform_data'. The 'eeprom_data'
is usually filled from board specific setup
functions. It is easy if the EEPROM data is mapped
to the memory, but it can be complicated if it is
stored elsewhere.

The patch adds support for loading of the EEPROM
data via the firmware API to avoid this limitation.

Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/eeprom.c | 19 ++++++++++-
 drivers/net/wireless/ath/ath9k/hw.h     |  3 ++
 drivers/net/wireless/ath/ath9k/init.c   | 60 ++++++++++++++++++++++++++++++++-
 include/linux/ath9k_platform.h          |  2 ++
 4 files changed, 82 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c
index eb9ac5ea6180..971d770722cf 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/eeprom.c
@@ -113,12 +113,29 @@ void ath9k_hw_usb_gen_fill_eeprom(struct ath_hw *ah, u16 *eep_data,
 	}
 }
 
+static bool ath9k_hw_nvram_read_blob(struct ath_hw *ah, u32 off,
+				     u16 *data)
+{
+	u16 *blob_data;
+
+	if (off * sizeof(u16) > ah->eeprom_blob->size)
+		return false;
+
+	blob_data = (u16 *)ah->eeprom_blob->data;
+	*data =  blob_data[off];
+	return true;
+}
+
 bool ath9k_hw_nvram_read(struct ath_hw *ah, u32 off, u16 *data)
 {
 	struct ath_common *common = ath9k_hw_common(ah);
 	bool ret;
 
-	ret = common->bus_ops->eeprom_read(common, off, data);
+	if (ah->eeprom_blob)
+		ret = ath9k_hw_nvram_read_blob(ah, off, data);
+	else
+		ret = common->bus_ops->eeprom_read(common, off, data);
+
 	if (!ret)
 		ath_dbg(common, EEPROM,
 			"unable to read eeprom region at offset %u\n", off);
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index eff67592db18..7f1a8e91c908 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -20,6 +20,7 @@
 #include <linux/if_ether.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/firmware.h>
 
 #include "mac.h"
 #include "ani.h"
@@ -921,6 +922,8 @@ struct ath_hw {
 	bool is_clk_25mhz;
 	int (*get_mac_revision)(void);
 	int (*external_reset)(void);
+
+	const struct firmware *eeprom_blob;
 };
 
 struct ath_bus_ops {
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 80cae53a33e5..27703a5e48d6 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -23,6 +23,11 @@
 
 #include "ath9k.h"
 
+struct ath9k_eeprom_ctx {
+	struct completion complete;
+	struct ath_hw *ah;
+};
+
 static char *dev_info = "ath9k";
 
 MODULE_AUTHOR("Atheros Communications");
@@ -506,6 +511,51 @@ static void ath9k_init_misc(struct ath_softc *sc)
 		sc->ant_comb.count = ATH_ANT_DIV_COMB_INIT_COUNT;
 }
 
+static void ath9k_eeprom_request_cb(const struct firmware *eeprom_blob,
+				    void *ctx)
+{
+	struct ath9k_eeprom_ctx *ec = ctx;
+
+	if (eeprom_blob)
+		ec->ah->eeprom_blob = eeprom_blob;
+
+	complete(&ec->complete);
+}
+
+static int ath9k_eeprom_request(struct ath_softc *sc, const char *name)
+{
+	struct ath9k_eeprom_ctx ec;
+	struct ath_hw *ah = ah = sc->sc_ah;
+	int err;
+
+	/* try to load the EEPROM content asynchronously */
+	init_completion(&ec.complete);
+	ec.ah = sc->sc_ah;
+
+	err = request_firmware_nowait(THIS_MODULE, 1, name, sc->dev, GFP_KERNEL,
+				      &ec, ath9k_eeprom_request_cb);
+	if (err < 0) {
+		ath_err(ath9k_hw_common(ah),
+			"EEPROM request failed\n");
+		return err;
+	}
+
+	wait_for_completion(&ec.complete);
+
+	if (!ah->eeprom_blob) {
+		ath_err(ath9k_hw_common(ah),
+			"Unable to load EEPROM file %s\n", name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void ath9k_eeprom_release(struct ath_softc *sc)
+{
+	release_firmware(sc->sc_ah->eeprom_blob);
+}
+
 static int ath9k_init_softc(u16 devid, struct ath_softc *sc,
 			    const struct ath_bus_ops *bus_ops)
 {
@@ -583,6 +633,12 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc,
 	ath_read_cachesize(common, &csz);
 	common->cachelsz = csz << 2; /* convert to bytes */
 
+	if (pdata->eeprom_name) {
+		ret = ath9k_eeprom_request(sc, pdata->eeprom_name);
+		if (ret)
+			goto err_eeprom;
+	}
+
 	/* Initializes the hardware for all supported chipsets */
 	ret = ath9k_hw_init(ah);
 	if (ret)
@@ -619,7 +675,8 @@ err_btcoex:
 err_queues:
 	ath9k_hw_deinit(ah);
 err_hw:
-
+	ath9k_eeprom_release(sc);
+err_eeprom:
 	kfree(ah);
 	sc->sc_ah = NULL;
 
@@ -882,6 +939,7 @@ static void ath9k_deinit_softc(struct ath_softc *sc)
 	if (sc->dfs_detector != NULL)
 		sc->dfs_detector->exit(sc->dfs_detector);
 
+	ath9k_eeprom_release(sc);
 	kfree(sc->sc_ah);
 	sc->sc_ah = NULL;
 }
diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h
index 6e3f54f37844..fcdd81bd5314 100644
--- a/include/linux/ath9k_platform.h
+++ b/include/linux/ath9k_platform.h
@@ -22,6 +22,8 @@
 #define ATH9K_PLAT_EEP_MAX_WORDS	2048
 
 struct ath9k_platform_data {
+	const char *eeprom_name;
+
 	u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS];
 	u8 *macaddr;
 
-- 
cgit v1.2.3-55-g7522


From dd31866b0d55c9b70722ebad6ccd643223d9269e Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Fri, 2 Nov 2012 17:06:26 +0900
Subject: f2fs: add on-disk layout

This adds a header file describing the on-disk layout of f2fs.

Signed-off-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Chul Lee <chur.lee@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 include/linux/f2fs_fs.h | 410 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 include/linux/f2fs_fs.h

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 000000000000..1429ece7caab
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,410 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include <linux/pagemap.h>
+#include <linux/types.h>
+
+#define F2FS_SUPER_OFFSET		1024	/* byte-size offset */
+#define F2FS_LOG_SECTOR_SIZE		9	/* 9 bits for 512 byte */
+#define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
+#define F2FS_BLKSIZE			4096	/* support only 4KB block */
+#define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+
+#define NULL_ADDR		0x0U
+#define NEW_ADDR		-1U
+
+#define F2FS_ROOT_INO(sbi)	(sbi->root_ino_num)
+#define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
+#define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
+
+/* This flag is used by node and meta inodes, and by recovery */
+#define GFP_F2FS_ZERO	(GFP_NOFS | __GFP_ZERO)
+
+/*
+ * For further optimization on multi-head logs, on-disk layout supports maximum
+ * 16 logs by default. The number, 16, is expected to cover all the cases
+ * enoughly. The implementaion currently uses no more than 6 logs.
+ * Half the logs are used for nodes, and the other half are used for data.
+ */
+#define MAX_ACTIVE_LOGS	16
+#define MAX_ACTIVE_NODE_LOGS	8
+#define MAX_ACTIVE_DATA_LOGS	8
+
+/*
+ * For superblock
+ */
+struct f2fs_super_block {
+	__le32 magic;			/* Magic Number */
+	__le16 major_ver;		/* Major Version */
+	__le16 minor_ver;		/* Minor Version */
+	__le32 log_sectorsize;		/* log2 sector size in bytes */
+	__le32 log_sectors_per_block;	/* log2 # of sectors per block */
+	__le32 log_blocksize;		/* log2 block size in bytes */
+	__le32 log_blocks_per_seg;	/* log2 # of blocks per segment */
+	__le32 segs_per_sec;		/* # of segments per section */
+	__le32 secs_per_zone;		/* # of sections per zone */
+	__le32 checksum_offset;		/* checksum offset inside super block */
+	__le64 block_count;		/* total # of user blocks */
+	__le32 section_count;		/* total # of sections */
+	__le32 segment_count;		/* total # of segments */
+	__le32 segment_count_ckpt;	/* # of segments for checkpoint */
+	__le32 segment_count_sit;	/* # of segments for SIT */
+	__le32 segment_count_nat;	/* # of segments for NAT */
+	__le32 segment_count_ssa;	/* # of segments for SSA */
+	__le32 segment_count_main;	/* # of segments for main area */
+	__le32 segment0_blkaddr;	/* start block address of segment 0 */
+	__le32 cp_blkaddr;		/* start block address of checkpoint */
+	__le32 sit_blkaddr;		/* start block address of SIT */
+	__le32 nat_blkaddr;		/* start block address of NAT */
+	__le32 ssa_blkaddr;		/* start block address of SSA */
+	__le32 main_blkaddr;		/* start block address of main area */
+	__le32 root_ino;		/* root inode number */
+	__le32 node_ino;		/* node inode number */
+	__le32 meta_ino;		/* meta inode number */
+	__u8 uuid[16];			/* 128-bit uuid for volume */
+	__le16 volume_name[512];	/* volume name */
+	__le32 extension_count;		/* # of extensions below */
+	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+} __packed;
+
+/*
+ * For checkpoint
+ */
+#define CP_ERROR_FLAG		0x00000008
+#define CP_COMPACT_SUM_FLAG	0x00000004
+#define CP_ORPHAN_PRESENT_FLAG	0x00000002
+#define CP_UMOUNT_FLAG		0x00000001
+
+struct f2fs_checkpoint {
+	__le64 checkpoint_ver;		/* checkpoint block version number */
+	__le64 user_block_count;	/* # of user blocks */
+	__le64 valid_block_count;	/* # of valid blocks in main area */
+	__le32 rsvd_segment_count;	/* # of reserved segments for gc */
+	__le32 overprov_segment_count;	/* # of overprovision segments */
+	__le32 free_segment_count;	/* # of free segments in main area */
+
+	/* information of current node segments */
+	__le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
+	__le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
+	/* information of current data segments */
+	__le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
+	__le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
+	__le32 ckpt_flags;		/* Flags : umount and journal_present */
+	__le32 cp_pack_total_block_count;	/* total # of one cp pack */
+	__le32 cp_pack_start_sum;	/* start block number of data summary */
+	__le32 valid_node_count;	/* Total number of valid nodes */
+	__le32 valid_inode_count;	/* Total number of valid inodes */
+	__le32 next_free_nid;		/* Next free node number */
+	__le32 sit_ver_bitmap_bytesize;	/* Default value 64 */
+	__le32 nat_ver_bitmap_bytesize; /* Default value 256 */
+	__le32 checksum_offset;		/* checksum offset inside cp block */
+	__le64 elapsed_time;		/* mounted time */
+	/* allocation type of current segment */
+	unsigned char alloc_type[MAX_ACTIVE_LOGS];
+
+	/* SIT and NAT version bitmap */
+	unsigned char sit_nat_version_bitmap[1];
+} __packed;
+
+/*
+ * For orphan inode management
+ */
+#define F2FS_ORPHANS_PER_BLOCK	1020
+
+struct f2fs_orphan_block {
+	__le32 ino[F2FS_ORPHANS_PER_BLOCK];	/* inode numbers */
+	__le32 reserved;	/* reserved */
+	__le16 blk_addr;	/* block index in current CP */
+	__le16 blk_count;	/* Number of orphan inode blocks in CP */
+	__le32 entry_count;	/* Total number of orphan nodes in current CP */
+	__le32 check_sum;	/* CRC32 for orphan inode block */
+} __packed;
+
+/*
+ * For NODE structure
+ */
+struct f2fs_extent {
+	__le32 fofs;		/* start file offset of the extent */
+	__le32 blk_addr;	/* start block address of the extent */
+	__le32 len;		/* lengh of the extent */
+} __packed;
+
+#define F2FS_MAX_NAME_LEN	256
+#define ADDRS_PER_INODE         923	/* Address Pointers in an Inode */
+#define ADDRS_PER_BLOCK         1018	/* Address Pointers in a Direct Block */
+#define NIDS_PER_BLOCK          1018	/* Node IDs in an Indirect Block */
+
+struct f2fs_inode {
+	__le16 i_mode;			/* file mode */
+	__u8 i_advise;			/* file hints */
+	__u8 i_reserved;		/* reserved */
+	__le32 i_uid;			/* user ID */
+	__le32 i_gid;			/* group ID */
+	__le32 i_links;			/* links count */
+	__le64 i_size;			/* file size in bytes */
+	__le64 i_blocks;		/* file size in blocks */
+	__le64 i_atime;			/* access time */
+	__le64 i_ctime;			/* change time */
+	__le64 i_mtime;			/* modification time */
+	__le32 i_atime_nsec;		/* access time in nano scale */
+	__le32 i_ctime_nsec;		/* change time in nano scale */
+	__le32 i_mtime_nsec;		/* modification time in nano scale */
+	__le32 i_generation;		/* file version (for NFS) */
+	__le32 i_current_depth;		/* only for directory depth */
+	__le32 i_xattr_nid;		/* nid to save xattr */
+	__le32 i_flags;			/* file attributes */
+	__le32 i_pino;			/* parent inode number */
+	__le32 i_namelen;		/* file name length */
+	__u8 i_name[F2FS_MAX_NAME_LEN];	/* file name for SPOR */
+
+	struct f2fs_extent i_ext;	/* caching a largest extent */
+
+	__le32 i_addr[ADDRS_PER_INODE];	/* Pointers to data blocks */
+
+	__le32 i_nid[5];		/* direct(2), indirect(2),
+						double_indirect(1) node id */
+} __packed;
+
+struct direct_node {
+	__le32 addr[ADDRS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+struct indirect_node {
+	__le32 nid[NIDS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+enum {
+	COLD_BIT_SHIFT = 0,
+	FSYNC_BIT_SHIFT,
+	DENT_BIT_SHIFT,
+	OFFSET_BIT_SHIFT
+};
+
+struct node_footer {
+	__le32 nid;		/* node id */
+	__le32 ino;		/* inode nunmber */
+	__le32 flag;		/* include cold/fsync/dentry marks and offset */
+	__le64 cp_ver;		/* checkpoint version */
+	__le32 next_blkaddr;	/* next node page block address */
+} __packed;
+
+struct f2fs_node {
+	/* can be one of three types: inode, direct, and indirect types */
+	union {
+		struct f2fs_inode i;
+		struct direct_node dn;
+		struct indirect_node in;
+	};
+	struct node_footer footer;
+} __packed;
+
+/*
+ * For NAT entries
+ */
+#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+
+struct f2fs_nat_entry {
+	__u8 version;		/* latest version of cached nat entry */
+	__le32 ino;		/* inode number */
+	__le32 block_addr;	/* block address */
+} __packed;
+
+struct f2fs_nat_block {
+	struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For SIT entries
+ *
+ * Each segment is 2MB in size by default so that a bitmap for validity of
+ * there-in blocks should occupy 64 bytes, 512 bits.
+ * Not allow to change this.
+ */
+#define SIT_VBLOCK_MAP_SIZE 64
+#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+
+/*
+ * Note that f2fs_sit_entry->vblocks has the following bit-field information.
+ * [15:10] : allocation type such as CURSEG_XXXX_TYPE
+ * [9:0] : valid block count
+ */
+#define SIT_VBLOCKS_SHIFT	10
+#define SIT_VBLOCKS_MASK	((1 << SIT_VBLOCKS_SHIFT) - 1)
+#define GET_SIT_VBLOCKS(raw_sit)				\
+	(le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK)
+#define GET_SIT_TYPE(raw_sit)					\
+	((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK)	\
+	 >> SIT_VBLOCKS_SHIFT)
+
+struct f2fs_sit_entry {
+	__le16 vblocks;				/* reference above */
+	__u8 valid_map[SIT_VBLOCK_MAP_SIZE];	/* bitmap for valid blocks */
+	__le64 mtime;				/* segment age for cleaning */
+} __packed;
+
+struct f2fs_sit_block {
+	struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For segment summary
+ *
+ * One summary block contains exactly 512 summary entries, which represents
+ * exactly 2MB segment by default. Not allow to change the basic units.
+ *
+ * NOTE: For initializing fields, you must use set_summary
+ *
+ * - If data page, nid represents dnode's nid
+ * - If node page, nid represents the node page's nid.
+ *
+ * The ofs_in_node is used by only data page. It represents offset
+ * from node's page's beginning to get a data block address.
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
+ */
+#define ENTRIES_IN_SUM		512
+#define	SUMMARY_SIZE		(sizeof(struct f2fs_summary))
+#define	SUM_FOOTER_SIZE		(sizeof(struct summary_footer))
+#define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
+
+/* a summary entry for a 4KB-sized block in a segment */
+struct f2fs_summary {
+	__le32 nid;		/* parent node id */
+	union {
+		__u8 reserved[3];
+		struct {
+			__u8 version;		/* node version number */
+			__le16 ofs_in_node;	/* block index in parent node */
+		} __packed;
+	};
+} __packed;
+
+/* summary block type, node or data, is stored to the summary_footer */
+#define SUM_TYPE_NODE		(1)
+#define SUM_TYPE_DATA		(0)
+
+struct summary_footer {
+	unsigned char entry_type;	/* SUM_TYPE_XXX */
+	__u32 check_sum;		/* summary checksum */
+} __packed;
+
+#define SUM_JOURNAL_SIZE	(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\
+				SUM_ENTRY_SIZE)
+#define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct nat_journal_entry))
+#define NAT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct nat_journal_entry))
+#define SIT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct sit_journal_entry))
+#define SIT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct sit_journal_entry))
+/*
+ * frequently updated NAT/SIT entries can be stored in the spare area in
+ * summary blocks
+ */
+enum {
+	NAT_JOURNAL = 0,
+	SIT_JOURNAL
+};
+
+struct nat_journal_entry {
+	__le32 nid;
+	struct f2fs_nat_entry ne;
+} __packed;
+
+struct nat_journal {
+	struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
+	__u8 reserved[NAT_JOURNAL_RESERVED];
+} __packed;
+
+struct sit_journal_entry {
+	__le32 segno;
+	struct f2fs_sit_entry se;
+} __packed;
+
+struct sit_journal {
+	struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
+	__u8 reserved[SIT_JOURNAL_RESERVED];
+} __packed;
+
+/* 4KB-sized summary block structure */
+struct f2fs_summary_block {
+	struct f2fs_summary entries[ENTRIES_IN_SUM];
+	union {
+		__le16 n_nats;
+		__le16 n_sits;
+	};
+	/* spare area is used by NAT or SIT journals */
+	union {
+		struct nat_journal nat_j;
+		struct sit_journal sit_j;
+	};
+	struct summary_footer footer;
+} __packed;
+
+/*
+ * For directory operations
+ */
+#define F2FS_DOT_HASH		0
+#define F2FS_DDOT_HASH		F2FS_DOT_HASH
+#define F2FS_MAX_HASH		(~((0x3ULL) << 62))
+#define F2FS_HASH_COL_BIT	((0x1ULL) << 63)
+
+typedef __le32	f2fs_hash_t;
+
+/* One directory entry slot covers 8bytes-long file name */
+#define F2FS_NAME_LEN		8
+
+/* the number of dentry in a block */
+#define NR_DENTRY_IN_BLOCK	214
+
+/* MAX level for dir lookup */
+#define MAX_DIR_HASH_DEPTH	63
+
+#define SIZE_OF_DIR_ENTRY	11	/* by byte */
+#define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
+					BITS_PER_BYTE)
+#define SIZE_OF_RESERVED	(PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+				F2FS_NAME_LEN) * \
+				NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+
+/* One directory entry slot representing F2FS_NAME_LEN-sized file name */
+struct f2fs_dir_entry {
+	__le32 hash_code;	/* hash code of file name */
+	__le32 ino;		/* inode number */
+	__le16 name_len;	/* lengh of file name */
+	__u8 file_type;		/* file type */
+} __packed;
+
+/* 4KB-sized directory entry block */
+struct f2fs_dentry_block {
+	/* validity bitmap for directory entries in each block */
+	__u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
+	__u8 reserved[SIZE_OF_RESERVED];
+	struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK];
+	__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN];
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+	F2FS_FT_UNKNOWN,
+	F2FS_FT_REG_FILE,
+	F2FS_FT_DIR,
+	F2FS_FT_CHRDEV,
+	F2FS_FT_BLKDEV,
+	F2FS_FT_FIFO,
+	F2FS_FT_SOCK,
+	F2FS_FT_SYMLINK,
+	F2FS_FT_MAX
+};
+
+#endif  /* _LINUX_F2FS_FS_H */
-- 
cgit v1.2.3-55-g7522


From 25ca923b2a766b9c93b63777ead351137533a623 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Wed, 28 Nov 2012 16:12:41 +0900
Subject: f2fs: fix endian conversion bugs reported by sparse

This patch should resolve the bugs reported by the sparse tool.
Initial reports were written by "kbuild test robot" managed by fengguang.wu.

In my local machines, I've tested also by running:
> make C=2 CF="-D__CHECK_ENDIAN__"

Accordingly, I've found lots of warnings and bugs related to the endian
conversion. And I've fixed all at this moment.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/checkpoint.c    | 32 +++++++++++++++++---------------
 fs/f2fs/data.c          |  2 +-
 fs/f2fs/debug.c         |  2 +-
 fs/f2fs/dir.c           |  8 ++++----
 fs/f2fs/f2fs.h          | 25 +++++++++++++++++++++++--
 fs/f2fs/hash.c          |  3 +--
 fs/f2fs/node.c          |  4 ++--
 fs/f2fs/node.h          |  2 +-
 fs/f2fs/recovery.c      |  2 +-
 fs/f2fs/segment.c       | 14 +++++++-------
 fs/f2fs/super.c         |  8 ++++----
 include/linux/f2fs_fs.h |  6 +++---
 12 files changed, 65 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ab743f92ee06..7c18f8efaadc 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -268,7 +268,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
 	block_t start_blk, orphan_blkaddr, i, j;
 
-	if (!(F2FS_CKPT(sbi)->ckpt_flags & CP_ORPHAN_PRESENT_FLAG))
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
 		return 0;
 
 	sbi->por_doing = 1;
@@ -287,7 +287,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 		f2fs_put_page(page, 1);
 	}
 	/* clear Orphan Flag */
-	F2FS_CKPT(sbi)->ckpt_flags &= (~CP_ORPHAN_PRESENT_FLAG);
+	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
 	sbi->por_doing = 0;
 	return 0;
 }
@@ -376,7 +376,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
 	pre_version = le64_to_cpu(cp_block->checkpoint_ver);
 
 	/* Read the 2nd cp block in this CP pack */
-	cp_addr += le64_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
 	cp_page_2 = get_meta_page(sbi, cp_addr);
 
 	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
@@ -605,8 +605,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	block_t start_blk;
 	struct page *cp_page;
 	unsigned int data_sum_blocks, orphan_blocks;
+	unsigned int crc32 = 0;
 	void *kaddr;
-	__u32 crc32 = 0;
 	int i;
 
 	/* Flush all the NAT/SIT pages */
@@ -646,33 +646,35 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	/* 2 cp  + n data seg summary + orphan inode blocks */
 	data_sum_blocks = npages_for_summary_flush(sbi);
 	if (data_sum_blocks < 3)
-		ckpt->ckpt_flags |= CP_COMPACT_SUM_FLAG;
+		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 	else
-		ckpt->ckpt_flags &= (~CP_COMPACT_SUM_FLAG);
+		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 
 	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
 					/ F2FS_ORPHANS_PER_BLOCK;
-	ckpt->cp_pack_start_sum = 1 + orphan_blocks;
-	ckpt->cp_pack_total_block_count = 2 + data_sum_blocks + orphan_blocks;
+	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
 
 	if (is_umount) {
-		ckpt->ckpt_flags |= CP_UMOUNT_FLAG;
-		ckpt->cp_pack_total_block_count += NR_CURSEG_NODE_TYPE;
+		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
 	} else {
-		ckpt->ckpt_flags &= (~CP_UMOUNT_FLAG);
+		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+			data_sum_blocks + orphan_blocks);
 	}
 
 	if (sbi->n_orphans)
-		ckpt->ckpt_flags |= CP_ORPHAN_PRESENT_FLAG;
+		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 	else
-		ckpt->ckpt_flags &= (~CP_ORPHAN_PRESENT_FLAG);
+		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
 	/* update SIT/NAT bitmap */
 	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
 	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
 
 	crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
-	*(__u32 *)((unsigned char *)ckpt +
+	*(__le32 *)((unsigned char *)ckpt +
 				le32_to_cpu(ckpt->checksum_offset))
 				= cpu_to_le32(crc32);
 
@@ -716,7 +718,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	sbi->alloc_valid_block_count = 0;
 
 	/* Here, we only have one bio having CP pack */
-	if (sbi->ckpt->ckpt_flags & CP_ERROR_FLAG)
+	if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))
 		sbi->sb->s_flags |= MS_RDONLY;
 	else
 		sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c2fd0a80db16..5635cc5a9d4d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -545,7 +545,7 @@ redirty_out:
 
 #define MAX_DESIRED_PAGES_WP	4096
 
-int f2fs_write_data_pages(struct address_space *mapping,
+static int f2fs_write_data_pages(struct address_space *mapping,
 			    struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a56181c1b28b..fb62960a1dc1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -27,7 +27,7 @@
 static LIST_HEAD(f2fs_stat_list);
 static struct dentry *debugfs_root;
 
-void update_general_status(struct f2fs_sb_info *sbi)
+static void update_general_status(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_stat_info *si = sbi->stat_info;
 	int i;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 5975568d03df..5ec7a06120e1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -80,7 +80,7 @@ static bool early_match_name(const char *name, int namelen,
 	if (le16_to_cpu(de->name_len) != namelen)
 		return false;
 
-	if (le32_to_cpu(de->hash_code) != namehash)
+	if (de->hash_code != namehash)
 		return false;
 
 	return true;
@@ -143,7 +143,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 	nbucket = dir_buckets(level);
 	nblock = bucket_blocks(level);
 
-	bidx = dir_block_index(level, namehash % nbucket);
+	bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
 	end_block = bidx + nblock;
 
 	for (; bidx < end_block; bidx++) {
@@ -406,7 +406,7 @@ start:
 	nbucket = dir_buckets(level);
 	nblock = bucket_blocks(level);
 
-	bidx = dir_block_index(level, (dentry_hash % nbucket));
+	bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
 
 	for (block = bidx; block <= (bidx + nblock - 1); block++) {
 		mutex_lock_op(sbi, DENTRY_OPS);
@@ -437,7 +437,7 @@ add_dentry:
 	wait_on_page_writeback(dentry_page);
 
 	de = &dentry_blk->dentry[bit_pos];
-	de->hash_code = cpu_to_le32(dentry_hash);
+	de->hash_code = dentry_hash;
 	de->name_len = cpu_to_le16(namelen);
 	memcpy(dentry_blk->filename[bit_pos], name, namelen);
 	de->ino = cpu_to_le32(inode->i_ino);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d3f5a70e2a49..8d7fde1bda1e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -463,6 +463,26 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
 	sbi->s_dirty = 0;
 }
 
+static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	return ckpt_flags & f;
+}
+
+static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	ckpt_flags |= f;
+	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
+static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	ckpt_flags &= (~f);
+	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
 static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t)
 {
 	mutex_lock_nested(&sbi->fs_lock[t], t);
@@ -577,7 +597,8 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	int offset = (flag == NAT_BITMAP) ? ckpt->sit_ver_bitmap_bytesize : 0;
+	int offset = (flag == NAT_BITMAP) ?
+			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
 	return &ckpt->sit_nat_version_bitmap + offset;
 }
 
@@ -587,7 +608,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
 
-	start_addr = le64_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+	start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 
 	/*
 	 * odd numbered checkpoint should at cp segment 0
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 098a1963d7c7..beb155e8d06d 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -92,7 +92,6 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
 	hash = buf[0];
 	minor_hash = buf[1];
 
-	f2fs_hash = hash;
-	f2fs_hash &= ~F2FS_HASH_COL_BIT;
+	f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
 	return f2fs_hash;
 }
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 216f04dc1177..5d421fe22575 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1445,8 +1445,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
 
 	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
 	dst->i.i_size = 0;
-	dst->i.i_blocks = 1;
-	dst->i.i_links = 1;
+	dst->i.i_blocks = cpu_to_le64(1);
+	dst->i.i_links = cpu_to_le32(1);
 	dst->i.i_xattr_nid = 0;
 
 	new_ni = old_ni;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 5d525ed312ba..0ab92d643052 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -177,7 +177,7 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
 	void *kaddr = page_address(page);
 	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
 	rn->footer.cp_ver = ckpt->checkpoint_ver;
-	rn->footer.next_blkaddr = blkaddr;
+	rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
 }
 
 static inline nid_t ino_of_node(struct page *node_page)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 7a43df0b72c1..222a7bb92214 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -81,7 +81,7 @@ static int recover_inode(struct inode *inode, struct page *node_page)
 	struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
 	struct f2fs_inode *raw_inode = &(raw_node->i);
 
-	inode->i_mode = le32_to_cpu(raw_inode->i_mode);
+	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
 	i_size_write(inode, le64_to_cpu(raw_inode->i_size));
 	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
 	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ed7c079cfc7f..d973c56e8bd6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -630,7 +630,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
 			SetPageError(page);
 			if (page->mapping)
 				set_bit(AS_EIO, &page->mapping->flags);
-			p->sbi->ckpt->ckpt_flags |= CP_ERROR_FLAG;
+			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
 			set_page_dirty(page);
 		}
 		end_page_writeback(page);
@@ -1067,7 +1067,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
 							CURSEG_HOT_DATA]);
-		if (ckpt->ckpt_flags & CP_UMOUNT_FLAG)
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
 		else
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
@@ -1076,7 +1076,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 							CURSEG_HOT_NODE]);
 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
 							CURSEG_HOT_NODE]);
-		if (ckpt->ckpt_flags & CP_UMOUNT_FLAG)
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
 							type - CURSEG_HOT_NODE);
 		else
@@ -1087,7 +1087,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 	sum = (struct f2fs_summary_block *)page_address(new);
 
 	if (IS_NODESEG(type)) {
-		if (ckpt->ckpt_flags & CP_UMOUNT_FLAG) {
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
 			struct f2fs_summary *ns = &sum->entries[0];
 			int i;
 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
@@ -1119,7 +1119,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 {
 	int type = CURSEG_HOT_DATA;
 
-	if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG) {
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
 		/* restore for compacted data summary */
 		if (read_compacted_summaries(sbi))
 			return -EINVAL;
@@ -1208,7 +1208,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
 
 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
 {
-	if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG)
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
 		write_compacted_summaries(sbi, start_blk);
 	else
 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
@@ -1216,7 +1216,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
 
 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
 {
-	if (sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG)
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
 	return;
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8661c93538af..878bf382f848 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -89,7 +89,7 @@ static void f2fs_i_callback(struct rcu_head *head)
 	kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
 }
 
-void f2fs_destroy_inode(struct inode *inode)
+static void f2fs_destroy_inode(struct inode *inode)
 {
 	call_rcu(&inode->i_rcu, f2fs_i_callback);
 }
@@ -445,7 +445,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (sanity_check_raw_super(raw_super))
 		goto free_sb_buf;
 
-	sb->s_maxbytes = max_file_size(raw_super->log_blocksize);
+	sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
 	sb->s_max_links = F2FS_LINK_MAX;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 
@@ -527,7 +527,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* if there are nt orphan nodes free them */
 	err = -EINVAL;
-	if (!(sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) &&
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
 				recover_orphan_inodes(sbi))
 		goto free_node_inode;
 
@@ -547,7 +547,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* recover fsynced data */
-	if (!(sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) &&
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
 				!test_opt(sbi, DISABLE_ROLL_FORWARD))
 		recover_fsync_data(sbi);
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1429ece7caab..c2fbbc35c1e6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -272,8 +272,8 @@ struct f2fs_sit_block {
  * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
  */
 #define ENTRIES_IN_SUM		512
-#define	SUMMARY_SIZE		(sizeof(struct f2fs_summary))
-#define	SUM_FOOTER_SIZE		(sizeof(struct summary_footer))
+#define	SUMMARY_SIZE		(7)	/* sizeof(struct summary) */
+#define	SUM_FOOTER_SIZE		(5)	/* sizeof(struct summary_footer) */
 #define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
 
 /* a summary entry for a 4KB-sized block in a segment */
@@ -297,7 +297,7 @@ struct summary_footer {
 	__u32 check_sum;		/* summary checksum */
 } __packed;
 
-#define SUM_JOURNAL_SIZE	(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\
+#define SUM_JOURNAL_SIZE	(F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
 				SUM_ENTRY_SIZE)
 #define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
 				sizeof(struct nat_journal_entry))
-- 
cgit v1.2.3-55-g7522


From 457d08ee4fd91c8df17917ff2d32565e6adacbfc Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sat, 8 Dec 2012 14:54:50 +0900
Subject: f2fs: introduce accessor to retrieve number of dentry slots

Simplify code by providing the accessor macro to retrieve the
number of dentry slots for a given filename length.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Amit Sahrawat <a.sahrawat@samsung.com>
---
 fs/f2fs/dir.c           | 13 +++++--------
 include/linux/f2fs_fs.h |  3 +++
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fc02d8b43aea..d900c088c7c6 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -99,8 +99,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 					NR_DENTRY_IN_BLOCK, 0);
 	while (bit_pos < NR_DENTRY_IN_BLOCK) {
 		de = &dentry_blk->dentry[bit_pos];
-		slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1) /
-							F2FS_NAME_LEN;
+		slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 
 		if (early_match_name(name, namelen, namehash, de)) {
 			if (!memcmp(dentry_blk->filename[bit_pos],
@@ -130,7 +129,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 		unsigned int level, const char *name, int namelen,
 			f2fs_hash_t namehash, struct page **res_page)
 {
-	int s = (namelen + F2FS_NAME_LEN - 1) / F2FS_NAME_LEN;
+	int s = GET_DENTRY_SLOTS(namelen);
 	unsigned int nbucket, nblock;
 	unsigned int bidx, end_block;
 	struct page *dentry_page;
@@ -383,7 +382,7 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
 	int namelen = dentry->d_name.len;
 	struct page *dentry_page = NULL;
 	struct f2fs_dentry_block *dentry_blk = NULL;
-	int slots = (namelen + F2FS_NAME_LEN - 1) / F2FS_NAME_LEN;
+	int slots = GET_DENTRY_SLOTS(namelen);
 	int err = 0;
 	int i;
 
@@ -465,8 +464,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	struct address_space *mapping = page->mapping;
 	struct inode *dir = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-	int slots = (le16_to_cpu(dentry->name_len) + F2FS_NAME_LEN - 1) /
-							F2FS_NAME_LEN;
+	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
 	void *kaddr = page_address(page);
 	int i;
 
@@ -641,8 +639,7 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
 				file->f_pos += bit_pos - start_bit_pos;
 				goto success;
 			}
-			slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1)
-								/ F2FS_NAME_LEN;
+			slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 			bit_pos += slots;
 		}
 		bit_pos = 0;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index c2fbbc35c1e6..f9a12f6243a5 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -363,6 +363,9 @@ typedef __le32	f2fs_hash_t;
 
 /* One directory entry slot covers 8bytes-long file name */
 #define F2FS_NAME_LEN		8
+#define F2FS_NAME_LEN_BITS	3
+
+#define GET_DENTRY_SLOTS(x)	((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS)
 
 /* the number of dentry in a block */
 #define NR_DENTRY_IN_BLOCK	214
-- 
cgit v1.2.3-55-g7522


From 3c58346525d82625e68e24f071804c2dc057b6f4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 28 Nov 2012 16:23:01 +0000
Subject: slab: Simplify bootstrap

The nodelists field in kmem_cache is pointing to the first unused
object in the array field when bootstrap is complete.

A problem with the current approach is that the statically sized
kmem_cache structure use on boot can only contain NR_CPUS entries.
If the number of nodes plus the number of cpus is greater then we
would overwrite memory following the kmem_cache_boot definition.

Increase the size of the array field to ensure that also the node
pointers fit into the array field.

Once we do that we no longer need the kmem_cache_nodelists
array and we can then also use that structure elsewhere.

Acked-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab_def.h |  6 +++++-
 mm/slab.c                | 21 +++++++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index cc290f0bdb34..45c0356fdc8c 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -89,9 +89,13 @@ struct kmem_cache {
 	 * (see kmem_cache_init())
 	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
 	 * is statically defined, so we reserve the max number of cpus.
+	 *
+	 * We also need to guarantee that the list is able to accomodate a
+	 * pointer for each node since "nodelists" uses the remainder of
+	 * available pointers.
 	 */
 	struct kmem_list3 **nodelists;
-	struct array_cache *array[NR_CPUS];
+	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
 	/*
 	 * Do not add fields after array[]
 	 */
diff --git a/mm/slab.c b/mm/slab.c
index e26bff5ed1a6..c7ea5234c4e9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -553,9 +553,7 @@ static struct arraycache_init initarray_generic =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
-static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache kmem_cache_boot = {
-	.nodelists = kmem_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1559,6 +1557,15 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
 	}
 }
 
+/*
+ * The memory after the last cpu cache pointer is used for the
+ * the nodelists pointer.
+ */
+static void setup_nodelists_pointer(struct kmem_cache *cachep)
+{
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+}
+
 /*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
@@ -1573,15 +1580,14 @@ void __init kmem_cache_init(void)
 	int node;
 
 	kmem_cache = &kmem_cache_boot;
+	setup_nodelists_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
 
-	for (i = 0; i < NUM_INIT_LISTS; i++) {
+	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_list3_init(&initkmem_list3[i]);
-		if (i < MAX_NUMNODES)
-			kmem_cache->nodelists[i] = NULL;
-	}
+
 	set_up_list3s(kmem_cache, CACHE_CACHE);
 
 	/*
@@ -1619,7 +1625,6 @@ void __init kmem_cache_init(void)
 	list_add(&kmem_cache->list, &slab_caches);
 	kmem_cache->colour_off = cache_line_size();
 	kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
-	kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
@@ -2422,7 +2427,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+	setup_nodelists_pointer(cachep);
 #if DEBUG
 
 	/*
-- 
cgit v1.2.3-55-g7522


From 7da4d641c58d201c3cc1835c05ca1a7fa26f0856 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 19 Nov 2012 03:14:23 +0100
Subject: mm: Count the number of pages affected in change_protection()

This will be used for three kinds of purposes:

 - to optimize mprotect()

 - to speed up working set scanning for working set areas that
   have not been touched

 - to more accurately scan per real working set

No change in functionality from this patch.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/hugetlb.h |  8 +++++--
 include/linux/mm.h      |  3 +++
 mm/hugetlb.c            | 10 +++++++--
 mm/mprotect.c           | 58 +++++++++++++++++++++++++++++++++++++------------
 4 files changed, 61 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 225164842ab6..06e691baab86 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,7 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pmd);
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -132,7 +132,11 @@ static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
-#define hugetlb_change_protection(vma, address, end, newprot)
+static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	return 0;
+}
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcaab4e6fe91..1856c62d82cd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1078,6 +1078,9 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+			      unsigned long end, pgprot_t newprot,
+			      int dirty_accountable);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 59a0059b39e2..712895eda0d0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3014,7 +3014,7 @@ same_page:
 	return i ? i : -EFAULT;
 }
 
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -3022,6 +3022,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
+	unsigned long pages = 0;
 
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
@@ -3032,12 +3033,15 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
-		if (huge_pmd_unshare(mm, &address, ptep))
+		if (huge_pmd_unshare(mm, &address, ptep)) {
+			pages++;
 			continue;
+		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);
+			pages++;
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -3049,6 +3053,8 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	 */
 	flush_tlb_range(vma, start, end);
 	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+
+	return pages << h->order;
 }
 
 int hugetlb_reserve_pages(struct inode *inode,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a40992610ab6..1e265be25f85 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,12 +35,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 }
 #endif
 
-static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
+	unsigned long pages = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -60,6 +61,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				ptent = pte_mkwrite(ptent);
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
+			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
@@ -72,18 +74,22 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				set_pte_at(mm, addr, pte,
 					swp_entry_to_pte(entry));
 			}
+			pages++;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
+
+	return pages;
 }
 
-static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
 	pmd_t *pmd;
 	unsigned long next;
+	unsigned long pages = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -91,35 +97,42 @@ static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot))
+			else if (change_huge_pmd(vma, pmd, addr, newprot)) {
+				pages += HPAGE_PMD_NR;
 				continue;
+			}
 			/* fall through */
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
+		pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
 				 dirty_accountable);
 	} while (pmd++, addr = next, addr != end);
+
+	return pages;
 }
 
-static inline void change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
 	pud_t *pud;
 	unsigned long next;
+	unsigned long pages = 0;
 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		change_pmd_range(vma, pud, addr, next, newprot,
+		pages += change_pmd_range(vma, pud, addr, next, newprot,
 				 dirty_accountable);
 	} while (pud++, addr = next, addr != end);
+
+	return pages;
 }
 
-static void change_protection(struct vm_area_struct *vma,
+static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
@@ -127,6 +140,7 @@ static void change_protection(struct vm_area_struct *vma,
 	pgd_t *pgd;
 	unsigned long next;
 	unsigned long start = addr;
+	unsigned long pages = 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
@@ -135,10 +149,30 @@ static void change_protection(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		change_pud_range(vma, pgd, addr, next, newprot,
+		pages += change_pud_range(vma, pgd, addr, next, newprot,
 				 dirty_accountable);
 	} while (pgd++, addr = next, addr != end);
+
 	flush_tlb_range(vma, start, end);
+
+	return pages;
+}
+
+unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end, pgprot_t newprot,
+		       int dirty_accountable)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long pages;
+
+	mmu_notifier_invalidate_range_start(mm, start, end);
+	if (is_vm_hugetlb_page(vma))
+		pages = hugetlb_change_protection(vma, start, end, newprot);
+	else
+		pages = change_protection_range(vma, start, end, newprot, dirty_accountable);
+	mmu_notifier_invalidate_range_end(mm, start, end);
+
+	return pages;
 }
 
 int
@@ -213,12 +247,8 @@ success:
 		dirty_accountable = 1;
 	}
 
-	mmu_notifier_invalidate_range_start(mm, start, end);
-	if (is_vm_hugetlb_page(vma))
-		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
-	else
-		change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
-	mmu_notifier_invalidate_range_end(mm, start, end);
+	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	perf_event_mmap(vma);
-- 
cgit v1.2.3-55-g7522


From 5647bc293ab15f66a7b1cda850c5e9d162a6c7c2 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 19 Oct 2012 10:46:20 +0100
Subject: mm: compaction: Move migration fail/success stats to migrate.c

The compact_pages_moved and compact_pagemigrate_failed events are
convenient for determining if compaction is active and to what
degree migration is succeeding but it's at the wrong level. Other
users of migration may also want to know if migration is working
properly and this will be particularly true for any automated
NUMA migration. This patch moves the counters down to migration
with the new events called pgmigrate_success and pgmigrate_fail.
The compact_blocks_moved counter is removed because while it was
useful for debugging initially, it's worthless now as no meaningful
conclusions can be drawn from its value.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/vm_event_item.h | 4 +++-
 mm/compaction.c               | 4 ----
 mm/migrate.c                  | 6 ++++++
 mm/vmstat.c                   | 7 ++++---
 4 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3d3114594370..8aa7cb94b5fb 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,8 +38,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_MIGRATION
+		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+#endif
 #ifdef CONFIG_COMPACTION
-		COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/compaction.c b/mm/compaction.c
index 9eef55838fca..00ad88395216 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -994,10 +994,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
-		count_vm_event(COMPACTBLOCKS);
-		count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
-		if (nr_remaining)
-			count_vm_events(COMPACTPAGEFAILED, nr_remaining);
 		trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
 						nr_remaining);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 77ed2d773705..04687f69cc17 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -962,6 +962,7 @@ int migrate_pages(struct list_head *from,
 {
 	int retry = 1;
 	int nr_failed = 0;
+	int nr_succeeded = 0;
 	int pass = 0;
 	struct page *page;
 	struct page *page2;
@@ -988,6 +989,7 @@ int migrate_pages(struct list_head *from,
 				retry++;
 				break;
 			case 0:
+				nr_succeeded++;
 				break;
 			default:
 				/* Permanent failure */
@@ -998,6 +1000,10 @@ int migrate_pages(struct list_head *from,
 	}
 	rc = 0;
 out:
+	if (nr_succeeded)
+		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+	if (nr_failed)
+		count_vm_events(PGMIGRATE_FAIL, nr_failed);
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c7370579111b..89a7fd665b32 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -774,10 +774,11 @@ const char * const vmstat_text[] = {
 
 	"pgrotated",
 
+#ifdef CONFIG_MIGRATION
+	"pgmigrate_success",
+	"pgmigrate_fail",
+#endif
 #ifdef CONFIG_COMPACTION
-	"compact_blocks_moved",
-	"compact_pages_moved",
-	"compact_pagemigrate_failed",
 	"compact_stall",
 	"compact_fail",
 	"compact_success",
-- 
cgit v1.2.3-55-g7522


From 7b2a2d4a18fffac3c4872021529b0657896db788 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 19 Oct 2012 14:07:31 +0100
Subject: mm: migrate: Add a tracepoint for migrate_pages

The pgmigrate_success and pgmigrate_fail vmstat counters tells the user
about migration activity but not the type or the reason. This patch adds
a tracepoint to identify the type of page migration and why the page is
being migrated.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/migrate.h        | 13 +++++++++--
 include/trace/events/migrate.h | 51 ++++++++++++++++++++++++++++++++++++++++++
 mm/compaction.c                |  3 ++-
 mm/memory-failure.c            |  3 ++-
 mm/memory_hotplug.c            |  3 ++-
 mm/mempolicy.c                 |  6 +++--
 mm/migrate.c                   | 10 +++++++--
 mm/page_alloc.c                |  3 ++-
 8 files changed, 82 insertions(+), 10 deletions(-)
 create mode 100644 include/trace/events/migrate.h

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce7e6671968b..9d1c159e2427 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,6 +7,15 @@
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
+enum migrate_reason {
+	MR_COMPACTION,
+	MR_MEMORY_FAILURE,
+	MR_MEMORY_HOTPLUG,
+	MR_SYSCALL,		/* also applies to cpusets */
+	MR_MEMPOLICY_MBIND,
+	MR_CMA
+};
+
 #ifdef CONFIG_MIGRATION
 
 extern void putback_lru_pages(struct list_head *l);
@@ -14,7 +23,7 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			enum migrate_mode mode);
+			enum migrate_mode mode, int reason);
 extern int migrate_huge_page(struct page *, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
@@ -35,7 +44,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		enum migrate_mode mode) { return -ENOSYS; }
+		enum migrate_mode mode, int reason) { return -ENOSYS; }
 static inline int migrate_huge_page(struct page *page, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
new file mode 100644
index 000000000000..ec2a6ccfd7e5
--- /dev/null
+++ b/include/trace/events/migrate.h
@@ -0,0 +1,51 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM migrate
+
+#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MIGRATE_H
+
+#define MIGRATE_MODE						\
+	{MIGRATE_ASYNC,		"MIGRATE_ASYNC"},		\
+	{MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT"},		\
+	{MIGRATE_SYNC,		"MIGRATE_SYNC"}		
+
+#define MIGRATE_REASON						\
+	{MR_COMPACTION,		"compaction"},			\
+	{MR_MEMORY_FAILURE,	"memory_failure"},		\
+	{MR_MEMORY_HOTPLUG,	"memory_hotplug"},		\
+	{MR_SYSCALL,		"syscall_or_cpuset"},		\
+	{MR_MEMPOLICY_MBIND,	"mempolicy_mbind"},		\
+	{MR_CMA,		"cma"}
+
+TRACE_EVENT(mm_migrate_pages,
+
+	TP_PROTO(unsigned long succeeded, unsigned long failed,
+		 enum migrate_mode mode, int reason),
+
+	TP_ARGS(succeeded, failed, mode, reason),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		succeeded)
+		__field(	unsigned long,		failed)
+		__field(	enum migrate_mode,	mode)
+		__field(	int,			reason)
+	),
+
+	TP_fast_assign(
+		__entry->succeeded	= succeeded;
+		__entry->failed		= failed;
+		__entry->mode		= mode;
+		__entry->reason		= reason;
+	),
+
+	TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s",
+		__entry->succeeded,
+		__entry->failed,
+		__print_symbolic(__entry->mode, MIGRATE_MODE),
+		__print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
+#endif /* _TRACE_MIGRATE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/compaction.c b/mm/compaction.c
index 00ad88395216..2c077a78487c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -990,7 +990,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+				MR_COMPACTION);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6c5899b9034a..ddb68a169e45 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1558,7 +1558,8 @@ int soft_offline_page(struct page *page, int flags)
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-							false, MIGRATE_SYNC);
+							false, MIGRATE_SYNC,
+							MR_MEMORY_FAILURE);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e4eeacae2b91..e598bd15c041 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -812,7 +812,8 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * migrate_pages returns # of failed pages.
 		 */
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
-							true, MIGRATE_SYNC);
+							true, MIGRATE_SYNC,
+							MR_MEMORY_HOTPLUG);
 		if (ret)
 			putback_lru_pages(&source);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d04a8a54c294..66e90ecc2350 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -961,7 +961,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-							false, MIGRATE_SYNC);
+							false, MIGRATE_SYNC,
+							MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1202,7 +1203,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (!list_empty(&pagelist)) {
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma,
-						false, MIGRATE_SYNC);
+						false, MIGRATE_SYNC,
+						MR_MEMPOLICY_MBIND);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 04687f69cc17..27be9c923dc1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -38,6 +38,9 @@
 
 #include <asm/tlbflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/migrate.h>
+
 #include "internal.h"
 
 /*
@@ -958,7 +961,7 @@ out:
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		enum migrate_mode mode)
+		enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1004,6 +1007,8 @@ out:
 		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
 	if (nr_failed)
 		count_vm_events(PGMIGRATE_FAIL, nr_failed);
+	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
+
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
@@ -1145,7 +1150,8 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, MIGRATE_SYNC);
+				(unsigned long)pm, 0, MIGRATE_SYNC,
+				MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7bb35ac0964a..5953dc2d196f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5707,7 +5707,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 
 		ret = migrate_pages(&cc->migratepages,
 				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC);
+				    0, false, MIGRATE_SYNC,
+				    MR_CMA);
 	}
 
 	putback_lru_pages(&cc->migratepages);
-- 
cgit v1.2.3-55-g7522


From 397487db696cae0b026a474a5cd66f4e372995e6 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 19 Oct 2012 12:00:10 +0100
Subject: mm: compaction: Add scanned and isolated counters for compaction

Compaction already has tracepoints to count scanned and isolated pages
but it requires that ftrace be enabled and if that information has to be
written to disk then it can be disruptive. This patch adds vmstat counters
for compaction called compact_migrate_scanned, compact_free_scanned and
compact_isolated.

With these counters, it is possible to define a basic cost model for
compaction. This approximates of how much work compaction is doing and can
be compared that with an oprofile showing TLB misses and see if the cost of
compaction is being offset by THP for example. Minimally a compaction patch
can be evaluated in terms of whether it increases or decreases cost. The
basic cost model looks like this

Fundamental unit u:	a word	sizeof(void *)

Ca  = cost of struct page access = sizeof(struct page) / u

Cmc = Cost migrate page copy = (Ca + PAGE_SIZE/u) * 2
Cmf = Cost migrate failure   = Ca * 2
Ci  = Cost page isolation    = (Ca + Wi)
	where Wi is a constant that should reflect the approximate
	cost of the locking operation.

Csm = Cost migrate scanning = Ca
Csf = Cost free    scanning = Ca

Overall cost =	(Csm * compact_migrate_scanned) +
	      	(Csf * compact_free_scanned)    +
	      	(Ci  * compact_isolated)	+
		(Cmc * pgmigrate_success)	+
		(Cmf * pgmigrate_failed)

Where the values are read from /proc/vmstat.

This is very basic and ignores certain costs such as the allocation cost
to do a migrate page copy but any improvement to the model would still
use the same vmstat counters.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/vm_event_item.h | 2 ++
 mm/compaction.c               | 8 ++++++++
 mm/vmstat.c                   | 3 +++
 3 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 8aa7cb94b5fb..a1f750b8e72a 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -42,6 +42,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
 #endif
 #ifdef CONFIG_COMPACTION
+		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
+		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/compaction.c b/mm/compaction.c
index 2c077a78487c..aee7443a4d5a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -356,6 +356,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
 
+	count_vm_events(COMPACTFREE_SCANNED, nr_scanned);
+	if (total_isolated)
+		count_vm_events(COMPACTISOLATED, total_isolated);
+
 	return total_isolated;
 }
 
@@ -646,6 +650,10 @@ next_pageblock:
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
+	count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned);
+	if (nr_isolated)
+		count_vm_events(COMPACTISOLATED, nr_isolated);
+
 	return low_pfn;
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 89a7fd665b32..3a067fabe190 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -779,6 +779,9 @@ const char * const vmstat_text[] = {
 	"pgmigrate_fail",
 #endif
 #ifdef CONFIG_COMPACTION
+	"compact_migrate_scanned",
+	"compact_free_scanned",
+	"compact_isolated",
 	"compact_stall",
 	"compact_fail",
 	"compact_success",
-- 
cgit v1.2.3-55-g7522


From 0b9d705297b273657923518dbea2377cd03532ed Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Fri, 5 Oct 2012 21:36:27 +0200
Subject: mm: numa: Support NUMA hinting page faults from gup/gup_fast

Introduce FOLL_NUMA to tell follow_page to check
pte/pmd_numa. get_user_pages must use FOLL_NUMA, and it's safe to do
so because it always invokes handle_mm_fault and retries the
follow_page later.

KVM secondary MMU page faults will trigger the NUMA hinting page
faults through gup_fast -> get_user_pages -> follow_page ->
handle_mm_fault.

Other follow_page callers like KSM should not use FOLL_NUMA, or they
would fail to get the pages if they use follow_page instead of
get_user_pages.

[ This patch was picked up from the AutoNUMA tree. ]

Originally-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
[ ported to this tree. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1856c62d82cd..fa1615211159 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1572,6 +1572,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
+#define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/memory.c b/mm/memory.c
index 7cf762857baa..cd8e0daf1912 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1517,6 +1517,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto no_page_table;
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
 			split_huge_page_pmd(mm, pmd);
@@ -1546,6 +1548,8 @@ split_fallthrough:
 	pte = *ptep;
 	if (!pte_present(pte))
 		goto no_page;
+	if ((flags & FOLL_NUMA) && pte_numa(pte))
+		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
 
@@ -1697,6 +1701,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
 	vm_flags &= (gup_flags & FOLL_FORCE) ?
 			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+
+	/*
+	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
+	 * would be called on PROT_NONE ranges. We must never invoke
+	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
+	 * page faults would unprotect the PROT_NONE ranges if
+	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
+	 * bitflag. So to avoid that, don't set FOLL_NUMA if
+	 * FOLL_FORCE is set.
+	 */
+	if (!(gup_flags & FOLL_FORCE))
+		gup_flags |= FOLL_NUMA;
+
 	i = 0;
 
 	do {
-- 
cgit v1.2.3-55-g7522


From d10e63f29488b0f312a443f9507ea9b6fd3c9090 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 25 Oct 2012 14:16:31 +0200
Subject: mm: numa: Create basic numa page hinting infrastructure

Note: This patch started as "mm/mpol: Create special PROT_NONE
	infrastructure" and preserves the basic idea but steals *very*
	heavily from "autonuma: numa hinting page faults entry points" for
	the actual fault handlers without the migration parts.	The end
	result is barely recognisable as either patch so all Signed-off
	and Reviewed-bys are dropped. If Peter, Ingo and Andrea are ok with
	this version, I will re-add the signed-offs-by to reflect the history.

In order to facilitate a lazy -- fault driven -- migration of pages, create
a special transient PAGE_NUMA variant, we can then use the 'spurious'
protection faults to drive our migrations from.

The meaning of PAGE_NUMA depends on the architecture but on x86 it is
effectively PROT_NONE. Actual PROT_NONE mappings will not generate these
NUMA faults for the reason that the page fault code checks the permission on
the VMA (and will throw a segmentation fault on actual PROT_NONE mappings),
before it ever calls handle_mm_fault.

[dhillf@gmail.com: Fix typo]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/huge_mm.h |  10 +++++
 mm/huge_memory.c        |  22 ++++++++++
 mm/memory.c             | 112 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 141 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b31cb7da0346..a1d26a98c655 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -159,6 +159,10 @@ static inline struct page *compound_trans_head(struct page *page)
 	}
 	return page;
 }
+
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
+				  pmd_t pmd, pmd_t *pmdp);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -195,6 +199,12 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
 {
 	return 0;
 }
+
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
+					pmd_t pmd, pmd_t *pmdp)
+{
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cd24aa562144..f5f37630c54d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1018,6 +1018,28 @@ out:
 	return page;
 }
 
+/* NUMA hinting page fault entry point for trans huge pmds */
+int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
+				pmd_t pmd, pmd_t *pmdp)
+{
+	struct page *page;
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
+	page = pmd_page(pmd);
+	pmd = pmd_mknonnuma(pmd);
+	set_pmd_at(mm, haddr, pmdp, pmd);
+	VM_BUG_ON(pmd_numa(*pmdp));
+	update_mmu_cache_pmd(vma, addr, pmdp);
+
+out_unlock:
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
diff --git a/mm/memory.c b/mm/memory.c
index cd8e0daf1912..e30616f2cc3d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3448,6 +3448,103 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
+int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
+{
+	struct page *page;
+	spinlock_t *ptl;
+
+	/*
+	* The "pte" at this point cannot be used safely without
+	* validation through pte_unmap_same(). It's of NUMA type but
+	* the pfn may be screwed if the read is non atomic.
+	*
+	* ptep_modify_prot_start is not called as this is clearing
+	* the _PAGE_NUMA bit and it is not really expected that there
+	* would be concurrent hardware modifications to the PTE.
+	*/
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (unlikely(!pte_same(*ptep, pte)))
+		goto out_unlock;
+	pte = pte_mknonnuma(pte);
+	set_pte_at(mm, addr, ptep, pte);
+	update_mmu_cache(vma, addr, ptep);
+
+	page = vm_normal_page(vma, addr, pte);
+	if (!page) {
+		pte_unmap_unlock(ptep, ptl);
+		return 0;
+	}
+
+out_unlock:
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+
+/* NUMA hinting page fault entry point for regular pmds */
+#ifdef CONFIG_NUMA_BALANCING
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd;
+	pte_t *pte, *orig_pte;
+	unsigned long _addr = addr & PMD_MASK;
+	unsigned long offset;
+	spinlock_t *ptl;
+	bool numa = false;
+
+	spin_lock(&mm->page_table_lock);
+	pmd = *pmdp;
+	if (pmd_numa(pmd)) {
+		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
+		numa = true;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	if (!numa)
+		return 0;
+
+	/* we're in a page fault so some vma must be in the range */
+	BUG_ON(!vma);
+	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
+	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
+	VM_BUG_ON(offset >= PMD_SIZE);
+	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
+	pte += offset >> PAGE_SHIFT;
+	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
+		pte_t pteval = *pte;
+		struct page *page;
+		if (!pte_present(pteval))
+			continue;
+		if (!pte_numa(pteval))
+			continue;
+		if (addr >= vma->vm_end) {
+			vma = find_vma(mm, addr);
+			/* there's a pte present so there must be a vma */
+			BUG_ON(!vma);
+			BUG_ON(addr < vma->vm_start);
+		}
+		if (pte_numa(pteval)) {
+			pteval = pte_mknonnuma(pteval);
+			set_pte_at(mm, addr, pte, pteval);
+		}
+		page = vm_normal_page(vma, addr, pteval);
+		if (unlikely(!page))
+			continue;
+	}
+	pte_unmap_unlock(orig_pte, ptl);
+
+	return 0;
+}
+#else
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long addr, pmd_t *pmdp)
+{
+	BUG();
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3486,6 +3583,9 @@ int handle_pte_fault(struct mm_struct *mm,
 					pte, pmd, flags, entry);
 	}
 
+	if (pte_numa(entry))
+		return do_numa_page(mm, vma, address, entry, pte, pmd);
+
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	if (unlikely(!pte_same(*pte, entry)))
@@ -3554,9 +3654,11 @@ retry:
 
 		barrier();
 		if (pmd_trans_huge(orig_pmd)) {
-			if (flags & FAULT_FLAG_WRITE &&
-			    !pmd_write(orig_pmd) &&
-			    !pmd_trans_splitting(orig_pmd)) {
+			if (pmd_numa(*pmd))
+				return do_huge_pmd_numa_page(mm, address,
+							     orig_pmd, pmd);
+
+			if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {
 				ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
 							  orig_pmd);
 				/*
@@ -3568,10 +3670,14 @@ retry:
 					goto retry;
 				return ret;
 			}
+
 			return 0;
 		}
 	}
 
+	if (pmd_numa(*pmd))
+		return do_pmd_numa_page(mm, vma, address, pmd);
+
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't
 	 * run pte_offset_map on the pmd, if an huge pmd could
-- 
cgit v1.2.3-55-g7522


From 771fb4d806a92bf6c988fcfbd286ae40a9374332 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Thu, 25 Oct 2012 14:16:30 +0200
Subject: mm: mempolicy: Check for misplaced page

This patch provides a new function to test whether a page resides
on a node that is appropriate for the mempolicy for the vma and
address where the page is supposed to be mapped.  This involves
looking up the node where the page belongs.  So, the function
returns that node so that it may be used to allocated the page
without consulting the policy again.

A subsequent patch will call this function from the fault path.
Because of this, I don't want to go ahead and allocate the page, e.g.,
via alloc_page_vma() only to have to free it if it has the correct
policy.  So, I just mimic the alloc_page_vma() node computation
logic--sort of.

Note:  we could use this function to implement a MPOL_MF_STRICT
behavior when migrating pages to match mbind() mempolicy--e.g.,
to ensure that pages in an interleaved range are reinterleaved
rather than left where they are when they reside on any page in
the interleave nodemask.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
[ Added MPOL_F_LAZY to trigger migrate-on-fault;
  simplified code now that we don't have to bother
  with special crap for interleaved ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mempolicy.h      |  8 +++++
 include/uapi/linux/mempolicy.h |  1 +
 mm/mempolicy.c                 | 76 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e5ccb9ddd90e..c511e2523560 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -198,6 +198,8 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 	return 1;
 }
 
+extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+
 #else
 
 struct mempolicy {};
@@ -323,5 +325,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 	return 0;
 }
 
+static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+				 unsigned long address)
+{
+	return -1; /* no node preference */
+}
+
 #endif /* CONFIG_NUMA */
 #endif
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index d23dca8367cc..472de8a5d37e 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -61,6 +61,7 @@ enum mpol_rebind_step {
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
+#define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c21e91477c4f..df1466d3d2d8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2181,6 +2181,82 @@ static void sp_free(struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
+/**
+ * mpol_misplaced - check whether current page node is valid in policy
+ *
+ * @page   - page to be checked
+ * @vma    - vm area where page mapped
+ * @addr   - virtual address where page mapped
+ *
+ * Lookup current policy node id for vma,addr and "compare to" page's
+ * node id.
+ *
+ * Returns:
+ *	-1	- not misplaced, page is in the right node
+ *	node	- node id where the page should be
+ *
+ * Policy determination "mimics" alloc_page_vma().
+ * Called from fault path where we know the vma and faulting address.
+ */
+int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol;
+	struct zone *zone;
+	int curnid = page_to_nid(page);
+	unsigned long pgoff;
+	int polnid = -1;
+	int ret = -1;
+
+	BUG_ON(!vma);
+
+	pol = get_vma_policy(current, vma, addr);
+	if (!(pol->flags & MPOL_F_MOF))
+		goto out;
+
+	switch (pol->mode) {
+	case MPOL_INTERLEAVE:
+		BUG_ON(addr >= vma->vm_end);
+		BUG_ON(addr < vma->vm_start);
+
+		pgoff = vma->vm_pgoff;
+		pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+		polnid = offset_il_node(pol, vma, pgoff);
+		break;
+
+	case MPOL_PREFERRED:
+		if (pol->flags & MPOL_F_LOCAL)
+			polnid = numa_node_id();
+		else
+			polnid = pol->v.preferred_node;
+		break;
+
+	case MPOL_BIND:
+		/*
+		 * allows binding to multiple nodes.
+		 * use current page if in policy nodemask,
+		 * else select nearest allowed node, if any.
+		 * If no allowed nodes, use current [!misplaced].
+		 */
+		if (node_isset(curnid, pol->v.nodes))
+			goto out;
+		(void)first_zones_zonelist(
+				node_zonelist(numa_node_id(), GFP_HIGHUSER),
+				gfp_zone(GFP_HIGHUSER),
+				&pol->v.nodes, &zone);
+		polnid = zone->node;
+		break;
+
+	default:
+		BUG();
+	}
+	if (curnid != polnid)
+		ret = polnid;
+out:
+	mpol_cond_put(pol);
+
+	return ret;
+}
+
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
 	pr_debug("deleting %lx-l%lx\n", n->start, n->end);
-- 
cgit v1.2.3-55-g7522


From 7039e1dbec6eeaa8ecab43a82d6589eeced995c3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 25 Oct 2012 14:16:34 +0200
Subject: mm: migrate: Introduce migrate_misplaced_page()

Note: This was originally based on Peter's patch "mm/migrate: Introduce
	migrate_misplaced_page()" but borrows extremely heavily from Andrea's
	"autonuma: memory follows CPU algorithm and task/mm_autonuma stats
	collection". The end result is barely recognisable so signed-offs
	had to be dropped. If original authors are ok with it, I'll
	re-add the signed-off-bys.

Add migrate_misplaced_page() which deals with migrating pages from
faults.

Based-on-work-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Based-on-work-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/migrate.h |  11 +++++
 mm/migrate.c            | 108 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 117 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 9d1c159e2427..f0d0313eea6f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,6 +13,7 @@ enum migrate_reason {
 	MR_MEMORY_HOTPLUG,
 	MR_SYSCALL,		/* also applies to cpusets */
 	MR_MEMPOLICY_MBIND,
+	MR_NUMA_MISPLACED,
 	MR_CMA
 };
 
@@ -73,4 +74,14 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int migrate_misplaced_page(struct page *page, int node);
+#else
+static inline int migrate_misplaced_page(struct page *page, int node)
+{
+	return -EAGAIN; /* can't migrate now */
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index 27be9c923dc1..d168aec98427 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -282,7 +282,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
 		struct buffer_head *head, enum migrate_mode mode)
 {
-	int expected_count;
+	int expected_count = 0;
 	void **pslot;
 
 	if (!mapping) {
@@ -1415,4 +1415,108 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	}
  	return err;
 }
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Returns true if this is a safe migration target node for misplaced NUMA
+ * pages. Currently it only checks the watermarks which crude
+ */
+static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
+				   int nr_migrate_pages)
+{
+	int z;
+	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
+		struct zone *zone = pgdat->node_zones + z;
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone->all_unreclaimable)
+			continue;
+
+		/* Avoid waking kswapd by allocating pages_to_migrate pages. */
+		if (!zone_watermark_ok(zone, 0,
+				       high_wmark_pages(zone) +
+				       nr_migrate_pages,
+				       0, 0))
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static struct page *alloc_misplaced_dst_page(struct page *page,
+					   unsigned long data,
+					   int **result)
+{
+	int nid = (int) data;
+	struct page *newpage;
+
+	newpage = alloc_pages_exact_node(nid,
+					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
+					  __GFP_NOMEMALLOC | __GFP_NORETRY |
+					  __GFP_NOWARN) &
+					 ~GFP_IOFS, 0);
+	return newpage;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	int isolated = 0;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
+	}
+
+	/* Avoid migrating to a node that is nearly full */
+	if (migrate_balanced_pgdat(NODE_DATA(node), 1)) {
+		int page_lru;
+
+		if (isolate_lru_page(page)) {
+			put_page(page);
+			goto out;
+		}
+		isolated = 1;
+
+		/*
+		 * Page is isolated which takes a reference count so now the
+		 * callers reference can be safely dropped without the page
+		 * disappearing underneath us during migration
+		 */
+		put_page(page);
+
+		page_lru = page_is_file_cache(page);
+		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		list_add(&page->lru, &migratepages);
+	}
+
+	if (isolated) {
+		int nr_remaining;
+
+		nr_remaining = migrate_pages(&migratepages,
+				alloc_misplaced_dst_page,
+				node, false, MIGRATE_ASYNC,
+				MR_NUMA_MISPLACED);
+		if (nr_remaining) {
+			putback_lru_pages(&migratepages);
+			isolated = 0;
+		}
+	}
+	BUG_ON(!list_empty(&migratepages));
+out:
+	return isolated;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#endif /* CONFIG_NUMA */
-- 
cgit v1.2.3-55-g7522


From 4daae3b4b9e49b7e0935499a352f1c59d90287d2 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 2 Nov 2012 11:33:45 +0000
Subject: mm: mempolicy: Use _PAGE_NUMA to migrate pages

Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but
	sufficiently different that the signed-off-bys were dropped

Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page()
pieces into an effective migrate on fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and avoid
the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the
page-migration performance.

Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |  9 +++++----
 mm/huge_memory.c        | 31 ++++++++++++++++++++++++++++---
 mm/memory.c             | 32 +++++++++++++++++++++++++++-----
 3 files changed, 60 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a1d26a98c655..dabb5108d6c0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page)
 	return page;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-				  pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
 	return 0;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-					pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	return 0;
 }
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f5f37630c54d..5723b551c023 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -18,6 +18,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -1019,17 +1020,39 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-				pmd_t pmd, pmd_t *pmdp)
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int target_nid;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
+	get_page(page);
+	spin_unlock(&mm->page_table_lock);
+
+	target_nid = mpol_misplaced(page, vma, haddr);
+	if (target_nid == -1)
+		goto clear_pmdnuma;
+
+	/*
+	 * Due to lacking code to migrate thp pages, we'll split
+	 * (which preserves the special PROT_NONE) and re-take the
+	 * fault on the normal pages.
+	 */
+	split_huge_page(page);
+	put_page(page);
+	return 0;
+
+clear_pmdnuma:
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
+	if (page)
+		put_page(page);
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index e30616f2cc3d..d52542680e10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
-	struct page *page;
+	struct page *page = NULL;
 	spinlock_t *ptl;
+	int current_nid, target_nid;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	*/
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
-	if (unlikely(!pte_same(*ptep, pte)))
-		goto out_unlock;
+	if (unlikely(!pte_same(*ptep, pte))) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
 	pte = pte_mknonnuma(pte);
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-out_unlock:
+	get_page(page);
+	current_nid = page_to_nid(page);
+	target_nid = mpol_misplaced(page, vma, addr);
 	pte_unmap_unlock(ptep, ptl);
+	if (target_nid == -1) {
+		/*
+		 * Account for the fault against the current node if it not
+		 * being replaced regardless of where the page is located.
+		 */
+		current_nid = numa_node_id();
+		put_page(page);
+		goto out;
+	}
+
+	/* Migrate to the requested node */
+	if (migrate_misplaced_page(page, target_nid))
+		current_nid = target_nid;
+
+out:
 	return 0;
 }
 
@@ -3655,7 +3677,7 @@ retry:
 		barrier();
 		if (pmd_trans_huge(orig_pmd)) {
 			if (pmd_numa(*pmd))
-				return do_huge_pmd_numa_page(mm, address,
+				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
 
 			if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {
-- 
cgit v1.2.3-55-g7522


From b24f53a0bea38b266d219ee651b22dba727c44ae Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Thu, 25 Oct 2012 14:16:32 +0200
Subject: mm: mempolicy: Add MPOL_MF_LAZY

NOTE: Once again there is a lot of patch stealing and the end result
	is sufficiently different that I had to drop the signed-offs.
	Will re-add if the original authors are ok with that.

This patch adds another mbind() flag to request "lazy migration".  The
flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected
pages are marked PROT_NONE. The pages will be migrated in the fault
path on "first touch", if the policy dictates at that time.

"Lazy Migration" will allow testing of migrate-on-fault via mbind().
Also allows applications to specify that only subsequently touched
pages be migrated to obey new policy, instead of all pages in range.
This can be useful for multi-threaded applications working on a
large shared data area that is initialized by an initial thread
resulting in all pages on one [or a few, if overflowed] nodes.
After PROT_NONE, the pages in regions assigned to the worker threads
will be automatically migrated local to the threads on 1st touch.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm.h             |   5 ++
 include/uapi/linux/mempolicy.h |  13 ++-
 mm/mempolicy.c                 | 185 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 185 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa1615211159..471185e29bab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1551,6 +1551,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 #endif
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+void change_prot_numa(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+#endif
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 472de8a5d37e..6a1baae3775d 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -49,9 +49,16 @@ enum mpol_rebind_step {
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies '_MOVE:  lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#define MPOL_MF_VALID	(MPOL_MF_STRICT   | 	\
+			 MPOL_MF_MOVE     | 	\
+			 MPOL_MF_MOVE_ALL |	\
+			 MPOL_MF_LAZY)
 
 /*
  * Internal flags that share the struct mempolicy flags word with
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index df1466d3d2d8..51d3ebd8561e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -90,6 +90,7 @@
 #include <linux/syscalls.h>
 #include <linux/ctype.h>
 #include <linux/mm_inline.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -565,6 +566,145 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * Here we search for not shared page mappings (mapcount == 1) and we
+ * set up the pmd/pte_numa on those mappings so the very next access
+ * will fire a NUMA hinting page fault.
+ */
+static int
+change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, *_pte;
+	struct page *page;
+	unsigned long _address, end;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	VM_BUG_ON(address & ~PAGE_MASK);
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		goto out;
+
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		int page_nid;
+		ret = HPAGE_PMD_NR;
+
+		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+		if (pmd_numa(*pmd)) {
+			spin_unlock(&mm->page_table_lock);
+			goto out;
+		}
+
+		page = pmd_page(*pmd);
+
+		/* only check non-shared pages */
+		if (page_mapcount(page) != 1) {
+			spin_unlock(&mm->page_table_lock);
+			goto out;
+		}
+
+		page_nid = page_to_nid(page);
+
+		if (pmd_numa(*pmd)) {
+			spin_unlock(&mm->page_table_lock);
+			goto out;
+		}
+
+		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
+		ret += HPAGE_PMD_NR;
+		/* defer TLB flush to lower the overhead */
+		spin_unlock(&mm->page_table_lock);
+		goto out;
+	}
+
+	if (pmd_trans_unstable(pmd))
+		goto out;
+	VM_BUG_ON(!pmd_present(*pmd));
+
+	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
+	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	for (_address = address, _pte = pte; _address < end;
+	     _pte++, _address += PAGE_SIZE) {
+		pte_t pteval = *_pte;
+		if (!pte_present(pteval))
+			continue;
+		if (pte_numa(pteval))
+			continue;
+		page = vm_normal_page(vma, _address, pteval);
+		if (unlikely(!page))
+			continue;
+		/* only check non-shared pages */
+		if (page_mapcount(page) != 1)
+			continue;
+
+		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
+
+		/* defer TLB flush to lower the overhead */
+		ret++;
+	}
+	pte_unmap_unlock(pte, ptl);
+
+	if (ret && !pmd_numa(*pmd)) {
+		spin_lock(&mm->page_table_lock);
+		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
+		spin_unlock(&mm->page_table_lock);
+		/* defer TLB flush to lower the overhead */
+	}
+
+out:
+	return ret;
+}
+
+/* Assumes mmap_sem is held */
+void
+change_prot_numa(struct vm_area_struct *vma,
+			unsigned long address, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int progress = 0;
+
+	while (address < end) {
+		VM_BUG_ON(address < vma->vm_start ||
+			  address + PAGE_SIZE > vma->vm_end);
+
+		progress += change_prot_numa_range(mm, vma, address);
+		address = (address + PMD_SIZE) & PMD_MASK;
+	}
+
+	/*
+	 * Flush the TLB for the mm to start the NUMA hinting
+	 * page faults after we finish scanning this vma part
+	 * if there were any PTE updates
+	 */
+	if (progress) {
+		mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
+		flush_tlb_range(vma, address, end);
+		mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
+	}
+}
+#else
+static unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
+{
+	return 0;
+}
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+
 /*
  * Check if all pages in a range are on a set of nodes.
  * If pagelist != NULL then isolate pages from the LRU and
@@ -583,22 +723,32 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		return ERR_PTR(-EFAULT);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
+		unsigned long endvma = vma->vm_end;
+
+		if (endvma > end)
+			endvma = end;
+		if (vma->vm_start > start)
+			start = vma->vm_start;
+
 		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 			if (!vma->vm_next && vma->vm_end < end)
 				return ERR_PTR(-EFAULT);
 			if (prev && prev->vm_end < vma->vm_start)
 				return ERR_PTR(-EFAULT);
 		}
-		if (!is_vm_hugetlb_page(vma) &&
-		    ((flags & MPOL_MF_STRICT) ||
+
+		if (is_vm_hugetlb_page(vma))
+			goto next;
+
+		if (flags & MPOL_MF_LAZY) {
+			change_prot_numa(vma, start, endvma);
+			goto next;
+		}
+
+		if ((flags & MPOL_MF_STRICT) ||
 		     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-				vma_migratable(vma)))) {
-			unsigned long endvma = vma->vm_end;
+		      vma_migratable(vma))) {
 
-			if (endvma > end)
-				endvma = end;
-			if (vma->vm_start > start)
-				start = vma->vm_start;
 			err = check_pgd_range(vma, start, endvma, nodes,
 						flags, private);
 			if (err) {
@@ -606,6 +756,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 				break;
 			}
 		}
+next:
 		prev = vma;
 	}
 	return first;
@@ -1138,8 +1289,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	int err;
 	LIST_HEAD(pagelist);
 
-	if (flags & ~(unsigned long)(MPOL_MF_STRICT |
-				     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & ~(unsigned long)MPOL_MF_VALID)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
 		return -EPERM;
@@ -1162,6 +1312,9 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
+	if (flags & MPOL_MF_LAZY)
+		new->flags |= MPOL_F_MOF;
+
 	/*
 	 * If we are using the default policy then operation
 	 * on discontinuous address spaces is okay after all
@@ -1198,13 +1351,15 @@ static long do_mbind(unsigned long start, unsigned long len,
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
-	err = PTR_ERR(vma);
-	if (!IS_ERR(vma)) {
-		int nr_failed = 0;
-
+	err = PTR_ERR(vma);	/* maybe ... */
+	if (!IS_ERR(vma) && mode != MPOL_NOOP)
 		err = mbind_range(mm, start, end, new);
 
+	if (!err) {
+		int nr_failed = 0;
+
 		if (!list_empty(&pagelist)) {
+			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma,
 						false, MIGRATE_SYNC,
@@ -1213,7 +1368,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 				putback_lru_pages(&pagelist);
 		}
 
-		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
 		putback_lru_pages(&pagelist);
-- 
cgit v1.2.3-55-g7522


From 4b10e7d562c90d0a72f324832c26653947a07381 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 25 Oct 2012 14:16:32 +0200
Subject: mm: mempolicy: Implement change_prot_numa() in terms of
 change_protection()

This patch converts change_prot_numa() to use change_protection(). As
pte_numa and friends check the PTE bits directly it is necessary for
change_protection() to use pmd_mknuma(). Hence the required
modifications to change_protection() are a little clumsy but the
end result is that most of the numa page table helpers are just one or
two instructions.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |   3 +-
 include/linux/mm.h      |   4 +-
 mm/huge_memory.c        |  14 ++++-
 mm/mempolicy.c          | 137 +++++-------------------------------------------
 mm/mprotect.c           |  72 +++++++++++++++++++------
 5 files changed, 85 insertions(+), 145 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index dabb5108d6c0..027ad04ef3a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot);
+			unsigned long addr, pgprot_t newprot,
+			int prot_numa);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 471185e29bab..d04c2f0aab36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long flags, unsigned long new_addr);
 extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
-			      int dirty_accountable);
+			      int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 #endif
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
-void change_prot_numa(struct vm_area_struct *vma,
+unsigned long change_prot_numa(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 #endif
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5723b551c023..d79f7a55bf6f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1147,7 +1147,7 @@ out:
 }
 
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, pgprot_t newprot)
+		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
@@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
 		pmd_t entry;
 		entry = pmdp_get_and_clear(mm, addr, pmd);
-		entry = pmd_modify(entry, newprot);
+		if (!prot_numa)
+			entry = pmd_modify(entry, newprot);
+		else {
+			struct page *page = pmd_page(*pmd);
+
+			/* only check non-shared pages */
+			if (page_mapcount(page) == 1 &&
+			    !pmd_numa(*pmd)) {
+				entry = pmd_mknuma(entry);
+			}
+		}
 		set_pmd_at(mm, addr, pmd, entry);
 		spin_unlock(&vma->vm_mm->page_table_lock);
 		ret = 1;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 51d3ebd8561e..75d4600a5e92 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
 /*
- * Here we search for not shared page mappings (mapcount == 1) and we
- * set up the pmd/pte_numa on those mappings so the very next access
- * will fire a NUMA hinting page fault.
+ * This is used to mark a range of virtual addresses to be inaccessible.
+ * These are later cleared by a NUMA hinting fault. Depending on these
+ * faults, pages may be migrated for better NUMA placement.
+ *
+ * This is assuming that NUMA faults are handled using PROT_NONE. If
+ * an architecture makes a different choice, it will need further
+ * changes to the core.
  */
-static int
-change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte, *_pte;
-	struct page *page;
-	unsigned long _address, end;
-	spinlock_t *ptl;
-	int ret = 0;
-
-	VM_BUG_ON(address & ~PAGE_MASK);
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		goto out;
-
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
-		int page_nid;
-		ret = HPAGE_PMD_NR;
-
-		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page = pmd_page(*pmd);
-
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page_nid = page_to_nid(page);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		ret += HPAGE_PMD_NR;
-		/* defer TLB flush to lower the overhead */
-		spin_unlock(&mm->page_table_lock);
-		goto out;
-	}
-
-	if (pmd_trans_unstable(pmd))
-		goto out;
-	VM_BUG_ON(!pmd_present(*pmd));
-
-	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	for (_address = address, _pte = pte; _address < end;
-	     _pte++, _address += PAGE_SIZE) {
-		pte_t pteval = *_pte;
-		if (!pte_present(pteval))
-			continue;
-		if (pte_numa(pteval))
-			continue;
-		page = vm_normal_page(vma, _address, pteval);
-		if (unlikely(!page))
-			continue;
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1)
-			continue;
-
-		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
-
-		/* defer TLB flush to lower the overhead */
-		ret++;
-	}
-	pte_unmap_unlock(pte, ptl);
-
-	if (ret && !pmd_numa(*pmd)) {
-		spin_lock(&mm->page_table_lock);
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		spin_unlock(&mm->page_table_lock);
-		/* defer TLB flush to lower the overhead */
-	}
-
-out:
-	return ret;
-}
-
-/* Assumes mmap_sem is held */
-void
-change_prot_numa(struct vm_area_struct *vma,
-			unsigned long address, unsigned long end)
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	int progress = 0;
-
-	while (address < end) {
-		VM_BUG_ON(address < vma->vm_start ||
-			  address + PAGE_SIZE > vma->vm_end);
+	int nr_updated;
+	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
-		progress += change_prot_numa_range(mm, vma, address);
-		address = (address + PMD_SIZE) & PMD_MASK;
-	}
+	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
 
-	/*
-	 * Flush the TLB for the mm to start the NUMA hinting
-	 * page faults after we finish scanning this vma part
-	 * if there were any PTE updates
-	 */
-	if (progress) {
-		mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
-		flush_tlb_range(vma, address, end);
-		mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
-	}
+	return nr_updated;
 }
 #else
 static unsigned long change_prot_numa(struct vm_area_struct *vma,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7c3628a8b486..7ef6ae964e8f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 }
 #endif
 
-static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
@@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
+			bool updated = false;
 
 			ptent = ptep_modify_prot_start(mm, addr, pte);
-			ptent = pte_modify(ptent, newprot);
+			if (!prot_numa) {
+				ptent = pte_modify(ptent, newprot);
+				updated = true;
+			} else {
+				struct page *page;
+
+				page = vm_normal_page(vma, addr, oldpte);
+				if (page) {
+					/* only check non-shared pages */
+					if (!pte_numa(oldpte) &&
+					    page_mapcount(page) == 1) {
+						ptent = pte_mknuma(ptent);
+						updated = true;
+					}
+				}
+			}
 
 			/*
 			 * Avoid taking write faults for pages we know to be
 			 * dirty.
 			 */
-			if (dirty_accountable && pte_dirty(ptent))
+			if (dirty_accountable && pte_dirty(ptent)) {
 				ptent = pte_mkwrite(ptent);
+				updated = true;
+			}
+
+			if (updated)
+				pages++;
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
-			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
@@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	return pages;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmd)
+{
+	spin_lock(&mm->page_table_lock);
+	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
+	spin_unlock(&mm->page_table_lock);
+}
+#else
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmd)
+{
+	BUG();
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot)) {
+			else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
 				pages += HPAGE_PMD_NR;
 				continue;
 			}
@@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
-				 dirty_accountable);
+		pages += change_pte_range(vma, pmd, addr, next, newprot,
+				 dirty_accountable, prot_numa);
+
+		if (prot_numa)
+			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;
@@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 
 static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		pages += change_pmd_range(vma, pud, addr, next, newprot,
-				 dirty_accountable);
+				 dirty_accountable, prot_numa);
 	} while (pud++, addr = next, addr != end);
 
 	return pages;
@@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
 
 static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		pages += change_pud_range(vma, pgd, addr, next, newprot,
-				 dirty_accountable);
+				 dirty_accountable, prot_numa);
 	} while (pgd++, addr = next, addr != end);
 
 	/* Only flush the TLB if we actually modified any entries: */
@@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 
 unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end, pgprot_t newprot,
-		       int dirty_accountable)
+		       int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pages;
@@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	if (is_vm_hugetlb_page(vma))
 		pages = hugetlb_change_protection(vma, start, end, newprot);
 	else
-		pages = change_protection_range(vma, start, end, newprot, dirty_accountable);
+		pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 
 	return pages;
@@ -249,7 +289,7 @@ success:
 		dirty_accountable = 1;
 	}
 
-	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
 
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
-- 
cgit v1.2.3-55-g7522


From cbee9f88ec1b8dd6b58f25f54e4f52c82ed77690 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 25 Oct 2012 14:16:43 +0200
Subject: mm: numa: Add fault driven placement and migration

NOTE: This patch is based on "sched, numa, mm: Add fault driven
	placement and migration policy" but as it throws away all the policy
	to just leave a basic foundation I had to drop the signed-offs-by.

This patch creates a bare-bones method for setting PTEs pte_numa in the
context of the scheduler that when faulted later will be faulted onto the
node the CPU is running on.  In itself this does nothing useful but any
placement policy will fundamentally depend on receiving hints on placement
from fault context and doing something intelligent about it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
---
 arch/sh/mm/Kconfig       |   1 +
 arch/x86/Kconfig         |   2 +
 include/linux/mm_types.h |  11 +++++
 include/linux/sched.h    |  20 ++++++++
 kernel/sched/core.c      |  13 +++++
 kernel/sched/fair.c      | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/features.h  |   7 +++
 kernel/sched/sched.h     |   6 +++
 kernel/sysctl.c          |  24 ++++++++-
 mm/huge_memory.c         |   5 +-
 mm/memory.c              |  14 +++++-
 11 files changed, 224 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index cb8f9920f4dd..0f7c852f355c 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@ config VSYSCALL
 config NUMA
 	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
 	help
 	  Some SH systems have many various memories scattered around
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff3ced2..1137028fc6d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@ config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..ed8638c29b3e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -397,6 +397,17 @@ struct mm_struct {
 #endif
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * numa_next_scan is the next time when the PTEs will me marked
+	 * pte_numa to gather statistics and migrate pages to new nodes
+	 * if necessary
+	 */
+	unsigned long numa_next_scan;
+
+	/* numa_scan_seq prevents two threads setting pte_numa */
+	int numa_scan_seq;
 #endif
 	struct uprobes_state uprobes_state;
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..844af5b12cb2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1479,6 +1479,14 @@ struct task_struct {
 	short il_next;
 	short pref_node_fork;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	int numa_scan_seq;
+	int numa_migrate_seq;
+	unsigned int numa_scan_period;
+	u64 node_stamp;			/* migration stamp  */
+	struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
 	struct rcu_head rcu;
 
 	/*
@@ -1553,6 +1561,14 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages);
+#else
+static inline void task_numa_fault(int node, int pages)
+{
+}
+#endif
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -1990,6 +2006,10 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..cad0d092ce3b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1533,6 +1533,19 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_scan_seq = 0;
+	}
+
+	p->node_stamp = 0ULL;
+	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
+	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+	p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+	p->numa_work.next = &p->numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a14b990..6831abb5dbef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/profile.h>
 #include <linux/interrupt.h>
+#include <linux/mempolicy.h>
+#include <linux/task_work.h>
 
 #include <trace/events/sched.h>
 
@@ -776,6 +778,126 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * numa task sample period in ms: 5s
+ */
+unsigned int sysctl_numa_balancing_scan_period_min = 5000;
+unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+
+static void task_numa_placement(struct task_struct *p)
+{
+	int seq = ACCESS_ONCE(p->mm->numa_scan_seq);
+
+	if (p->numa_scan_seq == seq)
+		return;
+	p->numa_scan_seq = seq;
+
+	/* FIXME: Scheduling placement policy hints go here */
+}
+
+/*
+ * Got a PROT_NONE fault for a page on @node.
+ */
+void task_numa_fault(int node, int pages)
+{
+	struct task_struct *p = current;
+
+	/* FIXME: Allocate task-specific structure for placement policy here */
+
+	task_numa_placement(p);
+}
+
+/*
+ * The expensive part of numa migration is done from task_work context.
+ * Triggered from task_tick_numa().
+ */
+void task_numa_work(struct callback_head *work)
+{
+	unsigned long migrate, next_scan, now = jiffies;
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
+
+	work->next = work; /* protect against double add */
+	/*
+	 * Who cares about NUMA placement when they're dying.
+	 *
+	 * NOTE: make sure not to dereference p->mm before this check,
+	 * exit_task_work() happens _after_ exit_mm() so we could be called
+	 * without p->mm even though we still had it when we enqueued this
+	 * work.
+	 */
+	if (p->flags & PF_EXITING)
+		return;
+
+	/*
+	 * Enforce maximal scan/migration frequency..
+	 */
+	migrate = mm->numa_next_scan;
+	if (time_before(now, migrate))
+		return;
+
+	if (p->numa_scan_period == 0)
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+
+	next_scan = now + 2*msecs_to_jiffies(p->numa_scan_period);
+	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
+		return;
+
+	ACCESS_ONCE(mm->numa_scan_seq)++;
+	{
+		struct vm_area_struct *vma;
+
+		down_read(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (!vma_migratable(vma))
+				continue;
+			change_prot_numa(vma, vma->vm_start, vma->vm_end);
+		}
+		up_read(&mm->mmap_sem);
+	}
+}
+
+/*
+ * Drive the periodic memory faults..
+ */
+void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+	struct callback_head *work = &curr->numa_work;
+	u64 period, now;
+
+	/*
+	 * We don't care about NUMA placement if we don't have memory.
+	 */
+	if (!curr->mm || (curr->flags & PF_EXITING) || work->next != work)
+		return;
+
+	/*
+	 * Using runtime rather than walltime has the dual advantage that
+	 * we (mostly) drive the selection from busy threads and that the
+	 * task needs to have done some actual work before we bother with
+	 * NUMA placement.
+	 */
+	now = curr->se.sum_exec_runtime;
+	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
+
+	if (now - curr->node_stamp > period) {
+		curr->node_stamp = now;
+
+		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
+			init_task_work(work, task_numa_work); /* TODO: move this into sched_fork() */
+			task_work_add(curr, work, true);
+		}
+	}
+}
+#else
+static void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -4954,6 +5076,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		cfs_rq = cfs_rq_of(se);
 		entity_tick(cfs_rq, se, queued);
 	}
+
+	if (sched_feat_numa(NUMA))
+		task_tick_numa(rq, curr);
 }
 
 /*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index eebefcad7027..5fb7aefbec80 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,3 +61,10 @@ SCHED_FEAT(TTWU_QUEUE, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+
+/*
+ * Apply the automatic NUMA scheduling policy
+ */
+#ifdef CONFIG_NUMA_BALANCING
+SCHED_FEAT(NUMA,	true)
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7a7db09cfabc..ae31c051ff2f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -648,6 +648,12 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
 #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define sched_feat_numa(x) sched_feat(x)
+#else
+#define sched_feat_numa(x) (0)
+#endif
+
 static inline u64 global_rt_period(void)
 {
 	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 26f65eaa01f9..025e1ae50ef1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -256,9 +256,11 @@ static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_wakeup_granularity_ns;			/* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
+#ifdef CONFIG_SMP
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-#endif
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_SCHED_DEBUG */
 
 #ifdef CONFIG_COMPACTION
 static int min_extfrag_threshold;
@@ -301,6 +303,7 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "sched_tunable_scaling",
 		.data		= &sysctl_sched_tunable_scaling,
@@ -347,7 +350,24 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#endif
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_period_min_ms",
+		.data		= &sysctl_numa_balancing_scan_period_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_max_ms",
+		.data		= &sysctl_numa_balancing_scan_period_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 	{
 		.procname	= "sched_rt_period_us",
 		.data		= &sysctl_sched_rt_period,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d79f7a55bf6f..ee8133794a56 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1046,6 +1046,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	split_huge_page(page);
 	put_page(page);
+
 	return 0;
 
 clear_pmdnuma:
@@ -1060,8 +1061,10 @@ clear_pmdnuma:
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page)
+	if (page) {
 		put_page(page);
+		task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+	}
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index d52542680e10..8012c1907895 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3454,7 +3454,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid, target_nid;
+	int current_nid = -1;
+	int target_nid;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3501,6 +3502,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		current_nid = target_nid;
 
 out:
+	task_numa_fault(current_nid, 1);
 	return 0;
 }
 
@@ -3537,6 +3539,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
+		int curr_nid;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3554,6 +3557,15 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = vm_normal_page(vma, addr, pteval);
 		if (unlikely(!page))
 			continue;
+		/* only check non-shared pages */
+		if (unlikely(page_mapcount(page) != 1))
+			continue;
+		pte_unmap_unlock(pte, ptl);
+
+		curr_nid = page_to_nid(page);
+		task_numa_fault(curr_nid, 1);
+
+		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
-- 
cgit v1.2.3-55-g7522


From 6e5fb223e89dbe5cb5c563f8d4a4a0a7d62455a8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 25 Oct 2012 14:16:45 +0200
Subject: mm: sched: numa: Implement constant, per task Working Set Sampling
 (WSS) rate

Previously, to probe the working set of a task, we'd use
a very simple and crude method: mark all of its address
space PROT_NONE.

That method has various (obvious) disadvantages:

 - it samples the working set at dissimilar rates,
   giving some tasks a sampling quality advantage
   over others.

 - creates performance problems for tasks with very
   large working sets

 - over-samples processes with large address spaces but
   which only very rarely execute

Improve that method by keeping a rotating offset into the
address space that marks the current position of the scan,
and advance it by a constant rate (in a CPU cycles execution
proportional manner). If the offset reaches the last mapped
address of the mm then it then it starts over at the first
address.

The per-task nature of the working set sampling functionality in this tree
allows such constant rate, per task, execution-weight proportional sampling
of the working set, with an adaptive sampling interval/frequency that
goes from once per 100ms up to just once per 8 seconds.  The current
sampling volume is 256 MB per interval.

As tasks mature and converge their working set, so does the
sampling rate slow down to just a trickle, 256 MB per 8
seconds of CPU time executed.

This, beyond being adaptive, also rate-limits rarely
executing systems and does not over-sample on overloaded
systems.

[ In AutoNUMA speak, this patch deals with the effective sampling
  rate of the 'hinting page fault'. AutoNUMA's scanning is
  currently rate-limited, but it is also fundamentally
  single-threaded, executing in the knuma_scand kernel thread,
  so the limit in AutoNUMA is global and does not scale up with
  the number of CPUs, nor does it scan tasks in an execution
  proportional manner.

  So the idea of rate-limiting the scanning was first implemented
  in the AutoNUMA tree via a global rate limit. This patch goes
  beyond that by implementing an execution rate proportional
  working set sampling rate that is not implemented via a single
  global scanning daemon. ]

[ Dan Carpenter pointed out a possible NULL pointer dereference in the
  first version of this patch. ]

Based-on-idea-by: Andrea Arcangeli <aarcange@redhat.com>
Bug-Found-By: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
[ Wrote changelog and fixed bug. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm_types.h |  3 +++
 include/linux/sched.h    |  1 +
 kernel/sched/fair.c      | 65 ++++++++++++++++++++++++++++++++++++++----------
 kernel/sysctl.c          |  7 ++++++
 4 files changed, 63 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ed8638c29b3e..d1e246c5e50c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -406,6 +406,9 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
+	/* Restart point for scanning and setting pte_numa */
+	unsigned long numa_scan_offset;
+
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 844af5b12cb2..37841958d234 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2008,6 +2008,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
 #ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6831abb5dbef..0a349dd1fa60 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -780,10 +780,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
- * numa task sample period in ms: 5s
+ * numa task sample period in ms
  */
-unsigned int sysctl_numa_balancing_scan_period_min = 5000;
-unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+unsigned int sysctl_numa_balancing_scan_period_min = 100;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+
+/* Portion of address space to scan in MB */
+unsigned int sysctl_numa_balancing_scan_size = 256;
 
 static void task_numa_placement(struct task_struct *p)
 {
@@ -808,6 +811,12 @@ void task_numa_fault(int node, int pages)
 	task_numa_placement(p);
 }
 
+static void reset_ptenuma_scan(struct task_struct *p)
+{
+	ACCESS_ONCE(p->mm->numa_scan_seq)++;
+	p->mm->numa_scan_offset = 0;
+}
+
 /*
  * The expensive part of numa migration is done from task_work context.
  * Triggered from task_tick_numa().
@@ -817,6 +826,9 @@ void task_numa_work(struct callback_head *work)
 	unsigned long migrate, next_scan, now = jiffies;
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	struct vm_area_struct *vma;
+	unsigned long offset, end;
+	long length;
 
 	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
 
@@ -846,18 +858,45 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
-	ACCESS_ONCE(mm->numa_scan_seq)++;
-	{
-		struct vm_area_struct *vma;
+	offset = mm->numa_scan_offset;
+	length = sysctl_numa_balancing_scan_size;
+	length <<= 20;
 
-		down_read(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			if (!vma_migratable(vma))
-				continue;
-			change_prot_numa(vma, vma->vm_start, vma->vm_end);
-		}
-		up_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, offset);
+	if (!vma) {
+		reset_ptenuma_scan(p);
+		offset = 0;
+		vma = mm->mmap;
+	}
+	for (; vma && length > 0; vma = vma->vm_next) {
+		if (!vma_migratable(vma))
+			continue;
+
+		/* Skip small VMAs. They are not likely to be of relevance */
+		if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR)
+			continue;
+
+		offset = max(offset, vma->vm_start);
+		end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end);
+		length -= end - offset;
+
+		change_prot_numa(vma, offset, end);
+
+		offset = end;
 	}
+
+	/*
+	 * It is possible to reach the end of the VMA list but the last few VMAs are
+	 * not guaranteed to the vma_migratable. If they are not, we would find the
+	 * !migratable VMA on the next scan but not reset the scanner to the start
+	 * so check it now.
+	 */
+	if (vma)
+		mm->numa_scan_offset = offset;
+	else
+		reset_ptenuma_scan(p);
+	up_read(&mm->mmap_sem);
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 025e1ae50ef1..7d3a2e0475e5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "numa_balancing_scan_size_mb",
+		.data		= &sysctl_numa_balancing_scan_size,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
 	{
-- 
cgit v1.2.3-55-g7522


From 4b96a29ba891dd59734cb7be80a900fe93aa2d9f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 25 Oct 2012 14:16:47 +0200
Subject: mm: sched: numa: Implement slow start for working set sampling

Add a 1 second delay before starting to scan the working set of
a task and starting to balance it amongst nodes.

[ note that before the constant per task WSS sampling rate patch
  the initial scan would happen much later still, in effect that
  patch caused this regression. ]

The theory is that short-run tasks benefit very little from NUMA
placement: they come and go, and they better stick to the node
they were started on. As tasks mature and rebalance to other CPUs
and nodes, so does their NUMA placement have to change and so
does it start to matter more and more.

In practice this change fixes an observable kbuild regression:

   # [ a perf stat --null --repeat 10 test of ten bzImage builds to /dev/shm ]

   !NUMA:
   45.291088843 seconds time elapsed                                          ( +-  0.40% )
   45.154231752 seconds time elapsed                                          ( +-  0.36% )

   +NUMA, no slow start:
   46.172308123 seconds time elapsed                                          ( +-  0.30% )
   46.343168745 seconds time elapsed                                          ( +-  0.25% )

   +NUMA, 1 sec slow start:
   45.224189155 seconds time elapsed                                          ( +-  0.25% )
   45.160866532 seconds time elapsed                                          ( +-  0.17% )

and it also fixes an observable perf bench (hackbench) regression:

   # perf stat --null --repeat 10 perf bench sched messaging

   -NUMA:

   -NUMA:                  0.246225691 seconds time elapsed                   ( +-  1.31% )
   +NUMA no slow start:    0.252620063 seconds time elapsed                   ( +-  1.13% )

   +NUMA 1sec delay:       0.248076230 seconds time elapsed                   ( +-  1.35% )

The implementation is simple and straightforward, most of the patch
deals with adding the /proc/sys/kernel/numa_balancing_scan_delay_ms tunable
knob.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
[ Wrote the changelog, ran measurements, tuned the default. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/sched.h | 1 +
 kernel/sched/core.c   | 2 +-
 kernel/sched/fair.c   | 5 +++++
 kernel/sysctl.c       | 7 +++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 37841958d234..7d95a232b5b9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2006,6 +2006,7 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
 extern unsigned int sysctl_numa_balancing_scan_size;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cad0d092ce3b..fbfc4843063f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1543,7 +1543,7 @@ static void __sched_fork(struct task_struct *p)
 	p->node_stamp = 0ULL;
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
-	p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
 #endif /* CONFIG_NUMA_BALANCING */
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f6e1f25ed2bd..7727b0161579 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -788,6 +788,9 @@ unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
 
+/* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
+unsigned int sysctl_numa_balancing_scan_delay = 1000;
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq = ACCESS_ONCE(p->mm->numa_scan_seq);
@@ -929,6 +932,8 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
 
 	if (now - curr->node_stamp > period) {
+		if (!curr->node_stamp)
+			curr->numa_scan_period = sysctl_numa_balancing_scan_period_min;
 		curr->node_stamp = now;
 
 		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d3a2e0475e5..48a68cc258c1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -352,6 +352,13 @@ static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_delay_ms",
+		.data		= &sysctl_numa_balancing_scan_delay,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "numa_balancing_scan_period_min_ms",
 		.data		= &sysctl_numa_balancing_scan_period_min,
-- 
cgit v1.2.3-55-g7522


From 03c5a6e16322c997bf8f264851bfa3f532ad515f Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 2 Nov 2012 14:52:48 +0000
Subject: mm: numa: Add pte updates, hinting and migration stats

It is tricky to quantify the basic cost of automatic NUMA placement in a
meaningful manner. This patch adds some vmstats that can be used as part
of a basic costing model.

u    = basic unit = sizeof(void *)
Ca   = cost of struct page access = sizeof(struct page) / u
Cpte = Cost PTE access = Ca
Cupdate = Cost PTE update = (2 * Cpte) + (2 * Wlock)
	where Cpte is incurred twice for a read and a write and Wlock
	is a constant representing the cost of taking or releasing a
	lock
Cnumahint = Cost of a minor page fault = some high constant e.g. 1000
Cpagerw = Cost to read or write a full page = Ca + PAGE_SIZE/u
Ci = Cost of page isolation = Ca + Wi
	where Wi is a constant that should reflect the approximate cost
	of the locking operation
Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma)
	where Wnuma is the approximate NUMA factor. 1 is local. 1.2
	would imply that remote accesses are 20% more expensive

Balancing cost = Cpte * numa_pte_updates +
		Cnumahint * numa_hint_faults +
		Ci * numa_pages_migrated +
		Cpagecopy * numa_pages_migrated

Note that numa_pages_migrated is used as a measure of how many pages
were isolated even though it would miss pages that failed to migrate. A
vmstat counter could have been added for it but the isolation cost is
pretty marginal in comparison to the overall cost so it seemed overkill.

The ideal way to measure automatic placement benefit would be to count
the number of remote accesses versus local accesses and do something like

	benefit = (remote_accesses_before - remove_access_after) * Wnuma

but the information is not readily available. As a workload converges, the
expection would be that the number of remote numa hints would reduce to 0.

	convergence = numa_hint_faults_local / numa_hint_faults
		where this is measured for the last N number of
		numa hints recorded. When the workload is fully
		converged the value is 1.

This can measure if the placement policy is converging and how fast it is
doing it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/linux/vm_event_item.h |  6 ++++++
 include/linux/vmstat.h        |  8 ++++++++
 mm/huge_memory.c              |  5 +++++
 mm/memory.c                   | 12 ++++++++++++
 mm/mempolicy.c                |  2 ++
 mm/migrate.c                  |  3 ++-
 mm/vmstat.c                   |  6 ++++++
 7 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index a1f750b8e72a..55600049e794 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,6 +38,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA_BALANCING
+		NUMA_PTE_UPDATES,
+		NUMA_HINT_FAULTS,
+		NUMA_HINT_FAULTS_LOCAL,
+		NUMA_PAGE_MIGRATE,
+#endif
 #ifdef CONFIG_MIGRATION
 		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
 #endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2cce33..a13291f7da88 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)
 
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define count_vm_numa_event(x)     count_vm_event(x)
+#define count_vm_numa_events(x, y) count_vm_events(x, y)
+#else
+#define count_vm_numa_event(x) do {} while (0)
+#define count_vm_numa_events(x, y) do {} while (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
 #define __count_zone_vm_events(item, zone, delta) \
 		__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
 		zone_idx(zone), delta)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee8133794a56..f3a477fffd09 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1026,6 +1026,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page = NULL;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
+	int current_nid = -1;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,6 +1035,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = pmd_page(pmd);
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
+	current_nid = page_to_nid(page);
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1)
diff --git a/mm/memory.c b/mm/memory.c
index 8012c1907895..8a7b4ccbe136 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3477,6 +3477,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
 
+	count_vm_numa_event(NUMA_HINT_FAULTS);
 	page = vm_normal_page(vma, addr, pte);
 	if (!page) {
 		pte_unmap_unlock(ptep, ptl);
@@ -3485,6 +3486,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	get_page(page);
 	current_nid = page_to_nid(page);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 	target_nid = mpol_misplaced(page, vma, addr);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
@@ -3517,6 +3520,9 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
+	int local_nid = numa_node_id();
+	unsigned long nr_faults = 0;
+	unsigned long nr_faults_local = 0;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3565,10 +3571,16 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		curr_nid = page_to_nid(page);
 		task_numa_fault(curr_nid, 1);
 
+		nr_faults++;
+		if (curr_nid == local_nid)
+			nr_faults_local++;
+
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
+	count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
+	count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
 	return 0;
 }
 #else
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a7a62fe7c280..516491fbfaa8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -583,6 +583,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
 	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
+	if (nr_updated)
+		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
 
 	return nr_updated;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d550011a64..23bba5d6edff 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1514,7 +1514,8 @@ int migrate_misplaced_page(struct page *page, int node)
 		if (nr_remaining) {
 			putback_lru_pages(&migratepages);
 			isolated = 0;
-		}
+		} else
+			count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	}
 	BUG_ON(!list_empty(&migratepages));
 out:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 3a067fabe190..c0f1f6db5182 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -774,6 +774,12 @@ const char * const vmstat_text[] = {
 
 	"pgrotated",
 
+#ifdef CONFIG_NUMA_BALANCING
+	"numa_pte_updates",
+	"numa_hint_faults",
+	"numa_hint_faults_local",
+	"numa_pages_migrated",
+#endif
 #ifdef CONFIG_MIGRATION
 	"pgmigrate_success",
 	"pgmigrate_fail",
-- 
cgit v1.2.3-55-g7522


From 8177a420ed7c16c171ed3c3aec5b0676db38c247 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Fri, 23 Mar 2012 20:56:34 +0100
Subject: mm: numa: Structures for Migrate On Fault per NUMA migration rate
 limiting

This defines the per-node data used by Migrate On Fault in order to
rate limit the migration. The rate limiting is applied independently
to each destination node.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mmzone.h | 13 +++++++++++++
 mm/page_alloc.c        |  5 +++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a23923ba8263..28601fdfcdb2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -717,6 +717,19 @@ typedef struct pglist_data {
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * Lock serializing the per destination node AutoNUMA memory
+	 * migration rate limiting data.
+	 */
+	spinlock_t numabalancing_migrate_lock;
+
+	/* Rate limiting time interval */
+	unsigned long numabalancing_migrate_next_window;
+
+	/* Number of pages migrated during the rate limiting time interval */
+	unsigned long numabalancing_migrate_nr_pages;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5953dc2d196f..ef025e20dbee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4449,6 +4449,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	int ret;
 
 	pgdat_resize_init(pgdat);
+#ifdef CONFIG_NUMA_BALANCING
+	spin_lock_init(&pgdat->numabalancing_migrate_lock);
+	pgdat->numabalancing_migrate_nr_pages = 0;
+	pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 	pgdat_page_cgroup_init(pgdat);
-- 
cgit v1.2.3-55-g7522


From e14808b49f55e0e1135da5e4a154a540dd9f3662 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Mon, 19 Nov 2012 10:59:15 +0000
Subject: mm: numa: Rate limit setting of pte_numa if node is saturated

If there are a large number of NUMA hinting faults and all of them
are resulting in migrations it may indicate that memory is just
bouncing uselessly around. NUMA balancing cost is likely exceeding
any benefit from locality. Rate limit the PTE updates if the node
is migration rate-limited. As noted in the comments, this distorts
the NUMA faulting statistics.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/migrate.h |  6 ++++++
 kernel/sched/fair.c     |  9 +++++++++
 mm/migrate.c            | 20 ++++++++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f0d0313eea6f..91556889adac 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -77,11 +77,17 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
 	return -EAGAIN; /* can't migrate now */
 }
+static inline bool migrate_ratelimited(int node)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7727b0161579..37e895a941ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -27,6 +27,7 @@
 #include <linux/profile.h>
 #include <linux/interrupt.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate.h>
 #include <linux/task_work.h>
 
 #include <trace/events/sched.h>
@@ -861,6 +862,14 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
+	/*
+	 * Do not set pte_numa if the current running node is rate-limited.
+	 * This loses statistics on the fault but if we are unwilling to
+	 * migrate to this node, it is less likely we can do useful work
+	 */
+	if (migrate_ratelimited(numa_node_id()))
+		return;
+
 	start = mm->numa_scan_offset;
 	pages = sysctl_numa_balancing_scan_size;
 	pages <<= 20 - PAGE_SHIFT; /* MB in pages */
diff --git a/mm/migrate.c b/mm/migrate.c
index 4b8267f1842f..32a1afca6009 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1464,10 +1464,30 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
  * page migration rate limiting control.
  * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
  * window of time. Default here says do not migrate more than 1280M per second.
+ * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
+ * as it is faults that reset the window, pte updates will happen unconditionally
+ * if there has not been a fault since @pteupdate_interval_millisecs after the
+ * throttle window closed.
  */
 static unsigned int migrate_interval_millisecs __read_mostly = 100;
+static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
 static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
 
+/* Returns true if NUMA migration is currently rate limited */
+bool migrate_ratelimited(int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
+				msecs_to_jiffies(pteupdate_interval_millisecs)))
+		return false;
+
+	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
+		return false;
+
+	return true;
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
-- 
cgit v1.2.3-55-g7522


From 57e0a0309160b1b4ebde9f3c6a867cd96ac368bf Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Mon, 12 Nov 2012 09:06:20 +0000
Subject: mm: numa: Introduce last_nid to the page frame

This patch introduces a last_nid field to the page struct. This is used
to build a two-stage filter in the next patch that is aimed at
mitigating a problem whereby pages migrate to the wrong node when
referenced by a process that was running off its home node.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm.h       | 30 ++++++++++++++++++++++++++++++
 include/linux/mm_types.h |  4 ++++
 mm/page_alloc.c          |  2 ++
 3 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d04c2f0aab36..d87f9ec4a145 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -693,6 +693,36 @@ static inline int page_to_nid(const struct page *page)
 }
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return xchg(&page->_last_nid, nid);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page->_last_nid;
+}
+static inline void reset_page_last_nid(struct page *page)
+{
+	page->_last_nid = -1;
+}
+#else
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return page_to_nid(page);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page_to_nid(page);
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+}
+#endif
+
 static inline struct zone *page_zone(const struct page *page)
 {
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d1e246c5e50c..c5fffa239861 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,6 +175,10 @@ struct page {
 	 */
 	void *shadow;
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	int _last_nid;
+#endif
 }
 /*
  * The struct page can be forced to be double word aligned so that atomic ops
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef025e20dbee..73f226a1206e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -608,6 +608,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
+	reset_page_last_nid(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -3826,6 +3827,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
+		reset_page_last_nid(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
-- 
cgit v1.2.3-55-g7522


From b8593bfda1652755136333cdd362de125b283a9c Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 21 Nov 2012 01:18:23 +0000
Subject: mm: sched: Adapt the scanning rate if a NUMA hinting fault does not
 migrate

The PTE scanning rate and fault rates are two of the biggest sources of
system CPU overhead with automatic NUMA placement.  Ideally a proper policy
would detect if a workload was properly placed, schedule and adjust the
PTE scanning rate accordingly. We do not track the necessary information
to do that but we at least know if we migrated or not.

This patch scans slower if a page was not migrated as the result of a
NUMA hinting fault up to sysctl_numa_balancing_scan_period_max which is
now higher than the previous default. Once every minute it will reset
the scanner in case of phase changes.

This is hilariously crude and the numbers are arbitrary. Workloads will
converge quite slowly in comparison to what a proper policy should be able
to do. On the plus side, we will chew up less CPU for workloads that have
no need for automatic balancing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm_types.h |  3 +++
 include/linux/sched.h    |  5 +++--
 kernel/sched/core.c      |  1 +
 kernel/sched/fair.c      | 29 +++++++++++++++++++++--------
 kernel/sysctl.c          |  7 +++++++
 mm/huge_memory.c         |  2 +-
 mm/memory.c              | 12 ++++++++----
 7 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c5fffa239861..e850a23dd6ec 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -410,6 +410,9 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
+	/* numa_next_reset is when the PTE scanner period will be reset */
+	unsigned long numa_next_reset;
+
 	/* Restart point for scanning and setting pte_numa */
 	unsigned long numa_scan_offset;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d95a232b5b9..0f4ff2bd03f6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1562,9 +1562,9 @@ struct task_struct {
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages);
+extern void task_numa_fault(int node, int pages, bool migrated);
 #else
-static inline void task_numa_fault(int node, int pages)
+static inline void task_numa_fault(int node, int pages, bool migrated)
 {
 }
 #endif
@@ -2009,6 +2009,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
 extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fbfc4843063f..9d255bc0e278 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1537,6 +1537,7 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
 	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
 		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_next_reset = jiffies;
 		p->mm->numa_scan_seq = 0;
 	}
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dd18087fd369..4b577863933f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -784,7 +784,8 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * numa task sample period in ms
  */
 unsigned int sysctl_numa_balancing_scan_period_min = 100;
-unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*50;
+unsigned int sysctl_numa_balancing_scan_period_reset = 100*600;
 
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
@@ -806,20 +807,19 @@ static void task_numa_placement(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int node, int pages)
+void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 
 	/* FIXME: Allocate task-specific structure for placement policy here */
 
 	/*
-	 * Assume that as faults occur that pages are getting properly placed
-	 * and fewer NUMA hints are required. Note that this is a big
-	 * assumption, it assumes processes reach a steady steady with no
-	 * further phase changes.
+	 * If pages are properly placed (did not migrate) then scan slower.
+	 * This is reset periodically in case of phase changes
 	 */
-	p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
-				p->numa_scan_period + jiffies_to_msecs(2));
+        if (!migrated)
+		p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
+			p->numa_scan_period + jiffies_to_msecs(10));
 
 	task_numa_placement(p);
 }
@@ -857,6 +857,19 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
+	/*
+	 * Reset the scan period if enough time has gone by. Objective is that
+	 * scanning will be reduced if pages are properly placed. As tasks
+	 * can enter different phases this needs to be re-examined. Lacking
+	 * proper tracking of reference behaviour, this blunt hammer is used.
+	 */
+	migrate = mm->numa_next_reset;
+	if (time_after(now, migrate)) {
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
+		xchg(&mm->numa_next_reset, next_scan);
+	}
+
 	/*
 	 * Enforce maximal scan/migration frequency..
 	 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 48a68cc258c1..8906f90d6fa2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "numa_balancing_scan_period_reset",
+		.data		= &sysctl_numa_balancing_scan_period_reset,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "numa_balancing_scan_period_max_ms",
 		.data		= &sysctl_numa_balancing_scan_period_max,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 79b96064f8fc..199b261a257e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1068,7 +1068,7 @@ out_unlock:
 	spin_unlock(&mm->page_table_lock);
 	if (page) {
 		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
 	}
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 84c6d9eab182..39edb11b63dc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3468,6 +3468,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int current_nid = -1;
 	int target_nid;
+	bool migrated = false;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3509,12 +3510,13 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* Migrate to the requested node */
-	if (migrate_misplaced_page(page, target_nid))
+	migrated = migrate_misplaced_page(page, target_nid);
+	if (migrated)
 		current_nid = target_nid;
 
 out:
 	if (current_nid != -1)
-		task_numa_fault(current_nid, 1);
+		task_numa_fault(current_nid, 1, migrated);
 	return 0;
 }
 
@@ -3554,6 +3556,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct page *page;
 		int curr_nid = local_nid;
 		int target_nid;
+		bool migrated;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3590,9 +3593,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		/* Migrate to the requested node */
 		pte_unmap_unlock(pte, ptl);
-		if (migrate_misplaced_page(page, target_nid))
+		migrated = migrate_misplaced_page(page, target_nid);
+		if (migrated)
 			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1);
+		task_numa_fault(curr_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
-- 
cgit v1.2.3-55-g7522


From 1a687c2e9a99335c9e77392f050fe607fa18a652 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 22 Nov 2012 11:16:36 +0000
Subject: mm: sched: numa: Control enabling and disabling of NUMA balancing

This patch adds Kconfig options and kernel parameters to allow the
enabling and disabling of automatic NUMA balancing. The existance
of such a switch was and is very important when debugging problems
related to transparent hugepages and we should have the same for
automatic NUMA placement.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 Documentation/kernel-parameters.txt |  3 +++
 include/linux/sched.h               |  4 ++++
 init/Kconfig                        |  8 +++++++
 kernel/sched/core.c                 | 48 +++++++++++++++++++++++++------------
 kernel/sched/fair.c                 |  3 +++
 kernel/sched/features.h             |  6 +++--
 mm/mempolicy.c                      | 46 +++++++++++++++++++++++++++++++++++
 7 files changed, 101 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9776f068306b..2e8d2625b814 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
 
+	numa_balancing=	[KNL,X86] Enable or disable automatic NUMA balancing.
+			Allowed values are enable and disable
+
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			one of ['zone', 'node', 'default'] can be specified
 			This can be set from sysctl after boot.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f4ff2bd03f6..b1e619f9ff1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1563,10 +1563,14 @@ struct task_struct {
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int node, int pages, bool migrated)
 {
 }
+static inline void set_numabalancing_state(bool enabled)
+{
+}
 #endif
 
 /*
diff --git a/init/Kconfig b/init/Kconfig
index 9f00f004796a..18e2a5920a34 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE
 	depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
 	depends on NUMA_BALANCING
 
+config NUMA_BALANCING_DEFAULT_ENABLED
+	bool "Automatically enable NUMA aware memory/task placement"
+	default y
+	depends on NUMA_BALANCING
+	help
+	  If set, autonumic NUMA balancing will be enabled if running on a NUMA
+	  machine.
+
 config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	default y
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9d255bc0e278..7a45015274ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { };
 static void sched_feat_enable(int i) { };
 #endif /* HAVE_JUMP_LABEL */
 
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_set(char *cmp)
 {
-	char buf[64];
-	char *cmp;
-	int neg = 0;
 	int i;
-
-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
+	int neg = 0;
 
 	if (strncmp(cmp, "NO_", 3) == 0) {
 		neg = 1;
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 		}
 	}
 
+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	i = sched_feat_set(cmp);
 	if (i == __SCHED_FEAT_NR)
 		return -EINVAL;
 
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p)
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+void set_numabalancing_state(bool enabled)
+{
+	if (enabled)
+		sched_feat_set("NUMA");
+	else
+		sched_feat_set("NO_NUMA");
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * fork()/clone()-time setup:
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4b577863933f..7a02a2082e95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 
+	if (!sched_feat_numa(NUMA))
+		return;
+
 	/* FIXME: Allocate task-specific structure for placement policy here */
 
 	/*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5fb7aefbec80..d2373a3e3252 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 
 /*
- * Apply the automatic NUMA scheduling policy
+ * Apply the automatic NUMA scheduling policy. Enabled automatically
+ * at runtime if running on a NUMA machine. Can be controlled via
+ * numa_balancing=
  */
 #ifdef CONFIG_NUMA_BALANCING
-SCHED_FEAT(NUMA,	true)
+SCHED_FEAT(NUMA,	false)
 #endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fd20e28fd2ad..046308e9b999 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	mutex_unlock(&p->mutex);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static bool __initdata numabalancing_override;
+
+static void __init check_numabalancing_enable(void)
+{
+	bool numabalancing_default = false;
+
+	if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
+		numabalancing_default = true;
+
+	if (nr_node_ids > 1 && !numabalancing_override) {
+		printk(KERN_INFO "Enabling automatic NUMA balancing. "
+			"Configure with numa_balancing= or sysctl");
+		set_numabalancing_state(numabalancing_default);
+	}
+}
+
+static int __init setup_numabalancing(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+	numabalancing_override = true;
+
+	if (!strcmp(str, "enable")) {
+		set_numabalancing_state(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_numabalancing_state(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		printk(KERN_WARNING "Unable to parse numa_balancing=\n");
+
+	return ret;
+}
+__setup("numa_balancing=", setup_numabalancing);
+#else
+static inline void __init check_numabalancing_enable(void)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void)
 
 	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
 		printk("numa_policy_init: interleaving failed\n");
+
+	check_numabalancing_enable();
 }
 
 /* Reset policy of current process to default */
-- 
cgit v1.2.3-55-g7522


From 5bca23035391928c4c7301835accca3551b96cc2 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 22 Nov 2012 14:40:03 +0000
Subject: mm: sched: numa: Delay PTE scanning until a task is scheduled on a
 new node

Due to the fact that migrations are driven by the CPU a task is running
on there is no point tracking NUMA faults until one task runs on a new
node. This patch tracks the first node used by an address space. Until
it changes, PTE scanning is disabled and no NUMA hinting faults are
trapped. This should help workloads that are short-lived, do not care
about NUMA placement or have bound themselves to a single node.

This takes advantage of the logic in "mm: sched: numa: Implement slow
start for working set sampling" to delay when the checks are made. This
will take advantage of processes that set their CPU and node bindings
early in their lifetime. It will also potentially allow any initial load
balancing to take place.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm_types.h | 10 ++++++++++
 kernel/fork.c            |  3 +++
 kernel/sched/fair.c      | 18 ++++++++++++++++++
 kernel/sched/features.h  |  4 +++-
 4 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e850a23dd6ec..197422a1598c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -418,10 +418,20 @@ struct mm_struct {
 
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
+
+	/*
+	 * The first node a task was scheduled on. If a task runs on
+	 * a different node than Make PTE Scan Go Now.
+	 */
+	int first_nid;
 #endif
 	struct uprobes_state uprobes_state;
 };
 
+/* first nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT	-1
+#define NUMA_PTE_SCAN_ACTIVE	-2
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..296ea308096d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -820,6 +820,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	mm->first_nid = NUMA_PTE_SCAN_INIT;
 #endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a02a2082e95..3e18f611a5aa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -860,6 +860,24 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
+	/*
+	 * We do not care about task placement until a task runs on a node
+	 * other than the first one used by the address space. This is
+	 * largely because migrations are driven by what CPU the task
+	 * is running on. If it's never scheduled on another node, it'll
+	 * not migrate so why bother trapping the fault.
+	 */
+	if (mm->first_nid == NUMA_PTE_SCAN_INIT)
+		mm->first_nid = numa_node_id();
+	if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) {
+		/* Are we running on a new node yet? */
+		if (numa_node_id() == mm->first_nid &&
+		    !sched_feat_numa(NUMA_FORCE))
+			return;
+
+		mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
+	}
+
 	/*
 	 * Reset the scan period if enough time has gone by. Objective is that
 	 * scanning will be reduced if pages are properly placed. As tasks
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index d2373a3e3252..e7c25fff1e94 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -65,8 +65,10 @@ SCHED_FEAT(LB_MIN, false)
 /*
  * Apply the automatic NUMA scheduling policy. Enabled automatically
  * at runtime if running on a NUMA machine. Can be controlled via
- * numa_balancing=
+ * numa_balancing=. Allow PTE scanning to be forced on UMA machines
+ * for debugging the core machinery.
  */
 #ifdef CONFIG_NUMA_BALANCING
 SCHED_FEAT(NUMA,	false)
+SCHED_FEAT(NUMA_FORCE,	false)
 #endif
-- 
cgit v1.2.3-55-g7522


From b32967ff101a7508f70be8de59b278d4df92fa00 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Mon, 19 Nov 2012 12:35:47 +0000
Subject: mm: numa: Add THP migration for the NUMA working set scanning fault
 case.

Note: This is very heavily based on a patch from Peter Zijlstra with
	fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner.  That patch
	put a lot of migration logic into mm/huge_memory.c where it does
	not belong. This version puts tries to share some of the migration
	logic with migrate_misplaced_page.  However, it should be noted
	that now migrate.c is doing more with the pagetable manipulation
	than is preferred. The end result is barely recognisable so as
	before, the signed-offs had to be removed but will be re-added if
	the original authors are ok with it.

Add THP migration for the NUMA working set scanning fault case.

It uses the page lock to serialize. No migration pte dance is
necessary because the pte is already unmapped when we decide
to migrate.

[dhillf@gmail.com: Fix memory leak on isolation failure]
[dhillf@gmail.com: Fix transfer of last_nid information]
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/migrate.h |  15 ++++
 mm/huge_memory.c        |  59 +++++++++----
 mm/internal.h           |   7 +-
 mm/memcontrol.c         |   7 +-
 mm/migrate.c            | 231 +++++++++++++++++++++++++++++++++++++++---------
 5 files changed, 255 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 91556889adac..51eac4bdc606 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -79,6 +79,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 extern int migrate_misplaced_page(struct page *page, int node);
 extern int migrate_misplaced_page(struct page *page, int node);
 extern bool migrate_ratelimited(int node);
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
@@ -88,6 +94,15 @@ static inline bool migrate_ratelimited(int node)
 {
 	return false;
 }
+
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node)
+{
+	return -EAGAIN;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 199b261a257e..711baf84b153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -600,7 +600,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -1023,10 +1023,12 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page = NULL;
+	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
 	int current_nid = -1;
+	bool migrated;
+	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,42 +1036,61 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	page = pmd_page(pmd);
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
 	current_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (current_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
-	if (target_nid == -1)
+	if (target_nid == -1) {
+		put_page(page);
 		goto clear_pmdnuma;
+	}
 
-	/*
-	 * Due to lacking code to migrate thp pages, we'll split
-	 * (which preserves the special PROT_NONE) and re-take the
-	 * fault on the normal pages.
-	 */
-	split_huge_page(page);
-	put_page(page);
-
-	return 0;
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
 
-clear_pmdnuma:
+	/* Confirm the PTE did not while locked */
 	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
 		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
 
+clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page) {
-		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
-	}
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
 	return 0;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index a4fa284f6bc2..7e60ac826f2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -212,15 +212,18 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
 	if (TestClearPageMlocked(page)) {
 		unsigned long flags;
+		int nr_pages = hpage_nr_pages(page);
 
 		local_irq_save(flags);
-		__dec_zone_page_state(page, NR_MLOCK);
+		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 		SetPageMlocked(newpage);
-		__inc_zone_page_state(newpage, NR_MLOCK);
+		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
 		local_irq_restore(flags);
 	}
 }
 
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long vma_address(struct page *page,
 				 struct vm_area_struct *vma);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd39ba000b31..d97af9636ab2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3288,15 +3288,18 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 
 	*memcgp = NULL;
 
-	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return;
 
+	if (PageTransHuge(page))
+		nr_pages <<= compound_order(page);
+
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
@@ -3358,7 +3361,7 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
+	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
 }
 
 /* remove redundant charge if migration failed*/
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a5ce135eef0..c9400960fd52 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -410,7 +410,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -1491,25 +1491,10 @@ bool migrate_ratelimited(int node)
 	return true;
 }
 
-/*
- * Attempt to migrate a misplaced page to the specified destination
- * node. Caller is expected to have an elevated reference count on
- * the page that will be dropped by this function before returning.
- */
-int migrate_misplaced_page(struct page *page, int node)
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat)
 {
-	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
-	LIST_HEAD(migratepages);
-
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
-		goto out;
-	}
+	bool rate_limited = false;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1522,13 +1507,18 @@ int migrate_misplaced_page(struct page *page, int node)
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-		spin_unlock(&pgdat->numabalancing_migrate_lock);
-		put_page(page);
-		goto out;
-	}
-	pgdat->numabalancing_migrate_nr_pages++;
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages++;
 	spin_unlock(&pgdat->numabalancing_migrate_lock);
+	
+	return rate_limited;
+}
+
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
 
 	/* Avoid migrating to a node that is nearly full */
 	if (migrate_balanced_pgdat(pgdat, 1)) {
@@ -1536,13 +1526,18 @@ int migrate_misplaced_page(struct page *page, int node)
 
 		if (isolate_lru_page(page)) {
 			put_page(page);
-			goto out;
+			return 0;
 		}
-		isolated = 1;
 
+		/* Page is isolated */
+		ret = 1;
 		page_lru = page_is_file_cache(page);
-		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		list_add(&page->lru, &migratepages);
+		if (!PageTransHuge(page))
+			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		else
+			mod_zone_page_state(page_zone(page),
+					NR_ISOLATED_ANON + page_lru,
+					HPAGE_PMD_NR);
 	}
 
 	/*
@@ -1555,23 +1550,177 @@ int migrate_misplaced_page(struct page *page, int node)
 	 */
 	put_page(page);
 
-	if (isolated) {
-		int nr_remaining;
-
-		nr_remaining = migrate_pages(&migratepages,
-				alloc_misplaced_dst_page,
-				node, false, MIGRATE_ASYNC,
-				MR_NUMA_MISPLACED);
-		if (nr_remaining) {
-			putback_lru_pages(&migratepages);
-			isolated = 0;
-		} else
-			count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
 	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+			alloc_misplaced_dst_page,
+			node, false, MIGRATE_ASYNC,
+			MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
 out:
 	return isolated;
 }
+
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page)
+		goto out_dropref;
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries.  Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* CONFIG_NUMA */
-- 
cgit v1.2.3-55-g7522


From 220018d388b8ab1fca1c5f0c6474bab47ad2c9c0 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 5 Dec 2012 09:32:56 +0000
Subject: mm: numa: Add THP migration for the NUMA working set scanning fault
 case build fix

Commit "Add THP migration for the NUMA working set scanning fault case"
breaks the build because HPAGE_PMD_SHIFT and HPAGE_PMD_MASK defined to
explode without CONFIG_TRANSPARENT_HUGEPAGE:

mm/migrate.c: In function 'migrate_misplaced_transhuge_page_put':
mm/migrate.c:1549: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1564: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1566: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1573: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1606: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1648: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed

CONFIG_NUMA_BALANCING allows compilation without enabling transparent
hugepages, so define the dummy function for such a configuration and only
define migrate_misplaced_transhuge_page_put() when transparent hugepages
are enabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/migrate.h | 16 +++++++++-------
 mm/migrate.c            |  2 ++
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 51eac4bdc606..d52afb9a790c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -79,12 +79,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 extern int migrate_misplaced_page(struct page *page, int node);
 extern int migrate_misplaced_page(struct page *page, int node);
 extern bool migrate_ratelimited(int node);
-extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
-			struct vm_area_struct *vma,
-			pmd_t *pmd, pmd_t entry,
-			unsigned long address,
-			struct page *page, int node);
-
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
@@ -94,7 +88,15 @@ static inline bool migrate_ratelimited(int node)
 {
 	return false;
 }
+#endif /* CONFIG_NUMA_BALANCING */
 
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+#else
 static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 			struct vm_area_struct *vma,
 			pmd_t *pmd, pmd_t entry,
@@ -103,6 +105,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 {
 	return -EAGAIN;
 }
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index c9400960fd52..9341a501d168 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1602,7 +1602,9 @@ int migrate_misplaced_page(struct page *page, int node)
 out:
 	return isolated;
 }
+#endif /* CONFIG_NUMA_BALANCING */
 
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				struct vm_area_struct *vma,
 				pmd_t *pmd, pmd_t entry,
-- 
cgit v1.2.3-55-g7522


From 5a505085f043e8380f83610f79642853c051e2f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 2 Dec 2012 19:56:46 +0000
Subject: mm/rmap: Convert the struct anon_vma::mutex to an rwsem

Convert the struct anon_vma::mutex to an rwsem, which will help
in solving a page-migration scalability problem. (Addressed in
a separate patch.)

The conversion is simple and straightforward: in every case
where we mutex_lock()ed we'll now down_write().

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/rmap.h | 16 ++++++++--------
 mm/huge_memory.c     |  4 ++--
 mm/mmap.c            |  8 ++++----
 mm/rmap.c            | 22 +++++++++++-----------
 4 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f4780644..f3f41d242e25 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -25,8 +25,8 @@
  * pointing to this anon_vma once its vma list is empty.
  */
 struct anon_vma {
-	struct anon_vma *root;	/* Root of this anon_vma tree */
-	struct mutex mutex;	/* Serialize access to vma list */
+	struct anon_vma *root;		/* Root of this anon_vma tree */
+	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
-	struct rb_node rb;			/* locked by anon_vma->mutex */
+	struct rb_node rb;			/* locked by anon_vma->rwsem */
 	unsigned long rb_subtree_last;
 #ifdef CONFIG_DEBUG_VM_RB
 	unsigned long cached_vma_start, cached_vma_last;
@@ -108,24 +108,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_lock(&anon_vma->root->mutex);
+		down_write(&anon_vma->root->rwsem);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_unlock(&anon_vma->root->mutex);
+		up_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_lock(struct anon_vma *anon_vma)
 {
-	mutex_lock(&anon_vma->root->mutex);
+	down_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	mutex_unlock(&anon_vma->root->mutex);
+	up_write(&anon_vma->root->rwsem);
 }
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 711baf84b153..acd37fe55eb7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1292,7 +1292,7 @@ static int __split_huge_page_splitting(struct page *page,
 		 * We can't temporarily set the pmd to null in order
 		 * to split it, the pmd must remain marked huge at all
 		 * times or the VM won't take the pmd_trans_huge paths
-		 * and it won't wait on the anon_vma->root->mutex to
+		 * and it won't wait on the anon_vma->root->rwsem to
 		 * serialize against split_huge_page*.
 		 */
 		pmdp_splitting_flush(vma, address, pmd);
@@ -1495,7 +1495,7 @@ static int __split_huge_page_map(struct page *page,
 	return ret;
 }
 
-/* must be called with anon_vma->root->mutex hold */
+/* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
diff --git a/mm/mmap.c b/mm/mmap.c
index 9a796c41e7d9..88408632da66 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2561,15 +2561,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
+		down_write(&anon_vma->root->rwsem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->root->mutex. If some other vma in this mm shares
+		 * anon_vma->root->rwsem. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
 				       &anon_vma->root->rb_root.rb_node))
@@ -2671,7 +2671,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->rb_root.rb_node))
diff --git a/mm/rmap.c b/mm/rmap.c
index 2ee1ef0f317b..6e3ee3b82798 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,7 +24,7 @@
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_mutex
- *         anon_vma->mutex
+ *         anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *             swap_lock (in swap_duplicate, swap_info_get)
@@ -37,7 +37,7 @@
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within bdi.wb->list_lock in __sync_single_inode)
  *
- * anon_vma->mutex,mapping->i_mutex      (memory_failure, collect_procs_anon)
+ * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
  *     pte map lock
  */
@@ -103,7 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	 * LOCK should suffice since the actual taking of the lock must
 	 * happen _before_ what follows.
 	 */
-	if (mutex_is_locked(&anon_vma->root->mutex)) {
+	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
 		anon_vma_lock(anon_vma);
 		anon_vma_unlock(anon_vma);
 	}
@@ -219,9 +219,9 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct
 	struct anon_vma *new_root = anon_vma->root;
 	if (new_root != root) {
 		if (WARN_ON_ONCE(root))
-			mutex_unlock(&root->mutex);
+			up_write(&root->rwsem);
 		root = new_root;
-		mutex_lock(&root->mutex);
+		down_write(&root->rwsem);
 	}
 	return root;
 }
@@ -229,7 +229,7 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct
 static inline void unlock_anon_vma_root(struct anon_vma *root)
 {
 	if (root)
-		mutex_unlock(&root->mutex);
+		up_write(&root->rwsem);
 }
 
 /*
@@ -349,7 +349,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 	/*
 	 * Iterate the list once more, it now only contains empty and unlinked
 	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
-	 * needing to acquire the anon_vma->root->mutex.
+	 * needing to write-acquire the anon_vma->root->rwsem.
 	 */
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;
@@ -365,7 +365,7 @@ static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
-	mutex_init(&anon_vma->mutex);
+	init_rwsem(&anon_vma->rwsem);
 	atomic_set(&anon_vma->refcount, 0);
 	anon_vma->rb_root = RB_ROOT;
 }
@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	if (mutex_trylock(&root_anon_vma->mutex)) {
+	if (down_write_trylock(&root_anon_vma->rwsem)) {
 		/*
 		 * If the page is still mapped, then this anon_vma is still
 		 * its anon_vma, and holding the mutex ensures that it will
 		 * not go away, see anon_vma_free().
 		 */
 		if (!page_mapped(page)) {
-			mutex_unlock(&root_anon_vma->mutex);
+			up_write(&root_anon_vma->rwsem);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -1299,7 +1299,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
+	 * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
-- 
cgit v1.2.3-55-g7522


From 4fc3f1d66b1ef0d7b8dc11f4ff1cc510f78b37d6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 2 Dec 2012 19:56:50 +0000
Subject: mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon()
 more scalable

rmap_walk_anon() and try_to_unmap_anon() appears to be too
careful about locking the anon vma: while it needs protection
against anon vma list modifications, it does not need exclusive
access to the list itself.

Transforming this exclusive lock to a read-locked rwsem removes
a global lock from the hot path of page-migration intense
threaded workloads which can cause pathological performance like
this:

    96.43%        process 0  [kernel.kallsyms]  [k] perf_trace_sched_switch
                  |
                  --- perf_trace_sched_switch
                      __schedule
                      schedule
                      schedule_preempt_disabled
                      __mutex_lock_common.isra.6
                      __mutex_lock_slowpath
                      mutex_lock
                     |
                     |--50.61%-- rmap_walk
                     |          move_to_new_page
                     |          migrate_pages
                     |          migrate_misplaced_page
                     |          __do_numa_page.isra.69
                     |          handle_pte_fault
                     |          handle_mm_fault
                     |          __do_page_fault
                     |          do_page_fault
                     |          page_fault
                     |          __memset_sse2
                     |          |
                     |           --100.00%-- worker_thread
                     |                     |
                     |                      --100.00%-- start_thread
                     |
                      --49.39%-- page_lock_anon_vma
                                try_to_unmap_anon
                                try_to_unmap
                                migrate_pages
                                migrate_misplaced_page
                                __do_numa_page.isra.69
                                handle_pte_fault
                                handle_mm_fault
                                __do_page_fault
                                do_page_fault
                                page_fault
                                __memset_sse2
                                |
                                 --100.00%-- worker_thread
                                           start_thread

With this change applied the profile is now nicely flat
and there's no anon-vma related scheduling/blocking.

Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(),
to make it clearer that it's an exclusive write-lock in
that case - suggested by Rik van Riel.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |  2 +-
 include/linux/rmap.h    | 17 ++++++++++++++---
 mm/huge_memory.c        |  6 +++---
 mm/ksm.c                |  6 +++---
 mm/memory-failure.c     |  4 ++--
 mm/migrate.c            |  2 +-
 mm/mmap.c               |  2 +-
 mm/mremap.c             |  2 +-
 mm/rmap.c               | 48 ++++++++++++++++++++++++------------------------
 9 files changed, 50 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 027ad04ef3a8..0d1208c0bdc4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -102,7 +102,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock(__anon_vma);				\
+		anon_vma_lock_write(__anon_vma);			\
 		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index f3f41d242e25..c20635c527a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 		up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
 }
@@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index acd37fe55eb7..a24c9cb9c83e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1549,7 +1549,7 @@ int split_huge_page(struct page *page)
 	int ret = 1;
 
 	BUG_ON(!PageAnon(page));
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		goto out;
 	ret = 0;
@@ -1562,7 +1562,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(PageCompound(page));
 out_unlock:
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 out:
 	return ret;
 }
@@ -2074,7 +2074,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-	anon_vma_lock(vma->anon_vma);
+	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
 	ptl = pte_lockptr(mm, pmd);
diff --git a/mm/ksm.c b/mm/ksm.c
index ae539f0b8aa1..7fa37de1ee0c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1634,7 +1634,7 @@ again:
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1688,7 +1688,7 @@ again:
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1741,7 +1741,7 @@ again:
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ddb68a169e45..f2cd830f66c0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -402,7 +402,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	struct anon_vma *av;
 	pgoff_t pgoff;
 
-	av = page_lock_anon_vma(page);
+	av = page_lock_anon_vma_read(page);
 	if (av == NULL)	/* Not actually mapped anymore */
 		return;
 
@@ -423,7 +423,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 		}
 	}
 	read_unlock(&tasklist_lock);
-	page_unlock_anon_vma(av);
+	page_unlock_anon_vma_read(av);
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index f24e9cc49cc4..6e46485f014c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -754,7 +754,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	 */
 	if (PageAnon(page)) {
 		/*
-		 * Only page_lock_anon_vma() understands the subtleties of
+		 * Only page_lock_anon_vma_read() understands the subtleties of
 		 * getting a hold on an anon_vma from outside one of its mms.
 		 */
 		anon_vma = page_get_anon_vma(page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 88408632da66..68a16b40c209 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -602,7 +602,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 	if (anon_vma) {
 		VM_BUG_ON(adjust_next && next->anon_vma &&
 			  anon_vma != next->anon_vma);
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_pre_update_vma(vma);
 		if (adjust_next)
 			anon_vma_interval_tree_pre_update_vma(next);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1b61c2d3307a..3dabd170753a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -104,7 +104,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		}
 		if (vma->anon_vma) {
 			anon_vma = vma->anon_vma;
-			anon_vma_lock(anon_vma);
+			anon_vma_lock_write(anon_vma);
 		}
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 6e3ee3b82798..b0f612df7b9d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -87,24 +87,24 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
 
 	/*
-	 * Synchronize against page_lock_anon_vma() such that
+	 * Synchronize against page_lock_anon_vma_read() such that
 	 * we can safely hold the lock without the anon_vma getting
 	 * freed.
 	 *
 	 * Relies on the full mb implied by the atomic_dec_and_test() from
 	 * put_anon_vma() against the acquire barrier implied by
-	 * mutex_trylock() from page_lock_anon_vma(). This orders:
+	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
 	 *
-	 * page_lock_anon_vma()		VS	put_anon_vma()
-	 *   mutex_trylock()			  atomic_dec_and_test()
+	 * page_lock_anon_vma_read()	VS	put_anon_vma()
+	 *   down_read_trylock()		  atomic_dec_and_test()
 	 *   LOCK				  MB
-	 *   atomic_read()			  mutex_is_locked()
+	 *   atomic_read()			  rwsem_is_locked()
 	 *
 	 * LOCK should suffice since the actual taking of the lock must
 	 * happen _before_ what follows.
 	 */
 	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_unlock(anon_vma);
 	}
 
@@ -146,7 +146,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
  * allocate a new one.
  *
  * Anon-vma allocations are very subtle, because we may have
- * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * optimistically looked up an anon_vma in page_lock_anon_vma_read()
  * and that may actually touch the spinlock even in the newly
  * allocated vma (it depends on RCU to make sure that the
  * anon_vma isn't actually destroyed).
@@ -181,7 +181,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 			allocated = anon_vma;
 		}
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
@@ -306,7 +306,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	get_anon_vma(anon_vma->root);
 	/* Mark this anon_vma as the one where our new (COWed) pages go. */
 	vma->anon_vma = anon_vma;
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_write(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
 	anon_vma_unlock(anon_vma);
 
@@ -442,7 +442,7 @@ out:
  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
  * reference like with page_get_anon_vma() and then block on the mutex.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma_read(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
 	struct anon_vma *root_anon_vma;
@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	if (down_write_trylock(&root_anon_vma->rwsem)) {
+	if (down_read_trylock(&root_anon_vma->rwsem)) {
 		/*
 		 * If the page is still mapped, then this anon_vma is still
 		 * its anon_vma, and holding the mutex ensures that it will
 		 * not go away, see anon_vma_free().
 		 */
 		if (!page_mapped(page)) {
-			up_write(&root_anon_vma->rwsem);
+			up_read(&root_anon_vma->rwsem);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -484,15 +484,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	/* we pinned the anon_vma, its safe to sleep */
 	rcu_read_unlock();
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_read(anon_vma);
 
 	if (atomic_dec_and_test(&anon_vma->refcount)) {
 		/*
 		 * Oops, we held the last refcount, release the lock
 		 * and bail -- can't simply use put_anon_vma() because
-		 * we'll deadlock on the anon_vma_lock() recursion.
+		 * we'll deadlock on the anon_vma_lock_write() recursion.
 		 */
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_read(anon_vma);
 		__put_anon_vma(anon_vma);
 		anon_vma = NULL;
 	}
@@ -504,9 +504,9 @@ out:
 	return anon_vma;
 }
 
-void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 {
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_read(anon_vma);
 }
 
 /*
@@ -732,7 +732,7 @@ static int page_referenced_anon(struct page *page,
 	struct anon_vma_chain *avc;
 	int referenced = 0;
 
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		return referenced;
 
@@ -754,7 +754,7 @@ static int page_referenced_anon(struct page *page,
 			break;
 	}
 
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 	return referenced;
 }
 
@@ -1474,7 +1474,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		return ret;
 
@@ -1501,7 +1501,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 			break;
 	}
 
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 	return ret;
 }
 
@@ -1696,7 +1696,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	int ret = SWAP_AGAIN;
 
 	/*
-	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
 	 * because that depends on page_mapped(); but not all its usages
 	 * are holding mmap_sem. Users without mmap_sem are required to
 	 * take a reference count to prevent the anon_vma disappearing
@@ -1704,7 +1704,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	anon_vma = page_anon_vma(page);
 	if (!anon_vma)
 		return ret;
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_read(anon_vma);
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
 		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
@@ -1712,7 +1712,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		if (ret != SWAP_AGAIN)
 			break;
 	}
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_read(anon_vma);
 	return ret;
 }
 
-- 
cgit v1.2.3-55-g7522


From 1daa8a0b2000974e07eacbabecd61bccadc933b9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Mon, 10 Dec 2012 20:41:27 +0100
Subject: of/i2c: add dummy inline functions for when CONFIG_OF_I2C(_MODULE)
 isn't defined

If CONFIG_OF_I2C and CONFIG_OF_I2C_MODULE are undefined no declaration of
of_find_i2c_device_by_node and of_find_i2c_adapter_by_node will be
available. Add dummy inline functions to avoid compilation breakage.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_i2c.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h
index 1cb775f8e663..cfb545cd86b5 100644
--- a/include/linux/of_i2c.h
+++ b/include/linux/of_i2c.h
@@ -29,6 +29,18 @@ static inline void of_i2c_register_devices(struct i2c_adapter *adap)
 {
 	return;
 }
+
+static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
+{
+	return NULL;
+}
+
+/* must call put_device() when done with returned i2c_adapter device */
+static inline struct i2c_adapter *of_find_i2c_adapter_by_node(
+						struct device_node *node)
+{
+	return NULL;
+}
 #endif /* CONFIG_OF_I2C */
 
 #endif /* __LINUX_OF_I2C_H */
-- 
cgit v1.2.3-55-g7522


From 93c667ca2598bd84f1bd3f2fa176af69707699fe Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Mon, 10 Dec 2012 20:41:30 +0100
Subject: of: *node argument to of_parse_phandle_with_args should be const

The "struct device_node *" argument of of_parse_phandle_with_args() can
be const. Making this change makes it explicit that the function will
not modify a node.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
[grant.likely: Resolved conflict with previous patch modifying of_parse_phandle()]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/base.c  | 2 +-
 include/linux/of.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 538e3cfad23e..be846408dbc1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1025,7 +1025,7 @@ EXPORT_SYMBOL(of_parse_phandle);
  * To get a device_node of the `node2' node you may call this:
  * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
+int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
 				const char *cells_name, int index,
 				struct of_phandle_args *out_args)
 {
diff --git a/include/linux/of.h b/include/linux/of.h
index 60053bd7e79a..6cfea9aa401f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -273,7 +273,7 @@ extern int of_modalias_node(struct device_node *node, char *modalias, int len);
 extern struct device_node *of_parse_phandle(const struct device_node *np,
 					    const char *phandle_name,
 					    int index);
-extern int of_parse_phandle_with_args(struct device_node *np,
+extern int of_parse_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct of_phandle_args *out_args);
 
-- 
cgit v1.2.3-55-g7522


From d8153d4d8b7b6141770e1416c4a338161205ed1b Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Tue, 14 Jun 2011 17:29:45 +0200
Subject: inotify, fanotify: replace fsnotify_put_group() with
 fsnotify_destroy_group()

Currently in fsnotify_put_group() the ref count of a group is decremented and if
it becomes 0 fsnotify_destroy_group() is called. Since a groups ref count is only
at group creation set to 1 and never increased after that a call to fsnotify_put_group()
always results in a call to fsnotify_destroy_group().
With this patch fsnotify_destroy_group() is called directly.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 14 +++++++-------
 fs/notify/group.c                  |  2 +-
 fs/notify/inotify/inotify_user.c   |  8 +++-----
 include/linux/fsnotify_backend.h   |  3 ++-
 4 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d43803669739..82ae6d783c14 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -415,7 +415,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
 	/* matches the fanotify_init->fsnotify_alloc_group */
-	fsnotify_put_group(group);
+	fsnotify_destroy_group(group);
 
 	return 0;
 }
@@ -728,13 +728,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		break;
 	default:
 		fd = -EINVAL;
-		goto out_put_group;
+		goto out_destroy_group;
 	}
 
 	if (flags & FAN_UNLIMITED_QUEUE) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
-			goto out_put_group;
+			goto out_destroy_group;
 		group->max_events = UINT_MAX;
 	} else {
 		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
@@ -743,7 +743,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if (flags & FAN_UNLIMITED_MARKS) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
-			goto out_put_group;
+			goto out_destroy_group;
 		group->fanotify_data.max_marks = UINT_MAX;
 	} else {
 		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
@@ -751,12 +751,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 	if (fd < 0)
-		goto out_put_group;
+		goto out_destroy_group;
 
 	return fd;
 
-out_put_group:
-	fsnotify_put_group(group);
+out_destroy_group:
+	fsnotify_destroy_group(group);
 	return fd;
 }
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 63fc294a4692..cfda328c3d11 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -50,7 +50,7 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
  * situtation, the fsnotify_final_destroy_group will get called when that final
  * mark is freed.
  */
-static void fsnotify_destroy_group(struct fsnotify_group *group)
+void fsnotify_destroy_group(struct fsnotify_group *group)
 {
 	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985c..dbafbfc8ceca 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -293,10 +293,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
-	fsnotify_clear_marks_by_group(group);
-
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
-	fsnotify_put_group(group);
+	fsnotify_destroy_group(group);
 
 	return 0;
 }
@@ -712,7 +710,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 
 	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
 	    inotify_max_user_instances) {
-		fsnotify_put_group(group);
+		fsnotify_destroy_group(group);
 		return ERR_PTR(-EMFILE);
 	}
 
@@ -741,7 +739,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	ret = anon_inode_getfd("inotify", &inotify_fops, group,
 				  O_RDONLY | flags);
 	if (ret < 0)
-		fsnotify_put_group(group);
+		fsnotify_destroy_group(group);
 
 	return ret;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 63d966d5c2ea..d2ad345bdeec 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -364,7 +364,8 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
-
+/* destroy group */
+extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-- 
cgit v1.2.3-55-g7522


From 986129520479d689962a42c31acdeaf854ac91f5 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Tue, 14 Jun 2011 17:29:46 +0200
Subject: fsnotify: introduce fsnotify_get_group()

Introduce fsnotify_get_group() which increments the reference counter of a group.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c                | 8 ++++++++
 include/linux/fsnotify_backend.h | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index cfda328c3d11..1d57c35f1043 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -62,6 +62,14 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
 		fsnotify_final_destroy_group(group);
 }
 
+/*
+ * Get reference to a group.
+ */
+void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
+}
+
 /*
  * Drop a reference to a group.  Free it if it's through.
  */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d2ad345bdeec..e76cef75295d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -360,8 +360,10 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
-/* get a reference to an existing or create a new group */
+/* create a new group */
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
+/* get reference to a group */
+extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
 /* destroy group */
-- 
cgit v1.2.3-55-g7522


From 986ab09807ca9454c3f54aae4db7e1bb00daeed3 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Tue, 14 Jun 2011 17:29:50 +0200
Subject: fsnotify: use a mutex instead of a spinlock to protect a groups mark
 list

Replaces the groups mark_lock spinlock with a mutex. Using a mutex instead
of a spinlock results in more flexibility (i.e it allows to sleep while the
lock is held).

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c                |  2 +-
 fs/notify/inode_mark.c           |  4 ++--
 fs/notify/mark.c                 | 18 +++++++++---------
 fs/notify/vfsmount_mark.c        |  4 ++--
 include/linux/fsnotify_backend.h |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index 354044c47e23..1f7305711fc9 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -95,7 +95,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	init_waitqueue_head(&group->notification_waitq);
 	group->max_events = UINT_MAX;
 
-	spin_lock_init(&group->mark_lock);
+	mutex_init(&group->mark_mutex);
 	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index b13c00ac48eb..4e9071e37d5d 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -63,8 +63,8 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
 {
 	struct inode *inode = mark->i.inode;
 
+	BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&mark->group->mark_lock);
 
 	spin_lock(&inode->i_lock);
 
@@ -191,8 +191,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
 
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&inode->i_lock);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 32447dc06c07..ab25b810b146 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -136,13 +136,13 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	group = mark->group;
 	spin_unlock(&mark->lock);
 
-	spin_lock(&group->mark_lock);
+	mutex_lock(&group->mark_mutex);
 	spin_lock(&mark->lock);
 
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
-		spin_unlock(&group->mark_lock);
+		mutex_unlock(&group->mark_mutex);
 		goto put_group;
 	}
 
@@ -159,7 +159,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	list_del_init(&mark->g_list);
 
 	spin_unlock(&mark->lock);
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
@@ -232,11 +232,11 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 
 	/*
 	 * LOCKING ORDER!!!!
-	 * group->mark_lock
+	 * group->mark_mutex
 	 * mark->lock
 	 * inode->i_lock
 	 */
-	spin_lock(&group->mark_lock);
+	mutex_lock(&group->mark_mutex);
 
 	spin_lock(&mark->lock);
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
@@ -263,7 +263,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	fsnotify_set_mark_mask_locked(mark, mark->mask);
 	spin_unlock(&mark->lock);
 
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	if (inode)
 		__fsnotify_update_child_dentry_flags(inode);
@@ -277,7 +277,7 @@ err:
 	atomic_dec(&group->num_marks);
 
 	spin_unlock(&mark->lock);
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
@@ -296,7 +296,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 	struct fsnotify_mark *lmark, *mark;
 	LIST_HEAD(free_list);
 
-	spin_lock(&group->mark_lock);
+	mutex_lock(&group->mark_mutex);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
 			list_add(&mark->free_g_list, &free_list);
@@ -304,7 +304,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 			fsnotify_get_mark(mark);
 		}
 	}
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
 		fsnotify_destroy_mark(mark);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index b7b4b0e8554f..f26a348827f8 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -88,8 +88,8 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
 {
 	struct vfsmount *mnt = mark->m.mnt;
 
+	BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&mark->group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
@@ -151,8 +151,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e76cef75295d..c5848346840d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -141,7 +141,7 @@ struct fsnotify_group {
 	unsigned int priority;
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
-	spinlock_t mark_lock;		/* protect marks_list */
+	struct mutex mark_mutex;	/* protect marks_list */
 	atomic_t num_marks;		/* 1 for each mark and 1 for not being
 					 * past the point of no return when freeing
 					 * a group */
-- 
cgit v1.2.3-55-g7522


From e2a29943e9a2ee2aa737a77f550f46ba72269db4 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Tue, 14 Jun 2011 17:29:51 +0200
Subject: fsnotify: pass group to fsnotify_destroy_mark()

In fsnotify_destroy_mark() dont get the group from the passed mark anymore,
but pass the group itself as an additional parameter to the function.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          |  4 ++--
 fs/notify/fanotify/fanotify_user.c   |  4 ++--
 fs/notify/inode_mark.c               | 10 +++++++++-
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c     |  2 +-
 fs/notify/mark.c                     | 21 ++++-----------------
 fs/notify/vfsmount_mark.c            | 10 +++++++++-
 include/linux/fsnotify_backend.h     |  5 +++--
 kernel/audit_tree.c                  | 10 +++++-----
 kernel/audit_watch.c                 |  4 ++--
 10 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 3344bdd5506e..08b886f119ce 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -201,7 +201,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 
 	/* nothing else could have found us thanks to the dnotify_mark_mutex */
 	if (dn_mark->dn == NULL)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 
@@ -385,7 +385,7 @@ out:
 	spin_unlock(&fsn_mark->lock);
 
 	if (destroy)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 	fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 599a01952c74..1218d10424d0 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -546,7 +546,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
 	if (destroy_mark)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, group);
 
 	fsnotify_put_mark(fsn_mark);
 	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
@@ -570,7 +570,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
 	if (destroy_mark)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, group);
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
 	if (removed & inode->i_fsnotify_mask)
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4e9071e37d5d..21230209c957 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -99,8 +99,16 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
-		fsnotify_destroy_mark(mark);
+		struct fsnotify_group *group;
+
+		spin_lock(&mark->lock);
+		fsnotify_get_group(mark->group);
+		group = mark->group;
+		spin_unlock(&mark->lock);
+
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
+		fsnotify_put_group(group);
 	}
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 74977fbf5aae..871569c7d609 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -132,7 +132,7 @@ static int inotify_handle_event(struct fsnotify_group *group,
 	}
 
 	if (inode_mark->mask & IN_ONESHOT)
-		fsnotify_destroy_mark(inode_mark);
+		fsnotify_destroy_mark(inode_mark, group);
 
 	return ret;
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 246250f1db7a..00ff82ff7c9f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -816,7 +816,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 
 	ret = 0;
 
-	fsnotify_destroy_mark(&i_mark->fsn_mark);
+	fsnotify_destroy_mark(&i_mark->fsn_mark, group);
 
 	/* match ref taken by inotify_idr_find */
 	fsnotify_put_mark(&i_mark->fsn_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index ab25b810b146..b77c833c8d0a 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -121,21 +121,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the mark->lock
  */
-void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+			   struct fsnotify_group *group)
 {
-	struct fsnotify_group *group;
 	struct inode *inode = NULL;
 
-	spin_lock(&mark->lock);
-	/* dont get the group from a mark that is not alive yet */
-	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
-		spin_unlock(&mark->lock);
-		return;
-	}
-	fsnotify_get_group(mark->group);
-	group = mark->group;
-	spin_unlock(&mark->lock);
-
 	mutex_lock(&group->mark_mutex);
 	spin_lock(&mark->lock);
 
@@ -143,7 +133,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
 		mutex_unlock(&group->mark_mutex);
-		goto put_group;
+		return;
 	}
 
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
@@ -194,9 +184,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	 */
 
 	atomic_dec(&group->num_marks);
-
-put_group:
-	fsnotify_put_group(group);
 }
 
 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
@@ -307,7 +294,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 	mutex_unlock(&group->mark_mutex);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
-		fsnotify_destroy_mark(mark);
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
 	}
 }
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index f26a348827f8..4df58b8ea64a 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -46,8 +46,16 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 	spin_unlock(&mnt->mnt_root->d_lock);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
-		fsnotify_destroy_mark(mark);
+		struct fsnotify_group *group;
+
+		spin_lock(&mark->lock);
+		fsnotify_get_group(mark->group);
+		group = mark->group;
+		spin_unlock(&mark->lock);
+
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
+		fsnotify_put_group(group);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index c5848346840d..140b4b8a100b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -408,8 +408,9 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
-/* given a mark, flag it to be freed when all references are dropped */
-extern void fsnotify_destroy_mark(struct fsnotify_mark *mark);
+/* given a group and a mark, flag mark to be freed when all references are dropped */
+extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ed206fd88cca..e81175ef25f8 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -249,7 +249,7 @@ static void untag_chunk(struct node *p)
 		list_del_rcu(&chunk->hash);
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		goto out;
 	}
 
@@ -291,7 +291,7 @@ static void untag_chunk(struct node *p)
 		owner->root = new;
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
@@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 		spin_unlock(&hash_lock);
 		chunk->dead = 1;
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		fsnotify_put_mark(entry);
 		return 0;
 	}
@@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 		spin_unlock(&chunk_entry->lock);
 		spin_unlock(&old_entry->lock);
 
-		fsnotify_destroy_mark(chunk_entry);
+		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
 
 		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
@@ -443,7 +443,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
-	fsnotify_destroy_mark(old_entry);
+	fsnotify_destroy_mark(old_entry, audit_tree_group);
 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
 	return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 3823281401b5..a66affc1c12c 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -349,7 +349,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 	}
 	mutex_unlock(&audit_filter_mutex);
 
-	fsnotify_destroy_mark(&parent->mark);
+	fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 }
 
 /* Get path information necessary for adding watches. */
@@ -456,7 +456,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
 
 		if (list_empty(&parent->watches)) {
 			audit_get_parent(parent);
-			fsnotify_destroy_mark(&parent->mark);
+			fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 			audit_put_parent(parent);
 		}
 	}
-- 
cgit v1.2.3-55-g7522


From d5a335b845792d2a69ed1e244c0b233117b7db3c Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Tue, 14 Jun 2011 17:29:52 +0200
Subject: fsnotify: introduce locked versions of fsnotify_add_mark() and
 fsnotify_remove_mark()

This patch introduces fsnotify_add_mark_locked() and fsnotify_remove_mark_locked()
which are essentially the same as fsnotify_add_mark() and fsnotify_remove_mark() but
assume that the caller has already taken the groups mark mutex.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c                 | 42 ++++++++++++++++++++++++++++------------
 include/linux/fsnotify_backend.h |  4 ++++
 2 files changed, 34 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index b77c833c8d0a..f9dda0304a10 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -121,18 +121,18 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the mark->lock
  */
-void fsnotify_destroy_mark(struct fsnotify_mark *mark,
-			   struct fsnotify_group *group)
+void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group)
 {
 	struct inode *inode = NULL;
 
-	mutex_lock(&group->mark_mutex);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
+
 	spin_lock(&mark->lock);
 
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
-		mutex_unlock(&group->mark_mutex);
 		return;
 	}
 
@@ -149,6 +149,8 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	list_del_init(&mark->g_list);
 
 	spin_unlock(&mark->lock);
+
+	/* release lock temporarily */
 	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
@@ -184,6 +186,16 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	 */
 
 	atomic_dec(&group->num_marks);
+
+	mutex_lock(&group->mark_mutex);
+}
+
+void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+			   struct fsnotify_group *group)
+{
+	mutex_lock(&group->mark_mutex);
+	fsnotify_destroy_mark_locked(mark, group);
+	mutex_unlock(&group->mark_mutex);
 }
 
 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
@@ -208,14 +220,15 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
  */
-int fsnotify_add_mark(struct fsnotify_mark *mark,
-		      struct fsnotify_group *group, struct inode *inode,
-		      struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+			     struct fsnotify_group *group, struct inode *inode,
+			     struct vfsmount *mnt, int allow_dups)
 {
 	int ret = 0;
 
 	BUG_ON(inode && mnt);
 	BUG_ON(!inode && !mnt);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
 	/*
 	 * LOCKING ORDER!!!!
@@ -223,8 +236,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	 * mark->lock
 	 * inode->i_lock
 	 */
-	mutex_lock(&group->mark_mutex);
-
 	spin_lock(&mark->lock);
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
 
@@ -250,8 +261,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	fsnotify_set_mark_mask_locked(mark, mark->mask);
 	spin_unlock(&mark->lock);
 
-	mutex_unlock(&group->mark_mutex);
-
 	if (inode)
 		__fsnotify_update_child_dentry_flags(inode);
 
@@ -264,7 +273,6 @@ err:
 	atomic_dec(&group->num_marks);
 
 	spin_unlock(&mark->lock);
-	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
@@ -274,6 +282,16 @@ err:
 	return ret;
 }
 
+int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
+		      struct inode *inode, struct vfsmount *mnt, int allow_dups)
+{
+	int ret;
+	mutex_lock(&group->mark_mutex);
+	ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
+	mutex_unlock(&group->mark_mutex);
+	return ret;
+}
+
 /*
  * clear any marks in a group in which mark->flags & flags is true
  */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 140b4b8a100b..26c06afa264e 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -408,9 +408,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
+extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group,
+				    struct inode *inode, struct vfsmount *mnt, int allow_dups);
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 				  struct fsnotify_group *group);
+extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+					 struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
-- 
cgit v1.2.3-55-g7522


From 64c20d2a20fce295c260ea6cb3b468edfa2fb07b Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Tue, 14 Jun 2011 17:29:53 +0200
Subject: fsnotify: dont put marks on temporary list when clearing marks by
 group

In clear_marks_by_group_flags() the mark list of a group is iterated and the
marks are put on a temporary list.
Since we introduced fsnotify_destroy_mark_locked() we dont need the temp list
any more and are able to remove the marks while the mark list is iterated and
the mark list mutex is held.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c                 | 10 ++--------
 include/linux/fsnotify_backend.h |  1 -
 2 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index f9dda0304a10..0e93d90bb753 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -299,22 +299,16 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 					 unsigned int flags)
 {
 	struct fsnotify_mark *lmark, *mark;
-	LIST_HEAD(free_list);
 
 	mutex_lock(&group->mark_mutex);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
-			list_add(&mark->free_g_list, &free_list);
-			list_del_init(&mark->g_list);
 			fsnotify_get_mark(mark);
+			fsnotify_destroy_mark_locked(mark, group);
+			fsnotify_put_mark(mark);
 		}
 	}
 	mutex_unlock(&group->mark_mutex);
-
-	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
-		fsnotify_destroy_mark(mark, group);
-		fsnotify_put_mark(mark);
-	}
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 26c06afa264e..5a8899350456 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -287,7 +287,6 @@ struct fsnotify_mark {
 		struct fsnotify_inode_mark i;
 		struct fsnotify_vfsmount_mark m;
 	};
-	struct list_head free_g_list;	/* tmp list used when freeing this mark */
 	__u32 ignored_mask;		/* events types to ignore */
 #define FSNOTIFY_MARK_FLAG_INODE		0x01
 #define FSNOTIFY_MARK_FLAG_VFSMOUNT		0x02
-- 
cgit v1.2.3-55-g7522


From 6960b0d909cde5bdff49e4e5c1250edd10be7ebd Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo
Date: Fri, 12 Aug 2011 01:13:31 +0200
Subject: fsnotify: change locking order

On Mon, Aug 01, 2011 at 04:38:22PM -0400, Eric Paris wrote:
>
> I finally built and tested a v3.0 kernel with these patches (I know I'm
> SOOOOOO far behind).  Not what I hoped for:
>
> > [  150.937798] VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds.  Have a nice day...
> > [  150.945290] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070
> > [  150.946012] IP: [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012] PGD 2bf9e067 PUD 2bf9f067 PMD 0
> > [  150.946012] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
> > [  150.946012] CPU 0
> > [  150.946012] Modules linked in: nfs lockd fscache auth_rpcgss nfs_acl sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ext4 jbd2 crc16 joydev ata_piix i2c_piix4 pcspkr uinput ipv6 autofs4 usbhid [last unloaded: scsi_wait_scan]
> > [  150.946012]
> > [  150.946012] Pid: 2764, comm: syscall_thrash Not tainted 3.0.0+ #1 Red Hat KVM
> > [  150.946012] RIP: 0010:[<ffffffff810ffd58>]  [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012] RSP: 0018:ffff88002c2e5df8  EFLAGS: 00010282
> > [  150.946012] RAX: 000000004e370d9f RBX: 0000000000000000 RCX: ffff88003a029438
> > [  150.946012] RDX: 0000000033630a5f RSI: 0000000000000000 RDI: ffff88003491c240
> > [  150.946012] RBP: ffff88002c2e5e08 R08: 0000000000000000 R09: 0000000000000000
> > [  150.946012] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a029428
> > [  150.946012] R13: ffff88003a029428 R14: ffff88003a029428 R15: ffff88003499a610
> > [  150.946012] FS:  00007f5a05420700(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000
> > [  150.946012] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> > [  150.946012] CR2: 0000000000000070 CR3: 000000002a662000 CR4: 00000000000006f0
> > [  150.946012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [  150.946012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > [  150.946012] Process syscall_thrash (pid: 2764, threadinfo ffff88002c2e4000, task ffff88002bfbc760)
> > [  150.946012] Stack:
> > [  150.946012]  ffff88003a029438 ffff88003a029428 ffff88002c2e5e38 ffffffff81102f76
> > [  150.946012]  ffff88003a029438 ffff88003a029598 ffffffff8160f9c0 ffff88002c221250
> > [  150.946012]  ffff88002c2e5e68 ffffffff8115e9be ffff88002c2e5e68 ffff88003a029438
> > [  150.946012] Call Trace:
> > [  150.946012]  [<ffffffff81102f76>] shmem_evict_inode+0x76/0x130
> > [  150.946012]  [<ffffffff8115e9be>] evict+0x7e/0x170
> > [  150.946012]  [<ffffffff8115ee40>] iput_final+0xd0/0x190
> > [  150.946012]  [<ffffffff8115ef33>] iput+0x33/0x40
> > [  150.946012]  [<ffffffff81180205>] fsnotify_destroy_mark_locked+0x145/0x160
> > [  150.946012]  [<ffffffff81180316>] fsnotify_destroy_mark+0x36/0x50
> > [  150.946012]  [<ffffffff81181937>] sys_inotify_rm_watch+0x77/0xd0
> > [  150.946012]  [<ffffffff815aca52>] system_call_fastpath+0x16/0x1b
> > [  150.946012] Code: 67 4a 00 b8 e4 ff ff ff eb aa 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 48 8b 9f 40 05 00 00
> > [  150.946012]  83 7b 70 00 74 1c 4c 8d a3 80 00 00 00 4c 89 e7 e8 d2 5d 4a
> > [  150.946012] RIP  [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012]  RSP <ffff88002c2e5df8>
> > [  150.946012] CR2: 0000000000000070
>
> Looks at aweful lot like the problem from:
> http://www.spinics.net/lists/linux-fsdevel/msg46101.html
>

I tried to reproduce this bug with your test program, but without success.
However, if I understand correctly, this occurs since we dont hold any locks when
we call iput() in mark_destroy(), right?
With the patches you tested, iput() is also not called within any lock, since the
groups mark_mutex is released temporarily before iput() is called.  This is, since
the original codes behaviour is similar.
However since we now have a mutex as the biggest lock, we can do what you
suggested (http://www.spinics.net/lists/linux-fsdevel/msg46107.html) and
call iput() with the mutex held to avoid the race.
The patch below implements this. It uses nested locking to avoid deadlock in case
we do the final iput() on an inode which still holds marks and thus would take
the mutex again when calling fsnotify_inode_delete() in destroy_inode().

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c                 | 20 ++++++++++----------
 include/linux/fsnotify_backend.h |  7 ++++---
 2 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 0e93d90bb753..fc6b49bf7360 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -150,6 +150,8 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 
 	spin_unlock(&mark->lock);
 
+	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
+		iput(inode);
 	/* release lock temporarily */
 	mutex_unlock(&group->mark_mutex);
 
@@ -157,6 +159,11 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 	list_add(&mark->destroy_list, &destroy_list);
 	spin_unlock(&destroy_lock);
 	wake_up(&destroy_waitq);
+	/*
+	 * We don't necessarily have a ref on mark from caller so the above destroy
+	 * may have actually freed it, unless this group provides a 'freeing_mark'
+	 * function which must be holding a reference.
+	 */
 
 	/*
 	 * Some groups like to know that marks are being freed.  This is a
@@ -178,22 +185,15 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 	 * is just a lazy update (and could be a perf win...)
 	 */
 
-	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
-		iput(inode);
-	/*
-	 * We don't necessarily have a ref on mark from caller so the above iput
-	 * may have already destroyed it.  Don't touch from now on.
-	 */
-
 	atomic_dec(&group->num_marks);
 
-	mutex_lock(&group->mark_mutex);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 }
 
 void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 			   struct fsnotify_group *group)
 {
-	mutex_lock(&group->mark_mutex);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 	fsnotify_destroy_mark_locked(mark, group);
 	mutex_unlock(&group->mark_mutex);
 }
@@ -300,7 +300,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *lmark, *mark;
 
-	mutex_lock(&group->mark_mutex);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
 			fsnotify_get_mark(mark);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 5a8899350456..1af2f6a722c0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -88,9 +88,10 @@ struct fsnotify_event_private_data;
  *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
- * freeing-mark - this means that a mark has been flagged to die when everything
- *		finishes using it.  The function is supplied with what must be a
- *		valid group and inode to use to clean up.
+ * freeing_mark - called when a mark is being destroyed for some reason.  The group
+ * 		MUST be holding a reference on each mark and that reference must be
+ * 		dropped in this function.  inotify uses this function to send
+ * 		userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
 	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
-- 
cgit v1.2.3-55-g7522


From 0a6b6bd5919a65030b557ec8fe81f6fb3e93744a Mon Sep 17 00:00:00 2001
From: Eric Paris
Date: Fri, 14 Oct 2011 17:43:39 -0400
Subject: fsnotify: make fasync generic for both inotify and fanotify

inotify is supposed to support async signal notification when information
is available on the inotify fd.  This patch moves that support to generic
fsnotify functions so it can be used by all notification mechanisms.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c |  4 ++++
 fs/notify/group.c                  |  7 +++++++
 fs/notify/inotify/inotify_user.c   | 13 ++++---------
 fs/notify/notification.c           |  1 +
 include/linux/fsnotify_backend.h   |  5 ++++-
 5 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 1218d10424d0..f0e7a57bc899 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -414,6 +414,10 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
+
+	if (file->f_flags & FASYNC)
+		fsnotify_fasync(-1, file, 0);
+
 	/* matches the fanotify_init->fsnotify_alloc_group */
 	fsnotify_destroy_group(group);
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 1f7305711fc9..bd2625bd88b4 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -102,3 +102,10 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
 	return group;
 }
+
+int fsnotify_fasync(int fd, struct file *file, int on)
+{
+	struct fsnotify_group *group = file->private_data;
+
+	return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO;
+}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 00ff82ff7c9f..68f7bec1e664 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -280,19 +280,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	return ret;
 }
 
-static int inotify_fasync(int fd, struct file *file, int on)
-{
-	struct fsnotify_group *group = file->private_data;
-
-	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
-}
-
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
+	if (file->f_flags & FASYNC)
+		fsnotify_fasync(-1, file, 0);
+
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_destroy_group(group);
 
@@ -335,7 +331,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 static const struct file_operations inotify_fops = {
 	.poll		= inotify_poll,
 	.read		= inotify_read,
-	.fasync		= inotify_fasync,
+	.fasync		= fsnotify_fasync,
 	.release	= inotify_release,
 	.unlocked_ioctl	= inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
@@ -706,7 +702,6 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
 	group->inotify_data.last_wd = 0;
-	group->inotify_data.fa = NULL;
 	group->inotify_data.user = get_current_user();
 
 	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index c887b1378f7e..b3963d8c9988 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -225,6 +225,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
+	kill_fasync(&group->fsn_fa, SIGIO, POLL_IN);
 	return return_event;
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1af2f6a722c0..d5b0910d4961 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,8 @@ struct fsnotify_group {
 					 * a group */
 	struct list_head marks_list;	/* all inode marks for this group */
 
+	struct fasync_struct    *fsn_fa;    /* async notification */
+
 	/* groups can define private fields here or use the void *private */
 	union {
 		void *private;
@@ -156,7 +158,6 @@ struct fsnotify_group {
 			spinlock_t	idr_lock;
 			struct idr      idr;
 			u32             last_wd;
-			struct fasync_struct    *fa;    /* async notification */
 			struct user_struct      *user;
 		} inotify_data;
 #endif
@@ -368,6 +369,8 @@ extern void fsnotify_get_group(struct fsnotify_group *group);
 extern void fsnotify_put_group(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
+/* fasync handler function */
+extern int fsnotify_fasync(int fd, struct file *file, int on);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-- 
cgit v1.2.3-55-g7522


From d0e1d66b5aa1ec9f556f951aa9a114cc192cd01c Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Tue, 11 Dec 2012 16:00:21 -0800
Subject: writeback: remove nr_pages_dirtied arg from
 balance_dirty_pages_ratelimited_nr()

There is no reason to pass the nr_pages_dirtied argument, because
nr_pages_dirtied value from the caller is unused in
balance_dirty_pages_ratelimited_nr().

Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Vivek Trivedi <vtrivedi018@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/disk-io.c        |  8 ++++----
 fs/btrfs/file.c           |  3 +--
 fs/btrfs/ioctl.c          |  2 +-
 fs/ocfs2/file.c           |  5 +----
 fs/splice.c               |  5 +----
 include/linux/writeback.h |  9 +--------
 mm/page-writeback.c       | 11 +++++------
 7 files changed, 14 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cda51995c1e..22a0439e5a86 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3416,8 +3416,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	num_dirty = root->fs_info->dirty_metadata_bytes;
 
 	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited_nr(
-				   root->fs_info->btree_inode->i_mapping, 1);
+		balance_dirty_pages_ratelimited(
+				   root->fs_info->btree_inode->i_mapping);
 	}
 	return;
 }
@@ -3437,8 +3437,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	num_dirty = root->fs_info->dirty_metadata_bytes;
 
 	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited_nr(
-				   root->fs_info->btree_inode->i_mapping, 1);
+		balance_dirty_pages_ratelimited(
+				   root->fs_info->btree_inode->i_mapping);
 	}
 	return;
 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9ab1bed88116..a8ee75cb96ee 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1346,8 +1346,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
 		cond_resched();
 
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-						   dirty_pages);
+		balance_dirty_pages_ratelimited(inode->i_mapping);
 		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
 			btrfs_btree_balance_dirty(root, 1);
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8fcf9a59c28d..5b3429ab8ec1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1225,7 +1225,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 		}
 
 		defrag_count += ret;
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
+		balance_dirty_pages_ratelimited(inode->i_mapping);
 		mutex_unlock(&inode->i_mutex);
 
 		if (newer_than) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5a4ee77cec51..dda089804942 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2513,18 +2513,15 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 		ret = sd.num_spliced;
 
 	if (ret > 0) {
-		unsigned long nr_pages;
 		int err;
 
-		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
 		err = generic_write_sync(out, *ppos, ret);
 		if (err)
 			ret = err;
 		else
 			*ppos += ret;
 
-		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
+		balance_dirty_pages_ratelimited(mapping);
 	}
 
 	return ret;
diff --git a/fs/splice.c b/fs/splice.c
index 13e5b4776e7a..8890604e3fcd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1024,17 +1024,14 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		ret = sd.num_spliced;
 
 	if (ret > 0) {
-		unsigned long nr_pages;
 		int err;
 
-		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
 		err = generic_write_sync(out, *ppos, ret);
 		if (err)
 			ret = err;
 		else
 			*ppos += ret;
-		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
+		balance_dirty_pages_ratelimited(mapping);
 	}
 	sb_end_write(inode->i_sb);
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 50c3e8fa06a8..b82a83aba311 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -161,14 +161,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 			    unsigned long start_time);
 
 void page_writeback_init(void);
-void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
-					unsigned long nr_pages_dirtied);
-
-static inline void
-balance_dirty_pages_ratelimited(struct address_space *mapping)
-{
-	balance_dirty_pages_ratelimited_nr(mapping, 1);
-}
+void balance_dirty_pages_ratelimited(struct address_space *mapping);
 
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
 				void *data);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 830893b2b3c7..6f4271224493 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1069,7 +1069,7 @@ static void bdi_update_bandwidth(struct backing_dev_info *bdi,
 }
 
 /*
- * After a task dirtied this many pages, balance_dirty_pages_ratelimited_nr()
+ * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
  * will look to see if it needs to start dirty throttling.
  *
  * If dirty_poll_interval is too low, big NUMA machines will call the expensive
@@ -1436,9 +1436,8 @@ static DEFINE_PER_CPU(int, bdp_ratelimits);
 DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
 
 /**
- * balance_dirty_pages_ratelimited_nr - balance dirty memory state
+ * balance_dirty_pages_ratelimited - balance dirty memory state
  * @mapping: address_space which was dirtied
- * @nr_pages_dirtied: number of pages which the caller has just dirtied
  *
  * Processes which are dirtying memory should call in here once for each page
  * which was newly dirtied.  The function will periodically check the system's
@@ -1449,8 +1448,7 @@ DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
  * limit we decrease the ratelimiting by a lot, to prevent individual processes
  * from overshooting the limit by (ratelimit_pages) each.
  */
-void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
-					unsigned long nr_pages_dirtied)
+void balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	int ratelimit;
@@ -1484,6 +1482,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 	 */
 	p = &__get_cpu_var(dirty_throttle_leaks);
 	if (*p > 0 && current->nr_dirtied < ratelimit) {
+		unsigned long nr_pages_dirtied;
 		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
 		*p -= nr_pages_dirtied;
 		current->nr_dirtied += nr_pages_dirtied;
@@ -1493,7 +1492,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 	if (unlikely(current->nr_dirtied >= ratelimit))
 		balance_dirty_pages(mapping, current->nr_dirtied);
 }
-EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
+EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
 
 void throttle_vm_writeout(gfp_t gfp_mask)
 {
-- 
cgit v1.2.3-55-g7522


From 19965460e31c73a934d2c19c152f876a75bdff3e Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Tue, 11 Dec 2012 16:00:26 -0800
Subject: mm, memcg: make mem_cgroup_out_of_memory() static

mem_cgroup_out_of_memory() is only referenced from within file scope, so
it can be marked static.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h | 2 --
 mm/memcontrol.c     | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index fb9826847b89..4a4188da4570 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -49,8 +49,6 @@ extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 		unsigned long totalpages, const nodemask_t *nodemask,
 		bool force_kill);
-extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
-				     int order);
 
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *mask, bool force_kill);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd39ba000b31..cf6d0df4849c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1498,8 +1498,8 @@ static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return limit;
 }
 
-void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
-			      int order)
+static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+				     int order)
 {
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
-- 
cgit v1.2.3-55-g7522


From e5adfffc857788c8b7eca0e98cf1e26f1964b292 Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Tue, 11 Dec 2012 16:00:29 -0800
Subject: mm: use IS_ENABLED(CONFIG_NUMA) instead of NUMA_BUILD

We don't need custom NUMA_BUILD anymore, since we have handy
IS_ENABLED().

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    |  2 +-
 include/linux/kernel.h |  7 -------
 mm/page_alloc.c        | 18 ++++++++++--------
 mm/vmalloc.c           |  4 ++--
 4 files changed, 13 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d0a79678f169..31e8041274f6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -266,7 +266,7 @@ static inline enum zone_type gfp_zone(gfp_t flags)
 
 static inline int gfp_zonelist(gfp_t flags)
 {
-	if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE))
+	if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE))
 		return 1;
 
 	return 0;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7d8dfc7392f1..815e5845d954 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -687,13 +687,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
-/* This helps us to avoid #ifdef CONFIG_NUMA */
-#ifdef CONFIG_NUMA
-#define NUMA_BUILD 1
-#else
-#define NUMA_BUILD 0
-#endif
-
 /* This helps us avoid #ifdef CONFIG_COMPACTION */
 #ifdef CONFIG_COMPACTION
 #define COMPACTION_BUILD 1
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc018b486b74..a49b0ea3cc2f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1871,7 +1871,7 @@ zonelist_scan:
 	 */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						high_zoneidx, nodemask) {
-		if (NUMA_BUILD && zlc_active &&
+		if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
 		if ((alloc_flags & ALLOC_CPUSET) &&
@@ -1917,7 +1917,8 @@ zonelist_scan:
 				    classzone_idx, alloc_flags))
 				goto try_this_zone;
 
-			if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+			if (IS_ENABLED(CONFIG_NUMA) &&
+					!did_zlc_setup && nr_online_nodes > 1) {
 				/*
 				 * we do zlc_setup if there are multiple nodes
 				 * and before considering the first zone allowed
@@ -1936,7 +1937,7 @@ zonelist_scan:
 			 * As we may have just activated ZLC, check if the first
 			 * eligible zone has failed zone_reclaim recently.
 			 */
-			if (NUMA_BUILD && zlc_active &&
+			if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
 				!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
 
@@ -1962,11 +1963,11 @@ try_this_zone:
 		if (page)
 			break;
 this_zone_full:
-		if (NUMA_BUILD)
+		if (IS_ENABLED(CONFIG_NUMA))
 			zlc_mark_zone_full(zonelist, z);
 	}
 
-	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
+	if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
 		/* Disable zlc cache for second zonelist scan */
 		zlc_active = 0;
 		goto zonelist_scan;
@@ -2266,7 +2267,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 
 	/* After successful reclaim, reconsider all zones for allocation */
-	if (NUMA_BUILD)
+	if (IS_ENABLED(CONFIG_NUMA))
 		zlc_clear_zones_full(zonelist);
 
 retry:
@@ -2412,7 +2413,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (IS_ENABLED(CONFIG_NUMA) &&
+			(gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
 restart:
@@ -2819,7 +2821,7 @@ unsigned int nr_free_pagecache_pages(void)
 
 static inline void show_node(struct zone *zone)
 {
-	if (NUMA_BUILD)
+	if (IS_ENABLED(CONFIG_NUMA))
 		printk("Node %d ", zone_to_nid(zone));
 }
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 78e08300db21..5123a169ab7b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2550,7 +2550,7 @@ static void s_stop(struct seq_file *m, void *p)
 
 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 {
-	if (NUMA_BUILD) {
+	if (IS_ENABLED(CONFIG_NUMA)) {
 		unsigned int nr, *counters = m->private;
 
 		if (!counters)
@@ -2615,7 +2615,7 @@ static int vmalloc_open(struct inode *inode, struct file *file)
 	unsigned int *ptr = NULL;
 	int ret;
 
-	if (NUMA_BUILD) {
+	if (IS_ENABLED(CONFIG_NUMA)) {
 		ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
 		if (ptr == NULL)
 			return -ENOMEM;
-- 
cgit v1.2.3-55-g7522


From d84da3f9e4f18809821562bd960e00a10673b341 Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Tue, 11 Dec 2012 16:00:31 -0800
Subject: mm: use IS_ENABLED(CONFIG_COMPACTION) instead of COMPACTION_BUILD

We don't need custom COMPACTION_BUILD anymore, since we have handy
IS_ENABLED().

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 7 -------
 mm/vmscan.c            | 9 +++++----
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 815e5845d954..dd9900cabf89 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -687,13 +687,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
-/* This helps us avoid #ifdef CONFIG_COMPACTION */
-#ifdef CONFIG_COMPACTION
-#define COMPACTION_BUILD 1
-#else
-#define COMPACTION_BUILD 0
-#endif
-
 /* This helps us to avoid #ifdef CONFIG_SYMBOL_PREFIX */
 #ifdef CONFIG_SYMBOL_PREFIX
 #define SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7ed37675644..a1ce17f44be0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1752,7 +1752,7 @@ out:
 /* Use reclaim/compaction for costly allocs or under memory pressure */
 static bool in_reclaim_compaction(struct scan_control *sc)
 {
-	if (COMPACTION_BUILD && sc->order &&
+	if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
 			(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
 			 sc->priority < DEF_PRIORITY - 2))
 		return true;
@@ -2005,7 +2005,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			if (zone->all_unreclaimable &&
 					sc->priority != DEF_PRIORITY)
 				continue;	/* Let kswapd poll it */
-			if (COMPACTION_BUILD) {
+			if (IS_ENABLED(CONFIG_COMPACTION)) {
 				/*
 				 * If we already have plenty of memory free for
 				 * compaction in this zone, don't free any more.
@@ -2421,7 +2421,8 @@ static bool zone_balanced(struct zone *zone, int order,
 				    balance_gap, classzone_idx, 0))
 		return false;
 
-	if (COMPACTION_BUILD && order && !compaction_suitable(zone, order))
+	if (IS_ENABLED(CONFIG_COMPACTION) && order &&
+	    !compaction_suitable(zone, order))
 		return false;
 
 	return true;
@@ -2684,7 +2685,7 @@ loop_again:
 			 * Do not reclaim more than needed for compaction.
 			 */
 			testorder = order;
-			if (COMPACTION_BUILD && order &&
+			if (IS_ENABLED(CONFIG_COMPACTION) && order &&
 					compaction_suitable(zone, order) !=
 						COMPACT_SKIPPED)
 				testorder = 0;
-- 
cgit v1.2.3-55-g7522


From b023f46813cde6e3b8a8c24f432ff9c1fd8e9a64 Mon Sep 17 00:00:00 2001
From: Wen Congyang
Date: Tue, 11 Dec 2012 16:00:45 -0800
Subject: memory-hotplug: skip HWPoisoned page when offlining pages

hwpoisoned may be set when we offline a page by the sysfs interface
/sys/devices/system/memory/soft_offline_page or
/sys/devices/system/memory/hard_offline_page. If we don't clear
this flag when onlining pages, this page can't be freed, and will
not in free list. So we can't offline these pages again. So we
should skip such page when offlining pages.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-isolation.h | 10 ++++++----
 mm/memory-failure.c            |  2 +-
 mm/memory_hotplug.c            |  5 +++--
 mm/page_alloc.c                | 27 +++++++++++++++++++++++----
 mm/page_isolation.c            | 27 ++++++++++++++++++++-------
 5 files changed, 53 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 76a9539cfd3f..a92061e08d48 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,7 +2,8 @@
 #define __LINUX_PAGEISOLATION_H
 
 
-bool has_unmovable_pages(struct zone *zone, struct page *page, int count);
+bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
+			 bool skip_hwpoisoned_pages);
 void set_pageblock_migratetype(struct page *page, int migratetype);
 int move_freepages_block(struct zone *zone, struct page *page,
 				int migratetype);
@@ -21,7 +22,7 @@ int move_freepages(struct zone *zone,
  */
 int
 start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			 unsigned migratetype);
+			 unsigned migratetype, bool skip_hwpoisoned_pages);
 
 /*
  * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
@@ -34,12 +35,13 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 /*
  * Test all pages in [start_pfn, end_pfn) are isolated or not.
  */
-int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
+int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
+			bool skip_hwpoisoned_pages);
 
 /*
  * Internal functions. Changes pageblock's migrate type.
  */
-int set_migratetype_isolate(struct page *page);
+int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages);
 void unset_migratetype_isolate(struct page *page, unsigned migratetype);
 struct page *alloc_migrate_target(struct page *page, unsigned long private,
 				int **resultp);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8b20278be6a6..2c9fc7340b12 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1385,7 +1385,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	 * Isolate the page, so that it doesn't get reallocated if it
 	 * was free.
 	 */
-	set_migratetype_isolate(p);
+	set_migratetype_isolate(p, true);
 	/*
 	 * When the target page is a free hugepage, just remove it
 	 * from free hugepage list.
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e4eeacae2b91..0095d156324a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -847,7 +847,7 @@ check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
 {
 	int ret;
 	long offlined = *(long *)data;
-	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
+	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
 	offlined = nr_pages;
 	if (!ret)
 		*(long *)data += offlined;
@@ -894,7 +894,8 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	nr_pages = end_pfn - start_pfn;
 
 	/* set above range as isolated */
-	ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+	ret = start_isolate_page_range(start_pfn, end_pfn,
+				       MIGRATE_MOVABLE, true);
 	if (ret)
 		goto out;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a49b0ea3cc2f..6f50cfe98a7b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5616,7 +5616,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
  * MIGRATE_MOVABLE block might include unmovable pages. It means you can't
  * expect this function should be exact.
  */
-bool has_unmovable_pages(struct zone *zone, struct page *page, int count)
+bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
+			 bool skip_hwpoisoned_pages)
 {
 	unsigned long pfn, iter, found;
 	int mt;
@@ -5651,6 +5652,13 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count)
 			continue;
 		}
 
+		/*
+		 * The HWPoisoned page may be not in buddy system, and
+		 * page_count() is not 0.
+		 */
+		if (skip_hwpoisoned_pages && PageHWPoison(page))
+			continue;
+
 		if (!PageLRU(page))
 			found++;
 		/*
@@ -5693,7 +5701,7 @@ bool is_pageblock_removable_nolock(struct page *page)
 			zone->zone_start_pfn + zone->spanned_pages <= pfn)
 		return false;
 
-	return !has_unmovable_pages(zone, page, 0);
+	return !has_unmovable_pages(zone, page, 0, true);
 }
 
 #ifdef CONFIG_CMA
@@ -5864,7 +5872,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 */
 
 	ret = start_isolate_page_range(pfn_max_align_down(start),
-				       pfn_max_align_up(end), migratetype);
+				       pfn_max_align_up(end), migratetype,
+				       false);
 	if (ret)
 		return ret;
 
@@ -5903,7 +5912,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	}
 
 	/* Make sure the range is really isolated. */
-	if (test_pages_isolated(outer_start, end)) {
+	if (test_pages_isolated(outer_start, end, false)) {
 		pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
 		       outer_start, end);
 		ret = -EBUSY;
@@ -6018,6 +6027,16 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 			continue;
 		}
 		page = pfn_to_page(pfn);
+		/*
+		 * The HWPoisoned page may be not in buddy system, and
+		 * page_count() is not 0.
+		 */
+		if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
+			pfn++;
+			SetPageReserved(page);
+			continue;
+		}
+
 		BUG_ON(page_count(page));
 		BUG_ON(!PageBuddy(page));
 		order = page_order(page);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index f2f5b4818e94..9d2264ea4606 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -30,7 +30,7 @@ static void restore_pageblock_isolate(struct page *page, int migratetype)
 	zone->nr_pageblock_isolate--;
 }
 
-int set_migratetype_isolate(struct page *page)
+int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
 {
 	struct zone *zone;
 	unsigned long flags, pfn;
@@ -66,7 +66,8 @@ int set_migratetype_isolate(struct page *page)
 	 * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
 	 * We just check MOVABLE pages.
 	 */
-	if (!has_unmovable_pages(zone, page, arg.pages_found))
+	if (!has_unmovable_pages(zone, page, arg.pages_found,
+				 skip_hwpoisoned_pages))
 		ret = 0;
 
 	/*
@@ -134,7 +135,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
  */
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			     unsigned migratetype)
+			     unsigned migratetype, bool skip_hwpoisoned_pages)
 {
 	unsigned long pfn;
 	unsigned long undo_pfn;
@@ -147,7 +148,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 	     pfn < end_pfn;
 	     pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
-		if (page && set_migratetype_isolate(page)) {
+		if (page &&
+		    set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
 			undo_pfn = pfn;
 			goto undo;
 		}
@@ -190,7 +192,8 @@ int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
  * Returns 1 if all pages in the range are isolated.
  */
 static int
-__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
+__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
+				  bool skip_hwpoisoned_pages)
 {
 	struct page *page;
 
@@ -220,6 +223,14 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
 		else if (page_count(page) == 0 &&
 			get_freepage_migratetype(page) == MIGRATE_ISOLATE)
 			pfn += 1;
+		else if (skip_hwpoisoned_pages && PageHWPoison(page)) {
+			/*
+			 * The HWPoisoned page may be not in buddy
+			 * system, and page_count() is not 0.
+			 */
+			pfn++;
+			continue;
+		}
 		else
 			break;
 	}
@@ -228,7 +239,8 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
 	return 1;
 }
 
-int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
+int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
+			bool skip_hwpoisoned_pages)
 {
 	unsigned long pfn, flags;
 	struct page *page;
@@ -251,7 +263,8 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 	/* Check all pages are free or Marked as ISOLATED */
 	zone = page_zone(page);
 	spin_lock_irqsave(&zone->lock, flags);
-	ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
+	ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
+						skip_hwpoisoned_pages);
 	spin_unlock_irqrestore(&zone->lock, flags);
 	return ret ? 0 : -EBUSY;
 }
-- 
cgit v1.2.3-55-g7522


From 8732794b166196cc501c2ddd9e7c97cf45ab64c5 Mon Sep 17 00:00:00 2001
From: Wen Congyang
Date: Tue, 11 Dec 2012 16:00:56 -0800
Subject: numa: convert static memory to dynamically allocated memory for per
 node device

We use a static array to store struct node.  In many cases, we don't have
too many nodes, and some memory will be unused.  Convert it to per-device
dynamically allocated memory.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/sysfs.c |  4 ++--
 drivers/base/node.c         | 38 ++++++++++++++++++++++----------------
 include/linux/node.h        |  2 +-
 mm/hugetlb.c                |  4 ++--
 4 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index cf357a059ddb..3ce1f864c2d3 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -607,7 +607,7 @@ static void register_nodes(void)
 
 int sysfs_add_device_to_node(struct device *dev, int nid)
 {
-	struct node *node = &node_devices[nid];
+	struct node *node = node_devices[nid];
 	return sysfs_create_link(&node->dev.kobj, &dev->kobj,
 			kobject_name(&dev->kobj));
 }
@@ -615,7 +615,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
 
 void sysfs_remove_device_from_node(struct device *dev, int nid)
 {
-	struct node *node = &node_devices[nid];
+	struct node *node = node_devices[nid];
 	sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
diff --git a/drivers/base/node.c b/drivers/base/node.c
index af1a177216f1..28216ce74b3d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -306,7 +306,7 @@ void unregister_node(struct node *node)
 	device_unregister(&node->dev);
 }
 
-struct node node_devices[MAX_NUMNODES];
+struct node *node_devices[MAX_NUMNODES];
 
 /*
  * register cpu under node
@@ -323,15 +323,15 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
 	if (!obj)
 		return 0;
 
-	ret = sysfs_create_link(&node_devices[nid].dev.kobj,
+	ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
 				&obj->kobj,
 				kobject_name(&obj->kobj));
 	if (ret)
 		return ret;
 
 	return sysfs_create_link(&obj->kobj,
-				 &node_devices[nid].dev.kobj,
-				 kobject_name(&node_devices[nid].dev.kobj));
+				 &node_devices[nid]->dev.kobj,
+				 kobject_name(&node_devices[nid]->dev.kobj));
 }
 
 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
@@ -345,10 +345,10 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 	if (!obj)
 		return 0;
 
-	sysfs_remove_link(&node_devices[nid].dev.kobj,
+	sysfs_remove_link(&node_devices[nid]->dev.kobj,
 			  kobject_name(&obj->kobj));
 	sysfs_remove_link(&obj->kobj,
-			  kobject_name(&node_devices[nid].dev.kobj));
+			  kobject_name(&node_devices[nid]->dev.kobj));
 
 	return 0;
 }
@@ -390,15 +390,15 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
 			continue;
 		if (page_nid != nid)
 			continue;
-		ret = sysfs_create_link_nowarn(&node_devices[nid].dev.kobj,
+		ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
 					&mem_blk->dev.kobj,
 					kobject_name(&mem_blk->dev.kobj));
 		if (ret)
 			return ret;
 
 		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
-				&node_devices[nid].dev.kobj,
-				kobject_name(&node_devices[nid].dev.kobj));
+				&node_devices[nid]->dev.kobj,
+				kobject_name(&node_devices[nid]->dev.kobj));
 	}
 	/* mem section does not span the specified node */
 	return 0;
@@ -431,10 +431,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 			continue;
 		if (node_test_and_set(nid, *unlinked_nodes))
 			continue;
-		sysfs_remove_link(&node_devices[nid].dev.kobj,
+		sysfs_remove_link(&node_devices[nid]->dev.kobj,
 			 kobject_name(&mem_blk->dev.kobj));
 		sysfs_remove_link(&mem_blk->dev.kobj,
-			 kobject_name(&node_devices[nid].dev.kobj));
+			 kobject_name(&node_devices[nid]->dev.kobj));
 	}
 	NODEMASK_FREE(unlinked_nodes);
 	return 0;
@@ -500,7 +500,7 @@ static void node_hugetlb_work(struct work_struct *work)
 
 static void init_node_hugetlb_work(int nid)
 {
-	INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work);
+	INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
 }
 
 static int node_memory_callback(struct notifier_block *self,
@@ -517,7 +517,7 @@ static int node_memory_callback(struct notifier_block *self,
 		 * when transitioning to/from memoryless state.
 		 */
 		if (nid != NUMA_NO_NODE)
-			schedule_work(&node_devices[nid].node_work);
+			schedule_work(&node_devices[nid]->node_work);
 		break;
 
 	case MEM_GOING_ONLINE:
@@ -558,9 +558,13 @@ int register_one_node(int nid)
 		struct node *parent = NULL;
 
 		if (p_node != nid)
-			parent = &node_devices[p_node];
+			parent = node_devices[p_node];
 
-		error = register_node(&node_devices[nid], nid, parent);
+		node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
+		if (!node_devices[nid])
+			return -ENOMEM;
+
+		error = register_node(node_devices[nid], nid, parent);
 
 		/* link cpu under this node */
 		for_each_present_cpu(cpu) {
@@ -581,7 +585,9 @@ int register_one_node(int nid)
 
 void unregister_one_node(int nid)
 {
-	unregister_node(&node_devices[nid]);
+	unregister_node(node_devices[nid]);
+	kfree(node_devices[nid]);
+	node_devices[nid] = NULL;
 }
 
 /*
diff --git a/include/linux/node.h b/include/linux/node.h
index 624e53cecc02..10316f1c68a9 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -27,7 +27,7 @@ struct node {
 };
 
 struct memory_block;
-extern struct node node_devices[];
+extern struct node *node_devices[];
 typedef  void (*node_registration_func_t)(struct node *);
 
 extern int register_node(struct node *, int, struct node *);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 59a0059b39e2..1ef2cd4ae3c9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1800,7 +1800,7 @@ static void hugetlb_unregister_all_nodes(void)
 	 * remove hstate attributes from any nodes that have them.
 	 */
 	for (nid = 0; nid < nr_node_ids; nid++)
-		hugetlb_unregister_node(&node_devices[nid]);
+		hugetlb_unregister_node(node_devices[nid]);
 }
 
 /*
@@ -1845,7 +1845,7 @@ static void hugetlb_register_all_nodes(void)
 	int nid;
 
 	for_each_node_state(nid, N_HIGH_MEMORY) {
-		struct node *node = &node_devices[nid];
+		struct node *node = node_devices[nid];
 		if (node->dev.id == nid)
 			hugetlb_register_node(node);
 	}
-- 
cgit v1.2.3-55-g7522


From d9713679dbd2a6ecb840cd5b65a3ec555c1ec3d4 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 11 Dec 2012 16:01:03 -0800
Subject: memory_hotplug: fix possible incorrect node_states[N_NORMAL_MEMORY]

Currently memory_hotplug only manages the node_states[N_HIGH_MEMORY], it
forgets to manage node_states[N_NORMAL_MEMORY].  This may cause
node_states[N_NORMAL_MEMORY] to become incorrect.

Example, if a node is empty before online, and we online a memory which is
in ZONE_NORMAL.  And after online, node_states[N_HIGH_MEMORY] is correct,
but node_states[N_NORMAL_MEMORY] is incorrect, the online code doesn't set
the new online node to node_states[N_NORMAL_MEMORY].

The same thing will happen when offlining (the offline code doesn't clear
the node from node_states[N_NORMAL_MEMORY] when needed).  Some memory
managment code depends node_states[N_NORMAL_MEMORY], so we have to fix up
the node_states[N_NORMAL_MEMORY].

We add node_states_check_changes_online() and
node_states_check_changes_offline() to detect whether
node_states[N_HIGH_MEMORY] and node_states[N_NORMAL_MEMORY] are changed
while hotpluging.

Also add @status_change_nid_normal to struct memory_notify, thus the
memory hotplug callbacks know whether the node_states[N_NORMAL_MEMORY] are
changed.  (We can add a @flags and reuse @status_change_nid instead of
introducing @status_change_nid_normal, but it will add much more
complexity in memory hotplug callback in every subsystem.  So introducing
@status_change_nid_normal is better and it doesn't change the sematics of
@status_change_nid)

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Rob Landley <rob@landley.net>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/memory-hotplug.txt |   5 +-
 include/linux/memory.h           |   1 +
 mm/memory_hotplug.c              | 136 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 125 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 6d0c2519cf47..6e6cbc78f329 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -377,15 +377,18 @@ The third argument is passed by pointer of struct memory_notify.
 struct memory_notify {
        unsigned long start_pfn;
        unsigned long nr_pages;
+       int status_change_nid_normal;
        int status_change_nid;
 }
 
 start_pfn is start_pfn of online/offline memory.
 nr_pages is # of pages of online/offline memory.
+status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
+is (will be) set/clear, if this is -1, then nodemask status is not changed.
 status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be)
 set/clear. It means a new(memoryless) node gets new memory by online and a
 node loses all memory. If this is -1, then nodemask status is not changed.
-If status_changed_nid >= 0, callback should create/discard structures for the
+If status_changed_nid* >= 0, callback should create/discard structures for the
 node if necessary.
 
 --------------
diff --git a/include/linux/memory.h b/include/linux/memory.h
index ff9a9f8e0ed9..a09216d0dcc7 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -53,6 +53,7 @@ int arch_get_memory_phys_device(unsigned long start_pfn);
 struct memory_notify {
 	unsigned long start_pfn;
 	unsigned long nr_pages;
+	int status_change_nid_normal;
 	int status_change_nid;
 };
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ec2f199cc5f7..72195602ded5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -460,6 +460,53 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 	return 0;
 }
 
+/* check which state of node_states will be changed when online memory */
+static void node_states_check_changes_online(unsigned long nr_pages,
+	struct zone *zone, struct memory_notify *arg)
+{
+	int nid = zone_to_nid(zone);
+	enum zone_type zone_last = ZONE_NORMAL;
+
+	/*
+	 * If we have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
+	 * which have 0...ZONE_NORMAL, set zone_last to ZONE_NORMAL.
+	 *
+	 * If we don't have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
+	 * which have 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
+	 */
+	if (N_HIGH_MEMORY == N_NORMAL_MEMORY)
+		zone_last = ZONE_MOVABLE;
+
+	/*
+	 * if the memory to be online is in a zone of 0...zone_last, and
+	 * the zones of 0...zone_last don't have memory before online, we will
+	 * need to set the node to node_states[N_NORMAL_MEMORY] after
+	 * the memory is online.
+	 */
+	if (zone_idx(zone) <= zone_last && !node_state(nid, N_NORMAL_MEMORY))
+		arg->status_change_nid_normal = nid;
+	else
+		arg->status_change_nid_normal = -1;
+
+	/*
+	 * if the node don't have memory befor online, we will need to
+	 * set the node to node_states[N_HIGH_MEMORY] after the memory
+	 * is online.
+	 */
+	if (!node_state(nid, N_HIGH_MEMORY))
+		arg->status_change_nid = nid;
+	else
+		arg->status_change_nid = -1;
+}
+
+static void node_states_set_node(int node, struct memory_notify *arg)
+{
+	if (arg->status_change_nid_normal >= 0)
+		node_set_state(node, N_NORMAL_MEMORY);
+
+	node_set_state(node, N_HIGH_MEMORY);
+}
+
 
 int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 {
@@ -471,13 +518,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	struct memory_notify arg;
 
 	lock_memory_hotplug();
+	/*
+	 * This doesn't need a lock to do pfn_to_page().
+	 * The section can't be removed here because of the
+	 * memory_block->state_mutex.
+	 */
+	zone = page_zone(pfn_to_page(pfn));
+
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
-	arg.status_change_nid = -1;
+	node_states_check_changes_online(nr_pages, zone, &arg);
 
 	nid = page_to_nid(pfn_to_page(pfn));
-	if (node_present_pages(nid) == 0)
-		arg.status_change_nid = nid;
 
 	ret = memory_notify(MEM_GOING_ONLINE, &arg);
 	ret = notifier_to_errno(ret);
@@ -486,12 +538,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 		unlock_memory_hotplug();
 		return ret;
 	}
-	/*
-	 * This doesn't need a lock to do pfn_to_page().
-	 * The section can't be removed here because of the
-	 * memory_block->state_mutex.
-	 */
-	zone = page_zone(pfn_to_page(pfn));
 	/*
 	 * If this zone is not populated, then it is not in zonelist.
 	 * This means the page allocator ignores this zone.
@@ -521,7 +567,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 	if (onlined_pages) {
-		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
+		node_states_set_node(zone_to_nid(zone), &arg);
 		if (need_zonelists_rebuild)
 			build_all_zonelists(NULL, NULL);
 		else
@@ -871,6 +917,67 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 	return offlined;
 }
 
+/* check which state of node_states will be changed when offline memory */
+static void node_states_check_changes_offline(unsigned long nr_pages,
+		struct zone *zone, struct memory_notify *arg)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	unsigned long present_pages = 0;
+	enum zone_type zt, zone_last = ZONE_NORMAL;
+
+	/*
+	 * If we have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
+	 * which have 0...ZONE_NORMAL, set zone_last to ZONE_NORMAL.
+	 *
+	 * If we don't have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
+	 * which have 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
+	 */
+	if (N_HIGH_MEMORY == N_NORMAL_MEMORY)
+		zone_last = ZONE_MOVABLE;
+
+	/*
+	 * check whether node_states[N_NORMAL_MEMORY] will be changed.
+	 * If the memory to be offline is in a zone of 0...zone_last,
+	 * and it is the last present memory, 0...zone_last will
+	 * become empty after offline , thus we can determind we will
+	 * need to clear the node from node_states[N_NORMAL_MEMORY].
+	 */
+	for (zt = 0; zt <= zone_last; zt++)
+		present_pages += pgdat->node_zones[zt].present_pages;
+	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
+		arg->status_change_nid_normal = zone_to_nid(zone);
+	else
+		arg->status_change_nid_normal = -1;
+
+	/*
+	 * node_states[N_HIGH_MEMORY] contains nodes which have 0...ZONE_MOVABLE
+	 */
+	zone_last = ZONE_MOVABLE;
+
+	/*
+	 * check whether node_states[N_HIGH_MEMORY] will be changed
+	 * If we try to offline the last present @nr_pages from the node,
+	 * we can determind we will need to clear the node from
+	 * node_states[N_HIGH_MEMORY].
+	 */
+	for (; zt <= zone_last; zt++)
+		present_pages += pgdat->node_zones[zt].present_pages;
+	if (nr_pages >= present_pages)
+		arg->status_change_nid = zone_to_nid(zone);
+	else
+		arg->status_change_nid = -1;
+}
+
+static void node_states_clear_node(int node, struct memory_notify *arg)
+{
+	if (arg->status_change_nid_normal >= 0)
+		node_clear_state(node, N_NORMAL_MEMORY);
+
+	if ((N_HIGH_MEMORY != N_NORMAL_MEMORY) &&
+	    (arg->status_change_nid >= 0))
+		node_clear_state(node, N_HIGH_MEMORY);
+}
+
 static int __ref __offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn, unsigned long timeout)
 {
@@ -905,9 +1012,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 
 	arg.start_pfn = start_pfn;
 	arg.nr_pages = nr_pages;
-	arg.status_change_nid = -1;
-	if (nr_pages >= node_present_pages(node))
-		arg.status_change_nid = node;
+	node_states_check_changes_offline(nr_pages, zone, &arg);
 
 	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
 	ret = notifier_to_errno(ret);
@@ -980,10 +1085,9 @@ repeat:
 	} else
 		zone_pcp_update(zone);
 
-	if (!node_present_pages(node)) {
-		node_clear_state(node, N_HIGH_MEMORY);
+	node_states_clear_node(node, &arg);
+	if (arg.status_change_nid >= 0)
 		kswapd_stop(node);
-	}
 
 	vm_total_pages = nr_free_pagecache_pages();
 	writeback_set_ratelimit();
-- 
cgit v1.2.3-55-g7522


From a1dd450bcb1a05e8218b9aac0ee36f8755d8a140 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 11 Dec 2012 16:01:27 -0800
Subject: mm: thp: set the accessed flag for old pages on access fault

On x86 memory accesses to pages without the ACCESSED flag set result in
the ACCESSED flag being set automatically.  With the ARM architecture a
page access fault is raised instead (and it will continue to be raised
until the ACCESSED flag is set for the appropriate PTE/PMD).

For normal memory pages, handle_pte_fault will call pte_mkyoung
(effectively setting the ACCESSED flag).  For transparent huge pages,
pmd_mkyoung will only be called for a write fault.

This patch ensures that faults on transparent hugepages which do not
result in a CoW update the access flags for the faulting pmd.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ni zhan Chen <nizhan.chen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h |  4 ++++
 mm/huge_memory.c        | 22 ++++++++++++++++++++++
 mm/memory.c             |  8 ++++++--
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b31cb7da0346..1af477552459 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -8,6 +8,10 @@ extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
+extern void huge_pmd_set_accessed(struct mm_struct *mm,
+				  struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmd,
+				  pmd_t orig_pmd, int dirty);
 extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			       unsigned long address, pmd_t *pmd,
 			       pmd_t orig_pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ea5fb93a53a9..5f902e20e8c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -784,6 +784,28 @@ out:
 	return ret;
 }
 
+void huge_pmd_set_accessed(struct mm_struct *mm,
+			   struct vm_area_struct *vma,
+			   unsigned long address,
+			   pmd_t *pmd, pmd_t orig_pmd,
+			   int dirty)
+{
+	pmd_t entry;
+	unsigned long haddr;
+
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, orig_pmd)))
+		goto unlock;
+
+	entry = pmd_mkyoung(orig_pmd);
+	haddr = address & HPAGE_PMD_MASK;
+	if (pmdp_set_access_flags(vma, haddr, pmd, entry, dirty))
+		update_mmu_cache_pmd(vma, address, pmd);
+
+unlock:
+	spin_unlock(&mm->page_table_lock);
+}
+
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 221fc9ffcab1..765377385632 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3537,8 +3537,9 @@ retry:
 
 		barrier();
 		if (pmd_trans_huge(orig_pmd)) {
-			if (flags & FAULT_FLAG_WRITE &&
-			    !pmd_write(orig_pmd) &&
+			unsigned int dirty = flags & FAULT_FLAG_WRITE;
+
+			if (dirty && !pmd_write(orig_pmd) &&
 			    !pmd_trans_splitting(orig_pmd)) {
 				ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
 							  orig_pmd);
@@ -3550,6 +3551,9 @@ retry:
 				if (unlikely(ret & VM_FAULT_OOM))
 					goto retry;
 				return ret;
+			} else {
+				huge_pmd_set_accessed(mm, vma, address, pmd,
+						      orig_pmd, dirty);
 			}
 			return 0;
 		}
-- 
cgit v1.2.3-55-g7522


From 42d7395feb56f0655cd8b68e06fc6063823449f8 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Tue, 11 Dec 2012 16:01:34 -0800
Subject: mm: support more pagesizes for MAP_HUGETLB/SHM_HUGETLB

There was some desire in large applications using MAP_HUGETLB or
SHM_HUGETLB to use 1GB huge pages on some mappings, and stay with 2MB on
others.  This is useful together with NUMA policy: use 2MB interleaving
on some mappings, but 1GB on local mappings.

This patch extends the IPC/SHM syscall interfaces slightly to allow
specifying the page size.

It borrows some upper bits in the existing flag arguments and allows
encoding the log of the desired page size in addition to the *_HUGETLB
flag.  When 0 is specified the default size is used, this makes the
change fully compatible.

Extending the internal hugetlb code to handle this is straight forward.
Instead of a single mount it just keeps an array of them and selects the
right mount based on the specified page size.  When no page size is
specified it uses the mount of the default page size.

The change is not visible in /proc/mounts because internal mounts don't
appear there.  It also has very little overhead: the additional mounts
just consume a super block, but not more memory when not used.

I also exported the new flags to the user headers (they were previously
under __KERNEL__).  Right now only symbols for x86 and some other
architecture for 1GB and 2MB are defined.  The interface should already
work for all other architectures though.  Only architectures that define
multiple hugetlb sizes actually need it (that is currently x86, tile,
powerpc).  However tile and powerpc have user configurable hugetlb
sizes, so it's not easy to add defines.  A program on those
architectures would need to query sysfs and use the appropiate log2.

[akpm@linux-foundation.org: cleanups]
[rientjes@google.com: fix build]
[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/mman.h          | 11 ++++++
 arch/mips/include/uapi/asm/mman.h      | 11 ++++++
 arch/parisc/include/uapi/asm/mman.h    | 11 ++++++
 arch/x86/include/asm/mman.h            |  3 ++
 arch/xtensa/include/uapi/asm/mman.h    | 11 ++++++
 fs/hugetlbfs/inode.c                   | 63 +++++++++++++++++++++++++++-------
 include/linux/hugetlb.h                |  7 ++--
 include/linux/shm.h                    | 15 ++++++++
 include/uapi/asm-generic/mman-common.h | 11 ++++++
 include/uapi/asm-generic/mman.h        |  2 ++
 ipc/shm.c                              |  3 +-
 mm/mmap.c                              |  5 +--
 12 files changed, 135 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h
index cbeb3616a28e..0086b472bc2b 100644
--- a/arch/alpha/include/asm/mman.h
+++ b/arch/alpha/include/asm/mman.h
@@ -63,4 +63,15 @@
 /* compatibility flags */
 #define MAP_FILE	0
 
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT	26
+#define MAP_HUGE_MASK	0x3f
+
 #endif /* __ALPHA_MMAN_H__ */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 46d3da0d4b92..9a936ac9a942 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -87,4 +87,15 @@
 /* compatibility flags */
 #define MAP_FILE	0
 
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT	26
+#define MAP_HUGE_MASK	0x3f
+
 #endif /* _ASM_MMAN_H */
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 12219ebce869..294d251ca7b2 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -70,4 +70,15 @@
 #define MAP_FILE	0
 #define MAP_VARIABLE	0
 
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT	26
+#define MAP_HUGE_MASK	0x3f
+
 #endif /* __PARISC_MMAN_H__ */
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
index 593e51d4643f..513b05f15bb4 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/asm/mman.h
@@ -3,6 +3,9 @@
 
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
+#define MAP_HUGE_2MB    (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB    (30 << MAP_HUGE_SHIFT)
+
 #include <asm-generic/mman.h>
 
 #endif /* _ASM_X86_MMAN_H */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 25bc6c1309c3..00eed6786d7e 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -93,4 +93,15 @@
 /* compatibility flags */
 #define MAP_FILE	0
 
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT	26
+#define MAP_HUGE_MASK	0x3f
+
 #endif /* _XTENSA_MMAN_H */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c5bc355d8243..21b8a4875237 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -923,7 +923,7 @@ static struct file_system_type hugetlbfs_fs_type = {
 	.kill_sb	= kill_litter_super,
 };
 
-static struct vfsmount *hugetlbfs_vfsmount;
+static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
 
 static int can_do_hugetlb_shm(void)
 {
@@ -932,9 +932,22 @@ static int can_do_hugetlb_shm(void)
 	return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
 }
 
+static int get_hstate_idx(int page_size_log)
+{
+	struct hstate *h;
+
+	if (!page_size_log)
+		return default_hstate_idx;
+	h = size_to_hstate(1 << page_size_log);
+	if (!h)
+		return -1;
+	return h - hstates;
+}
+
 struct file *hugetlb_file_setup(const char *name, unsigned long addr,
 				size_t size, vm_flags_t acctflag,
-				struct user_struct **user, int creat_flags)
+				struct user_struct **user,
+				int creat_flags, int page_size_log)
 {
 	int error = -ENOMEM;
 	struct file *file;
@@ -944,9 +957,14 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
 	struct qstr quick_string;
 	struct hstate *hstate;
 	unsigned long num_pages;
+	int hstate_idx;
+
+	hstate_idx = get_hstate_idx(page_size_log);
+	if (hstate_idx < 0)
+		return ERR_PTR(-ENODEV);
 
 	*user = NULL;
-	if (!hugetlbfs_vfsmount)
+	if (!hugetlbfs_vfsmount[hstate_idx])
 		return ERR_PTR(-ENOENT);
 
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
@@ -963,7 +981,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
 		}
 	}
 
-	root = hugetlbfs_vfsmount->mnt_root;
+	root = hugetlbfs_vfsmount[hstate_idx]->mnt_root;
 	quick_string.name = name;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
@@ -971,7 +989,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
 	if (!path.dentry)
 		goto out_shm_unlock;
 
-	path.mnt = mntget(hugetlbfs_vfsmount);
+	path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
 	error = -ENOSPC;
 	inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0);
 	if (!inode)
@@ -1011,8 +1029,9 @@ out_shm_unlock:
 
 static int __init init_hugetlbfs_fs(void)
 {
+	struct hstate *h;
 	int error;
-	struct vfsmount *vfsmount;
+	int i;
 
 	error = bdi_init(&hugetlbfs_backing_dev_info);
 	if (error)
@@ -1029,14 +1048,26 @@ static int __init init_hugetlbfs_fs(void)
 	if (error)
 		goto out;
 
-	vfsmount = kern_mount(&hugetlbfs_fs_type);
+	i = 0;
+	for_each_hstate(h) {
+		char buf[50];
+		unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);
 
-	if (!IS_ERR(vfsmount)) {
-		hugetlbfs_vfsmount = vfsmount;
-		return 0;
-	}
+		snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
+		hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
+							buf);
 
-	error = PTR_ERR(vfsmount);
+		if (IS_ERR(hugetlbfs_vfsmount[i])) {
+			pr_err("hugetlb: Cannot mount internal hugetlbfs for "
+				"page size %uK", ps_kb);
+			error = PTR_ERR(hugetlbfs_vfsmount[i]);
+			hugetlbfs_vfsmount[i] = NULL;
+		}
+		i++;
+	}
+	/* Non default hstates are optional */
+	if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
+		return 0;
 
  out:
 	kmem_cache_destroy(hugetlbfs_inode_cachep);
@@ -1047,13 +1078,19 @@ static int __init init_hugetlbfs_fs(void)
 
 static void __exit exit_hugetlbfs_fs(void)
 {
+	struct hstate *h;
+	int i;
+
+
 	/*
 	 * Make sure all delayed rcu free inodes are flushed before we
 	 * destroy cache.
 	 */
 	rcu_barrier();
 	kmem_cache_destroy(hugetlbfs_inode_cachep);
-	kern_unmount(hugetlbfs_vfsmount);
+	i = 0;
+	for_each_hstate(h)
+		kern_unmount(hugetlbfs_vfsmount[i++]);
 	unregister_filesystem(&hugetlbfs_fs_type);
 	bdi_destroy(&hugetlbfs_backing_dev_info);
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 225164842ab6..3e7fa1acf09c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -183,7 +183,8 @@ extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, unsigned long addr,
 				size_t size, vm_flags_t acct,
-				struct user_struct **user, int creat_flags);
+				struct user_struct **user, int creat_flags,
+				int page_size_log);
 
 static inline int is_file_hugepages(struct file *file)
 {
@@ -195,12 +196,14 @@ static inline int is_file_hugepages(struct file *file)
 	return 0;
 }
 
+
 #else /* !CONFIG_HUGETLBFS */
 
 #define is_file_hugepages(file)			0
 static inline struct file *
 hugetlb_file_setup(const char *name, unsigned long addr, size_t size,
-		vm_flags_t acctflag, struct user_struct **user, int creat_flags)
+		vm_flags_t acctflag, struct user_struct **user, int creat_flags,
+		int page_size_log)
 {
 	return ERR_PTR(-ENOSYS);
 }
diff --git a/include/linux/shm.h b/include/linux/shm.h
index bcf8a6a3ec00..429c1995d756 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -29,6 +29,21 @@ struct shmid_kernel /* private to the kernel */
 #define SHM_HUGETLB     04000   /* segment will use huge TLB pages */
 #define SHM_NORESERVE   010000  /* don't check for reservations */
 
+/* Bits [26:31] are reserved */
+
+/*
+ * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define SHM_HUGE_SHIFT  26
+#define SHM_HUGE_MASK   0x3f
+#define SHM_HUGE_2MB    (21 << SHM_HUGE_SHIFT)
+#define SHM_HUGE_1GB    (30 << SHM_HUGE_SHIFT)
+
 #ifdef CONFIG_SYSVIPC
 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
 	      unsigned long shmlba);
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index d030d2c2647a..4164529a94f9 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -55,4 +55,15 @@
 /* compatibility flags */
 #define MAP_FILE	0
 
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT	26
+#define MAP_HUGE_MASK	0x3f
+
 #endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
index 32c8bd6a196d..e9fe6fd2a074 100644
--- a/include/uapi/asm-generic/mman.h
+++ b/include/uapi/asm-generic/mman.h
@@ -13,6 +13,8 @@
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
+/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
+
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
diff --git a/ipc/shm.c b/ipc/shm.c
index dff40c9f73c9..4fa6d8fee730 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -495,7 +495,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if (shmflg & SHM_NORESERVE)
 			acctflag = VM_NORESERVE;
 		file = hugetlb_file_setup(name, 0, size, acctflag,
-					&shp->mlock_user, HUGETLB_SHMFS_INODE);
+				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
+				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 	} else {
 		/*
 		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
diff --git a/mm/mmap.c b/mm/mmap.c
index 9a796c41e7d9..ebf19031c5e4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1153,8 +1153,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 		 * memory so no accounting is necessary
 		 */
 		file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len,
-						VM_NORESERVE, &user,
-						HUGETLB_ANONHUGE_INODE);
+				VM_NORESERVE,
+				&user, HUGETLB_ANONHUGE_INODE,
+				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
 		if (IS_ERR(file))
 			return PTR_ERR(file);
 	}
-- 
cgit v1.2.3-55-g7522


From d37371870ceb1d2165397dc36114725b6dca946c Mon Sep 17 00:00:00 2001
From: Michel Lespinasse
Date: Tue, 11 Dec 2012 16:01:38 -0800
Subject: mm: augment vma rbtree with rb_subtree_gap

Define vma->rb_subtree_gap as the largest gap between any vma in the
subtree rooted at that vma, and their predecessor.  Or, for a recursive
definition, vma->rb_subtree_gap is the max of:

 - vma->vm_start - vma->vm_prev->vm_end
 - rb_subtree_gap fields of the vmas pointed by vma->rb.rb_left and
   vma->rb.rb_right

This will allow get_unmapped_area_* to find a free area of the right
size in O(log(N)) time, instead of potentially having to do a linear
walk across all the VMAs.

Also define mm->highest_vm_end as the vm_end field of the highest vma,
so that we can easily check if the following gap is suitable.

This does have the potential to make unmapping VMAs more expensive,
especially for processes with very large numbers of VMAs, where the VMA
rbtree can grow quite deep.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |   9 +++
 mm/mmap.c                | 143 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 141 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..94fa52b28ee8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -237,6 +237,14 @@ struct vm_area_struct {
 
 	struct rb_node vm_rb;
 
+	/*
+	 * Largest free memory gap in bytes to the left of this VMA.
+	 * Either between this VMA and vma->vm_prev, or between one of the
+	 * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
+	 * get_unmapped_area find a free area of the right size.
+	 */
+	unsigned long rb_subtree_gap;
+
 	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree, or
@@ -322,6 +330,7 @@ struct mm_struct {
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
 	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
+	unsigned long highest_vm_end;		/* highest vma end address */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
diff --git a/mm/mmap.c b/mm/mmap.c
index ebf19031c5e4..bdcea6310fff 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
+#include <linux/rbtree_augmented.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -297,6 +298,27 @@ out:
 	return retval;
 }
 
+static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+{
+	unsigned long max, subtree_gap;
+	max = vma->vm_start;
+	if (vma->vm_prev)
+		max -= vma->vm_prev->vm_end;
+	if (vma->vm_rb.rb_left) {
+		subtree_gap = rb_entry(vma->vm_rb.rb_left,
+				struct vm_area_struct, vm_rb)->rb_subtree_gap;
+		if (subtree_gap > max)
+			max = subtree_gap;
+	}
+	if (vma->vm_rb.rb_right) {
+		subtree_gap = rb_entry(vma->vm_rb.rb_right,
+				struct vm_area_struct, vm_rb)->rb_subtree_gap;
+		if (subtree_gap > max)
+			max = subtree_gap;
+	}
+	return max;
+}
+
 #ifdef CONFIG_DEBUG_VM_RB
 static int browse_rb(struct rb_root *root)
 {
@@ -327,6 +349,18 @@ static int browse_rb(struct rb_root *root)
 	return i;
 }
 
+static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		struct vm_area_struct *vma;
+		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
+		BUG_ON(vma != ignore &&
+		       vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
+	}
+}
+
 void validate_mm(struct mm_struct *mm)
 {
 	int bug = 0;
@@ -349,9 +383,52 @@ void validate_mm(struct mm_struct *mm)
 	BUG_ON(bug);
 }
 #else
+#define validate_mm_rb(root, ignore) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
 
+RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
+		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+
+/*
+ * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
+ * vma->vm_prev->vm_end values changed, without modifying the vma's position
+ * in the rbtree.
+ */
+static void vma_gap_update(struct vm_area_struct *vma)
+{
+	/*
+	 * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
+	 * function that does exacltly what we want.
+	 */
+	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
+}
+
+static inline void vma_rb_insert(struct vm_area_struct *vma,
+				 struct rb_root *root)
+{
+	/* All rb_subtree_gap values must be consistent prior to insertion */
+	validate_mm_rb(root, NULL);
+
+	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+}
+
+static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
+{
+	/*
+	 * All rb_subtree_gap values must be consistent prior to erase,
+	 * with the possible exception of the vma being erased.
+	 */
+	validate_mm_rb(root, vma);
+
+	/*
+	 * Note rb_erase_augmented is a fairly large inline function,
+	 * so make sure we instantiate it only once with our desired
+	 * augmented rbtree callbacks.
+	 */
+	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+}
+
 /*
  * vma has some anon_vma assigned, and is already inserted on that
  * anon_vma's interval trees.
@@ -421,8 +498,25 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
+	/* Update tracking information for the gap following the new vma. */
+	if (vma->vm_next)
+		vma_gap_update(vma->vm_next);
+	else
+		mm->highest_vm_end = vma->vm_end;
+
+	/*
+	 * vma->vm_prev wasn't known when we followed the rbtree to find the
+	 * correct insertion point for that vma. As a result, we could not
+	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
+	 * So, we first insert the vma with a zero rb_subtree_gap value
+	 * (to be consistent with what we did on the way down), and then
+	 * immediately update the gap to the correct value. Finally we
+	 * rebalance the rbtree after all augmented values have been set.
+	 */
 	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
-	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+	vma->rb_subtree_gap = 0;
+	vma_gap_update(vma);
+	vma_rb_insert(vma, &mm->mm_rb);
 }
 
 static void __vma_link_file(struct vm_area_struct *vma)
@@ -498,12 +592,12 @@ static inline void
 __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev)
 {
-	struct vm_area_struct *next = vma->vm_next;
+	struct vm_area_struct *next;
 
-	prev->vm_next = next;
+	vma_rb_erase(vma, &mm->mm_rb);
+	prev->vm_next = next = vma->vm_next;
 	if (next)
 		next->vm_prev = prev;
-	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
 }
@@ -525,6 +619,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct rb_root *root = NULL;
 	struct anon_vma *anon_vma = NULL;
 	struct file *file = vma->vm_file;
+	bool start_changed = false, end_changed = false;
 	long adjust_next = 0;
 	int remove_next = 0;
 
@@ -615,8 +710,14 @@ again:			remove_next = 1 + (end > next->vm_end);
 			vma_interval_tree_remove(next, root);
 	}
 
-	vma->vm_start = start;
-	vma->vm_end = end;
+	if (start != vma->vm_start) {
+		vma->vm_start = start;
+		start_changed = true;
+	}
+	if (end != vma->vm_end) {
+		vma->vm_end = end;
+		end_changed = true;
+	}
 	vma->vm_pgoff = pgoff;
 	if (adjust_next) {
 		next->vm_start += adjust_next << PAGE_SHIFT;
@@ -645,6 +746,15 @@ again:			remove_next = 1 + (end > next->vm_end);
 		 * (it may either follow vma or precede it).
 		 */
 		__insert_vm_struct(mm, insert);
+	} else {
+		if (start_changed)
+			vma_gap_update(vma);
+		if (end_changed) {
+			if (!next)
+				mm->highest_vm_end = end;
+			else if (!adjust_next)
+				vma_gap_update(next);
+		}
 	}
 
 	if (anon_vma) {
@@ -678,10 +788,13 @@ again:			remove_next = 1 + (end > next->vm_end);
 		 * we must remove another next too. It would clutter
 		 * up the code too much to do both in one go.
 		 */
-		if (remove_next == 2) {
-			next = vma->vm_next;
+		next = vma->vm_next;
+		if (remove_next == 2)
 			goto again;
-		}
+		else if (next)
+			vma_gap_update(next);
+		else
+			mm->highest_vm_end = end;
 	}
 	if (insert && file)
 		uprobe_mmap(insert);
@@ -1784,6 +1897,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				anon_vma_interval_tree_pre_update_vma(vma);
 				vma->vm_end = address;
 				anon_vma_interval_tree_post_update_vma(vma);
+				if (vma->vm_next)
+					vma_gap_update(vma->vm_next);
+				else
+					vma->vm_mm->highest_vm_end = address;
 				perf_event_mmap(vma);
 			}
 		}
@@ -1838,6 +1955,7 @@ int expand_downwards(struct vm_area_struct *vma,
 				vma->vm_start = address;
 				vma->vm_pgoff -= grow;
 				anon_vma_interval_tree_post_update_vma(vma);
+				vma_gap_update(vma);
 				perf_event_mmap(vma);
 			}
 		}
@@ -1960,14 +2078,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	vma->vm_prev = NULL;
 	do {
-		rb_erase(&vma->vm_rb, &mm->mm_rb);
+		vma_rb_erase(vma, &mm->mm_rb);
 		mm->map_count--;
 		tail_vma = vma;
 		vma = vma->vm_next;
 	} while (vma && vma->vm_start < end);
 	*insertion_point = vma;
-	if (vma)
+	if (vma) {
 		vma->vm_prev = prev;
+		vma_gap_update(vma);
+	} else
+		mm->highest_vm_end = prev ? prev->vm_end : 0;
 	tail_vma->vm_next = NULL;
 	if (mm->unmap_area == arch_unmap_area)
 		addr = prev ? prev->vm_end : mm->mmap_base;
-- 
cgit v1.2.3-55-g7522


From e4c6bfd2d79d063017ab19a18915f0bc759f32d9 Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Tue, 11 Dec 2012 16:01:44 -0800
Subject: mm: rearrange vm_area_struct for fewer cache misses

The kernel walks the VMA rbtree in various places, including the page
fault path.  However, the vm_rb node spanned two cache lines, on 64 bit
systems with 64 byte cache lines (most x86 systems).

Rearrange vm_area_struct a little, so all the information we need to do a
VMA tree walk is in the first cache line.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 94fa52b28ee8..7ade2731b5d6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -224,7 +224,8 @@ struct vm_region {
  * library, the executable area etc).
  */
 struct vm_area_struct {
-	struct mm_struct * vm_mm;	/* The address space we belong to. */
+	/* The first cache line has the info for VMA tree walking. */
+
 	unsigned long vm_start;		/* Our start address within vm_mm. */
 	unsigned long vm_end;		/* The first byte after our end address
 					   within vm_mm. */
@@ -232,9 +233,6 @@ struct vm_area_struct {
 	/* linked list of VM areas per task, sorted by address */
 	struct vm_area_struct *vm_next, *vm_prev;
 
-	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
-	unsigned long vm_flags;		/* Flags, see mm.h. */
-
 	struct rb_node vm_rb;
 
 	/*
@@ -245,6 +243,12 @@ struct vm_area_struct {
 	 */
 	unsigned long rb_subtree_gap;
 
+	/* Second cache line starts here. */
+
+	struct mm_struct *vm_mm;	/* The address space we belong to. */
+	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
+	unsigned long vm_flags;		/* Flags, see mm.h. */
+
 	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree, or
-- 
cgit v1.2.3-55-g7522


From db4fbfb9523c93583c339e66023506f651c1d54b Mon Sep 17 00:00:00 2001
From: Michel Lespinasse
Date: Tue, 11 Dec 2012 16:01:49 -0800
Subject: mm: vm_unmapped_area() lookup function

Implement vm_unmapped_area() using the rb_subtree_gap and highest_vm_end
information to look up for suitable virtual address space gaps.

struct vm_unmapped_area_info is used to define the desired allocation
request:
 - lowest or highest possible address matching the remaining constraints
 - desired gap length
 - low/high address limits that the gap must fit into
 - alignment mask and offset

Also update the generic arch_get_unmapped_area[_topdown] functions to make
use of vm_unmapped_area() instead of implementing a brute force search.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  31 ++++++
 mm/mmap.c          | 312 +++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 253 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcaab4e6fe91..4af4f0b1be4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1456,6 +1456,37 @@ extern unsigned long vm_mmap(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
 
+struct vm_unmapped_area_info {
+#define VM_UNMAPPED_AREA_TOPDOWN 1
+	unsigned long flags;
+	unsigned long length;
+	unsigned long low_limit;
+	unsigned long high_limit;
+	unsigned long align_mask;
+	unsigned long align_offset;
+};
+
+extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);
+extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
+
+/*
+ * Search for an unmapped address range.
+ *
+ * We are looking for a range that:
+ * - does not intersect with any VMA;
+ * - is contained within the [low_limit, high_limit) interval;
+ * - is at least the desired size.
+ * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
+ */
+static inline unsigned long
+vm_unmapped_area(struct vm_unmapped_area_info *info)
+{
+	if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN))
+		return unmapped_area(info);
+	else
+		return unmapped_area_topdown(info);
+}
+
 /* truncate.c */
 extern void truncate_inode_pages(struct address_space *, loff_t);
 extern void truncate_inode_pages_range(struct address_space *,
diff --git a/mm/mmap.c b/mm/mmap.c
index ff93f6c8436c..5646677a96d5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1539,6 +1539,206 @@ unacct_error:
 	return error;
 }
 
+unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+{
+	/*
+	 * We implement the search by looking for an rbtree node that
+	 * immediately follows a suitable gap. That is,
+	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
+	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
+	 * - gap_end - gap_start >= length
+	 */
+
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/* Adjust search limits by the desired length */
+	if (info->high_limit < length)
+		return -ENOMEM;
+	high_limit = info->high_limit - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		goto check_highest;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		goto check_highest;
+
+	while (true) {
+		/* Visit left subtree if it looks promising */
+		gap_end = vma->vm_start;
+		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+check_current:
+		/* Check if current node has a suitable gap */
+		if (gap_start > high_limit)
+			return -ENOMEM;
+		if (gap_end >= low_limit && gap_end - gap_start >= length)
+			goto found;
+
+		/* Visit right subtree if it looks promising */
+		if (vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				goto check_highest;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_left) {
+				gap_start = vma->vm_prev->vm_end;
+				gap_end = vma->vm_start;
+				goto check_current;
+			}
+		}
+	}
+
+check_highest:
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	gap_end = ULONG_MAX;  /* Only for VM_BUG_ON below */
+	if (gap_start > high_limit)
+		return -ENOMEM;
+
+found:
+	/* We found a suitable gap. Clip it with the original low_limit. */
+	if (gap_start < info->low_limit)
+		gap_start = info->low_limit;
+
+	/* Adjust gap address to the desired alignment */
+	gap_start += (info->align_offset - gap_start) & info->align_mask;
+
+	VM_BUG_ON(gap_start + info->length > info->high_limit);
+	VM_BUG_ON(gap_start + info->length > gap_end);
+	return gap_start;
+}
+
+unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit)
+		goto found_highest;
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length)
+			goto found;
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+found:
+	/* We found a suitable gap. Clip it with the original high_limit. */
+	if (gap_end > info->high_limit)
+		gap_end = info->high_limit;
+
+found_highest:
+	/* Compute highest gap address at the desired alignment */
+	gap_end -= info->length;
+	gap_end -= (gap_end - info->align_offset) & info->align_mask;
+
+	VM_BUG_ON(gap_end < info->low_limit);
+	VM_BUG_ON(gap_end < gap_start);
+	return gap_end;
+}
+
 /* Get an address range which is currently unmapped.
  * For shmat() with addr=0.
  *
@@ -1557,7 +1757,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long start_addr;
+	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE)
 		return -ENOMEM;
@@ -1572,40 +1772,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		    (!vma || addr + len <= vma->vm_start))
 			return addr;
 	}
-	if (len > mm->cached_hole_size) {
-	        start_addr = addr = mm->free_area_cache;
-	} else {
-	        start_addr = addr = TASK_UNMAPPED_BASE;
-	        mm->cached_hole_size = 0;
-	}
 
-full_search:
-	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (TASK_SIZE - len < addr) {
-			/*
-			 * Start a new search - just in case we missed
-			 * some holes.
-			 */
-			if (start_addr != TASK_UNMAPPED_BASE) {
-				addr = TASK_UNMAPPED_BASE;
-			        start_addr = addr;
-				mm->cached_hole_size = 0;
-				goto full_search;
-			}
-			return -ENOMEM;
-		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/*
-			 * Remember the place where we stopped the search:
-			 */
-			mm->free_area_cache = addr + len;
-			return addr;
-		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
-		addr = vma->vm_end;
-	}
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = TASK_UNMAPPED_BASE;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = 0;
+	return vm_unmapped_area(&info);
 }
 #endif	
 
@@ -1630,7 +1803,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	unsigned long addr = addr0, start_addr;
+	unsigned long addr = addr0;
+	struct vm_unmapped_area_info info;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
@@ -1648,53 +1822,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			return addr;
 	}
 
-	/* check if free_area_cache is useful for us */
-	if (len <= mm->cached_hole_size) {
- 	        mm->cached_hole_size = 0;
- 		mm->free_area_cache = mm->mmap_base;
- 	}
-
-try_again:
-	/* either no address requested or can't fit in requested address hole */
-	start_addr = addr = mm->free_area_cache;
-
-	if (addr < len)
-		goto fail;
-
-	addr -= len;
-	do {
-		/*
-		 * Lookup failure means no vma is above this address,
-		 * else if new region fits below vma->vm_start,
-		 * return with success:
-		 */
-		vma = find_vma(mm, addr);
-		if (!vma || addr+len <= vma->vm_start)
-			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
-
- 		/* remember the largest hole we saw so far */
- 		if (addr + mm->cached_hole_size < vma->vm_start)
- 		        mm->cached_hole_size = vma->vm_start - addr;
-
-		/* try just below the current vma->vm_start */
-		addr = vma->vm_start-len;
-	} while (len < vma->vm_start);
-
-fail:
-	/*
-	 * if hint left us with no space for the requested
-	 * mapping then try again:
-	 *
-	 * Note: this is different with the case of bottomup
-	 * which does the fully line-search, but we use find_vma
-	 * here that causes some holes skipped.
-	 */
-	if (start_addr != mm->mmap_base) {
-		mm->free_area_cache = mm->mmap_base;
-		mm->cached_hole_size = 0;
-		goto try_again;
-	}
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = mm->mmap_base;
+	info.align_mask = 0;
+	addr = vm_unmapped_area(&info);
 
 	/*
 	 * A failed mmap() very likely causes application failure,
@@ -1702,14 +1835,13 @@ fail:
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	mm->cached_hole_size = ~0UL;
-  	mm->free_area_cache = TASK_UNMAPPED_BASE;
-	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-	/*
-	 * Restore the topdown base:
-	 */
-	mm->free_area_cache = mm->mmap_base;
-	mm->cached_hole_size = ~0UL;
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+	}
 
 	return addr;
 }
-- 
cgit v1.2.3-55-g7522


From 78bd52097d04205a33a8014a1b8ac01cf1ae9d06 Mon Sep 17 00:00:00 2001
From: Rafael Aquini
Date: Tue, 11 Dec 2012 16:02:31 -0800
Subject: mm: adjust address_space_operations.migratepage() return code

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a
guest, thus imposing performance penalties associated with the reduced
number of transparent huge pages that could be used by the guest workload.

This patch-set follows the main idea discussed at 2012 LSFMMS session:
"Ballooning for transparent huge pages" -- http://lwn.net/Articles/490114/
to introduce the required changes to the virtio_balloon driver, as well as
the changes to the core compaction & migration bits, in order to make
those subsystems aware of ballooned pages and allow memory balloon pages
become movable within a guest, thus avoiding the aforementioned
fragmentation issue

Following are numbers that prove this patch benefits on allowing
compaction to be more effective at memory ballooned guests.

Results for STRESS-HIGHALLOC benchmark, from Mel Gorman's mmtests suite,
running on a 4gB RAM KVM guest which was ballooning 512mB RAM in 64mB
chunks, at every minute (inflating/deflating), while test was running:

===BEGIN stress-highalloc

STRESS-HIGHALLOC
                 highalloc-3.7     highalloc-3.7
                     rc4-clean         rc4-patch
Pass 1          55.00 ( 0.00%)    62.00 ( 7.00%)
Pass 2          54.00 ( 0.00%)    62.00 ( 8.00%)
while Rested    75.00 ( 0.00%)    80.00 ( 5.00%)

MMTests Statistics: duration
                 3.7         3.7
           rc4-clean   rc4-patch
User         1207.59     1207.46
System       1300.55     1299.61
Elapsed      2273.72     2157.06

MMTests Statistics: vmstat
                                3.7         3.7
                          rc4-clean   rc4-patch
Page Ins                    3581516     2374368
Page Outs                  11148692    10410332
Swap Ins                         80          47
Swap Outs                      3641         476
Direct pages scanned          37978       33826
Kswapd pages scanned        1828245     1342869
Kswapd pages reclaimed      1710236     1304099
Direct pages reclaimed        32207       31005
Kswapd efficiency               93%         97%
Kswapd velocity             804.077     622.546
Direct efficiency               84%         91%
Direct velocity              16.703      15.682
Percentage direct scans          2%          2%
Page writes by reclaim        79252        9704
Page writes file              75611        9228
Page writes anon               3641         476
Page reclaim immediate        16764       11014
Page rescued immediate            0           0
Slabs scanned               2171904     2152448
Direct inode steals             385        2261
Kswapd inode steals          659137      609670
Kswapd skipped wait               1          69
THP fault alloc                 546         631
THP collapse alloc              361         339
THP splits                      259         263
THP fault fallback               98          50
THP collapse fail                20          17
Compaction stalls               747         499
Compaction success              244         145
Compaction failures             503         354
Compaction pages moved       370888      474837
Compaction move failure       77378       65259

===END stress-highalloc

This patch:

Introduce MIGRATEPAGE_SUCCESS as the default return code for
address_space_operations.migratepage() method and documents the expected
return code for the same method in failure cases.

Signed-off-by: Rafael Aquini <aquini@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |  4 ++--
 include/linux/migrate.h |  7 +++++++
 mm/migrate.c            | 33 +++++++++++++++------------------
 3 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 47e6e2f21e21..4a55f35a6ced 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -582,11 +582,11 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
 	int rc;
 
 	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
-	if (rc)
+	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
 	migrate_page_copy(newpage, page);
 
-	return 0;
+	return MIGRATEPAGE_SUCCESS;
 }
 
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce7e6671968b..a4e886d17f87 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,6 +7,13 @@
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
+/*
+ * Return values from addresss_space_operations.migratepage():
+ * - negative errno on page migration failure;
+ * - zero on page migration success;
+ */
+#define MIGRATEPAGE_SUCCESS		0
+
 #ifdef CONFIG_MIGRATION
 
 extern void putback_lru_pages(struct list_head *l);
diff --git a/mm/migrate.c b/mm/migrate.c
index 1dc4598d2513..33f5f82a6006 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -276,7 +276,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		/* Anonymous page without mapping */
 		if (page_count(page) != 1)
 			return -EAGAIN;
-		return 0;
+		return MIGRATEPAGE_SUCCESS;
 	}
 
 	spin_lock_irq(&mapping->tree_lock);
@@ -346,7 +346,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	}
 	spin_unlock_irq(&mapping->tree_lock);
 
-	return 0;
+	return MIGRATEPAGE_SUCCESS;
 }
 
 /*
@@ -362,7 +362,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 	if (!mapping) {
 		if (page_count(page) != 1)
 			return -EAGAIN;
-		return 0;
+		return MIGRATEPAGE_SUCCESS;
 	}
 
 	spin_lock_irq(&mapping->tree_lock);
@@ -389,7 +389,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 	page_unfreeze_refs(page, expected_count - 1);
 
 	spin_unlock_irq(&mapping->tree_lock);
-	return 0;
+	return MIGRATEPAGE_SUCCESS;
 }
 
 /*
@@ -476,11 +476,11 @@ int migrate_page(struct address_space *mapping,
 
 	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
 
-	if (rc)
+	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
 
 	migrate_page_copy(newpage, page);
-	return 0;
+	return MIGRATEPAGE_SUCCESS;
 }
 EXPORT_SYMBOL(migrate_page);
 
@@ -503,7 +503,7 @@ int buffer_migrate_page(struct address_space *mapping,
 
 	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
 
-	if (rc)
+	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
 
 	/*
@@ -539,7 +539,7 @@ int buffer_migrate_page(struct address_space *mapping,
 
 	} while (bh != head);
 
-	return 0;
+	return MIGRATEPAGE_SUCCESS;
 }
 EXPORT_SYMBOL(buffer_migrate_page);
 #endif
@@ -618,7 +618,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *
  * Return value:
  *   < 0 - error code
- *  == 0 - success
+ *  MIGRATEPAGE_SUCCESS - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
 				int remap_swapcache, enum migrate_mode mode)
@@ -655,7 +655,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	else
 		rc = fallback_migrate_page(mapping, newpage, page, mode);
 
-	if (rc) {
+	if (rc != MIGRATEPAGE_SUCCESS) {
 		newpage->mapping = NULL;
 	} else {
 		if (remap_swapcache)
@@ -804,7 +804,7 @@ skip_unmap:
 		put_anon_vma(anon_vma);
 
 uncharge:
-	mem_cgroup_end_migration(mem, page, newpage, rc == 0);
+	mem_cgroup_end_migration(mem, page, newpage, rc == MIGRATEPAGE_SUCCESS);
 unlock:
 	unlock_page(page);
 out:
@@ -977,7 +977,7 @@ int migrate_pages(struct list_head *from,
 			case -EAGAIN:
 				retry++;
 				break;
-			case 0:
+			case MIGRATEPAGE_SUCCESS:
 				break;
 			default:
 				/* Permanent failure */
@@ -986,15 +986,12 @@ int migrate_pages(struct list_head *from,
 			}
 		}
 	}
-	rc = 0;
+	rc = nr_failed + retry;
 out:
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
-	if (rc)
-		return rc;
-
-	return nr_failed + retry;
+	return rc;
 }
 
 int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
@@ -1014,7 +1011,7 @@ int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
 			/* try again */
 			cond_resched();
 			break;
-		case 0:
+		case MIGRATEPAGE_SUCCESS:
 			goto out;
 		default:
 			rc = -EIO;
-- 
cgit v1.2.3-55-g7522


From 252aa6f5be64c90c67b9f066ccff880f6b487d32 Mon Sep 17 00:00:00 2001
From: Rafael Aquini
Date: Tue, 11 Dec 2012 16:02:35 -0800
Subject: mm: redefine address_space.assoc_mapping

Overhaul struct address_space.assoc_mapping renaming it to
address_space.private_data and its type is redefined to void*.  By this
approach we consistently name the .private_* elements from struct
address_space as well as allow extended usage for address_space
association with other data structures through ->private_data.

Also, all users of old ->assoc_mapping element are converted to reflect
its new name and type change (->private_data).

Signed-off-by: Rafael Aquini <aquini@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c        | 12 ++++++------
 fs/gfs2/glock.c    |  2 +-
 fs/inode.c         |  2 +-
 fs/nilfs2/page.c   |  2 +-
 include/linux/fs.h |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index ec0aca8ba6bf..6e9ed48064fc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -555,7 +555,7 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-	struct address_space *buffer_mapping = mapping->assoc_mapping;
+	struct address_space *buffer_mapping = mapping->private_data;
 
 	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
 		return 0;
@@ -588,10 +588,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	struct address_space *buffer_mapping = bh->b_page->mapping;
 
 	mark_buffer_dirty(bh);
-	if (!mapping->assoc_mapping) {
-		mapping->assoc_mapping = buffer_mapping;
+	if (!mapping->private_data) {
+		mapping->private_data = buffer_mapping;
 	} else {
-		BUG_ON(mapping->assoc_mapping != buffer_mapping);
+		BUG_ON(mapping->private_data != buffer_mapping);
 	}
 	if (!bh->b_assoc_map) {
 		spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +788,7 @@ void invalidate_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list))
@@ -811,7 +811,7 @@ int remove_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list)) {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6c2fd53cab2..0f22d09f358d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -768,7 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 		mapping->host = s->s_bdev->bd_inode;
 		mapping->flags = 0;
 		mapping_set_gfp_mask(mapping, GFP_NOFS);
-		mapping->assoc_mapping = NULL;
+		mapping->private_data = NULL;
 		mapping->backing_dev_info = s->s_bdi;
 		mapping->writeback_index = 0;
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 64999f144153..14084b72b259 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,7 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->host = inode;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
-	mapping->assoc_mapping = NULL;
+	mapping->private_data = NULL;
 	mapping->backing_dev_info = &default_backing_dev_info;
 	mapping->writeback_index = 0;
 
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 3e7b2a0dc0c8..07f76db04ec7 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -431,7 +431,7 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
 	mapping->host = inode;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
-	mapping->assoc_mapping = NULL;
+	mapping->private_data = NULL;
 	mapping->backing_dev_info = bdi;
 	mapping->a_ops = &empty_aops;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75fe9a134803..408fb1e77a0a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -418,7 +418,7 @@ struct address_space {
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
-	struct address_space	*assoc_mapping;	/* ditto */
+	void			*private_data;	/* ditto */
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
-- 
cgit v1.2.3-55-g7522


From 18468d93e53b037e1a04ec58398eab763d054064 Mon Sep 17 00:00:00 2001
From: Rafael Aquini
Date: Tue, 11 Dec 2012 16:02:38 -0800
Subject: mm: introduce a common interface for balloon pages mobility

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.

This patch introduces a common interface to help a balloon driver on
making its page set movable to compaction, and thus allowing the system
to better leverage the compation efforts on memory defragmentation.

[akpm@linux-foundation.org: use PAGE_FLAGS_CHECK_AT_PREP, s/__balloon_page_flags/page_flags_cleared/, small cleanups]
[rientjes@google.com: allow balloon compaction for any system with memory compaction enabled, which is the defconfig]
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/balloon_compaction.h | 272 +++++++++++++++++++++++++++++++++
 include/linux/migrate.h            |  10 ++
 include/linux/pagemap.h            |  16 ++
 mm/Kconfig                         |  15 ++
 mm/Makefile                        |   3 +-
 mm/balloon_compaction.c            | 302 +++++++++++++++++++++++++++++++++++++
 6 files changed, 617 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/balloon_compaction.h
 create mode 100644 mm/balloon_compaction.c

(limited to 'include/linux')

diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
new file mode 100644
index 000000000000..f7f1d7169b11
--- /dev/null
+++ b/include/linux/balloon_compaction.h
@@ -0,0 +1,272 @@
+/*
+ * include/linux/balloon_compaction.h
+ *
+ * Common interface definitions for making balloon pages movable by compaction.
+ *
+ * Despite being perfectly possible to perform ballooned pages migration, they
+ * make a special corner case to compaction scans because balloon pages are not
+ * enlisted at any LRU list like the other pages we do compact / migrate.
+ *
+ * As the page isolation scanning step a compaction thread does is a lockless
+ * procedure (from a page standpoint), it might bring some racy situations while
+ * performing balloon page compaction. In order to sort out these racy scenarios
+ * and safely perform balloon's page compaction and migration we must, always,
+ * ensure following these three simple rules:
+ *
+ *   i. when updating a balloon's page ->mapping element, strictly do it under
+ *      the following lock order, independently of the far superior
+ *      locking scheme (lru_lock, balloon_lock):
+ *	    +-page_lock(page);
+ *	      +--spin_lock_irq(&b_dev_info->pages_lock);
+ *	            ... page->mapping updates here ...
+ *
+ *  ii. before isolating or dequeueing a balloon page from the balloon device
+ *      pages list, the page reference counter must be raised by one and the
+ *      extra refcount must be dropped when the page is enqueued back into
+ *      the balloon device page list, thus a balloon page keeps its reference
+ *      counter raised only while it is under our special handling;
+ *
+ * iii. after the lockless scan step have selected a potential balloon page for
+ *      isolation, re-test the page->mapping flags and the page ref counter
+ *      under the proper page lock, to ensure isolating a valid balloon page
+ *      (not yet isolated, nor under release procedure)
+ *
+ * The functions provided by this interface are placed to help on coping with
+ * the aforementioned balloon page corner case, as well as to ensure the simple
+ * set of exposed rules are satisfied while we are dealing with balloon pages
+ * compaction / migration.
+ *
+ * Copyright (C) 2012, Red Hat, Inc.  Rafael Aquini <aquini@redhat.com>
+ */
+#ifndef _LINUX_BALLOON_COMPACTION_H
+#define _LINUX_BALLOON_COMPACTION_H
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/migrate.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+
+/*
+ * Balloon device information descriptor.
+ * This struct is used to allow the common balloon compaction interface
+ * procedures to find the proper balloon device holding memory pages they'll
+ * have to cope for page compaction / migration, as well as it serves the
+ * balloon driver as a page book-keeper for its registered balloon devices.
+ */
+struct balloon_dev_info {
+	void *balloon_device;		/* balloon device descriptor */
+	struct address_space *mapping;	/* balloon special page->mapping */
+	unsigned long isolated_pages;	/* # of isolated pages for migration */
+	spinlock_t pages_lock;		/* Protection to pages list */
+	struct list_head pages;		/* Pages enqueued & handled to Host */
+};
+
+extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
+extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
+extern struct balloon_dev_info *balloon_devinfo_alloc(
+						void *balloon_dev_descriptor);
+
+static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
+{
+	kfree(b_dev_info);
+}
+
+/*
+ * balloon_page_free - release a balloon page back to the page free lists
+ * @page: ballooned page to be set free
+ *
+ * This function must be used to properly set free an isolated/dequeued balloon
+ * page at the end of a sucessful page migration, or at the balloon driver's
+ * page release procedure.
+ */
+static inline void balloon_page_free(struct page *page)
+{
+	/*
+	 * Balloon pages always get an extra refcount before being isolated
+	 * and before being dequeued to help on sorting out fortuite colisions
+	 * between a thread attempting to isolate and another thread attempting
+	 * to release the very same balloon page.
+	 *
+	 * Before we handle the page back to Buddy, lets drop its extra refcnt.
+	 */
+	put_page(page);
+	__free_page(page);
+}
+
+#ifdef CONFIG_BALLOON_COMPACTION
+extern bool balloon_page_isolate(struct page *page);
+extern void balloon_page_putback(struct page *page);
+extern int balloon_page_migrate(struct page *newpage,
+				struct page *page, enum migrate_mode mode);
+extern struct address_space
+*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
+			const struct address_space_operations *a_ops);
+
+static inline void balloon_mapping_free(struct address_space *balloon_mapping)
+{
+	kfree(balloon_mapping);
+}
+
+/*
+ * page_flags_cleared - helper to perform balloon @page ->flags tests.
+ *
+ * As balloon pages are obtained from buddy and we do not play with page->flags
+ * at driver level (exception made when we get the page lock for compaction),
+ * we can safely identify a ballooned page by checking if the
+ * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared.  This approach also
+ * helps us skip ballooned pages that are locked for compaction or release, thus
+ * mitigating their racy check at balloon_page_movable()
+ */
+static inline bool page_flags_cleared(struct page *page)
+{
+	return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
+}
+
+/*
+ * __is_movable_balloon_page - helper to perform @page mapping->flags tests
+ */
+static inline bool __is_movable_balloon_page(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	return mapping_balloon(mapping);
+}
+
+/*
+ * balloon_page_movable - test page->mapping->flags to identify balloon pages
+ *			  that can be moved by compaction/migration.
+ *
+ * This function is used at core compaction's page isolation scheme, therefore
+ * most pages exposed to it are not enlisted as balloon pages and so, to avoid
+ * undesired side effects like racing against __free_pages(), we cannot afford
+ * holding the page locked while testing page->mapping->flags here.
+ *
+ * As we might return false positives in the case of a balloon page being just
+ * released under us, the page->mapping->flags need to be re-tested later,
+ * under the proper page lock, at the functions that will be coping with the
+ * balloon page case.
+ */
+static inline bool balloon_page_movable(struct page *page)
+{
+	/*
+	 * Before dereferencing and testing mapping->flags, let's make sure
+	 * this is not a page that uses ->mapping in a different way
+	 */
+	if (page_flags_cleared(page) && !page_mapped(page) &&
+	    page_count(page) == 1)
+		return __is_movable_balloon_page(page);
+
+	return false;
+}
+
+/*
+ * balloon_page_insert - insert a page into the balloon's page list and make
+ *		         the page->mapping assignment accordingly.
+ * @page    : page to be assigned as a 'balloon page'
+ * @mapping : allocated special 'balloon_mapping'
+ * @head    : balloon's device page list head
+ *
+ * Caller must ensure the page is locked and the spin_lock protecting balloon
+ * pages list is held before inserting a page into the balloon device.
+ */
+static inline void balloon_page_insert(struct page *page,
+				       struct address_space *mapping,
+				       struct list_head *head)
+{
+	page->mapping = mapping;
+	list_add(&page->lru, head);
+}
+
+/*
+ * balloon_page_delete - delete a page from balloon's page list and clear
+ *			 the page->mapping assignement accordingly.
+ * @page    : page to be released from balloon's page list
+ *
+ * Caller must ensure the page is locked and the spin_lock protecting balloon
+ * pages list is held before deleting a page from the balloon device.
+ */
+static inline void balloon_page_delete(struct page *page)
+{
+	page->mapping = NULL;
+	list_del(&page->lru);
+}
+
+/*
+ * balloon_page_device - get the b_dev_info descriptor for the balloon device
+ *			 that enqueues the given page.
+ */
+static inline struct balloon_dev_info *balloon_page_device(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	if (likely(mapping))
+		return mapping->private_data;
+
+	return NULL;
+}
+
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+	return GFP_HIGHUSER_MOVABLE;
+}
+
+static inline bool balloon_compaction_check(void)
+{
+	return true;
+}
+
+#else /* !CONFIG_BALLOON_COMPACTION */
+
+static inline void *balloon_mapping_alloc(void *balloon_device,
+				const struct address_space_operations *a_ops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void balloon_mapping_free(struct address_space *balloon_mapping)
+{
+	return;
+}
+
+static inline void balloon_page_insert(struct page *page,
+				       struct address_space *mapping,
+				       struct list_head *head)
+{
+	list_add(&page->lru, head);
+}
+
+static inline void balloon_page_delete(struct page *page)
+{
+	list_del(&page->lru);
+}
+
+static inline bool balloon_page_movable(struct page *page)
+{
+	return false;
+}
+
+static inline bool balloon_page_isolate(struct page *page)
+{
+	return false;
+}
+
+static inline void balloon_page_putback(struct page *page)
+{
+	return;
+}
+
+static inline int balloon_page_migrate(struct page *newpage,
+				struct page *page, enum migrate_mode mode)
+{
+	return 0;
+}
+
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+	return GFP_HIGHUSER;
+}
+
+static inline bool balloon_compaction_check(void)
+{
+	return false;
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* _LINUX_BALLOON_COMPACTION_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a4e886d17f87..ce42847ed35f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,8 +11,18 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
  * Return values from addresss_space_operations.migratepage():
  * - negative errno on page migration failure;
  * - zero on page migration success;
+ *
+ * The balloon page migration introduces this special case where a 'distinct'
+ * return code is used to flag a successful page migration to unmap_and_move().
+ * This approach is necessary because page migration can race against balloon
+ * deflation procedure, and for such case we could introduce a nasty page leak
+ * if a successfully migrated balloon page gets released concurrently with
+ * migration's unmap_and_move() wrap-up steps.
  */
 #define MIGRATEPAGE_SUCCESS		0
+#define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
+					   * sucessful migration case.
+					   */
 
 #ifdef CONFIG_MIGRATION
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e42c762f0dc7..6da609d14c15 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -24,6 +24,7 @@ enum mapping_flags {
 	AS_ENOSPC	= __GFP_BITS_SHIFT + 1,	/* ENOSPC on async write */
 	AS_MM_ALL_LOCKS	= __GFP_BITS_SHIFT + 2,	/* under mm_take_all_locks() */
 	AS_UNEVICTABLE	= __GFP_BITS_SHIFT + 3,	/* e.g., ramdisk, SHM_LOCK */
+	AS_BALLOON_MAP  = __GFP_BITS_SHIFT + 4, /* balloon page special map */
 };
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -53,6 +54,21 @@ static inline int mapping_unevictable(struct address_space *mapping)
 	return !!mapping;
 }
 
+static inline void mapping_set_balloon(struct address_space *mapping)
+{
+	set_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
+static inline void mapping_clear_balloon(struct address_space *mapping)
+{
+	clear_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
+static inline int mapping_balloon(struct address_space *mapping)
+{
+	return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
diff --git a/mm/Kconfig b/mm/Kconfig
index a3f8dddaaab3..e6651c5de14f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -187,6 +187,21 @@ config SPLIT_PTLOCK_CPUS
 	default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC
 	default "4"
 
+#
+# support for memory balloon compaction
+config BALLOON_COMPACTION
+	bool "Allow for balloon memory compaction/migration"
+	def_bool y
+	depends on COMPACTION && VIRTIO_BALLOON
+	help
+	  Memory fragmentation introduced by ballooning might reduce
+	  significantly the number of 2MB contiguous memory blocks that can be
+	  used within a guest, thus imposing performance penalties associated
+	  with the reduced number of transparent huge pages that could be used
+	  by the guest workload. Allowing the compaction & migration for memory
+	  pages enlisted as being part of memory balloon devices avoids the
+	  scenario aforementioned and helps improving memory defragmentation.
+
 #
 # support for memory compaction
 config COMPACTION
diff --git a/mm/Makefile b/mm/Makefile
index 6b025f80af34..3a4628751f89 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,8 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o interval_tree.o $(mmu-y)
+			   compaction.o balloon_compaction.o \
+			   interval_tree.o $(mmu-y)
 
 obj-y += init-mm.o
 
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
new file mode 100644
index 000000000000..07dbc8ec46cf
--- /dev/null
+++ b/mm/balloon_compaction.c
@@ -0,0 +1,302 @@
+/*
+ * mm/balloon_compaction.c
+ *
+ * Common interface for making balloon pages movable by compaction.
+ *
+ * Copyright (C) 2012, Red Hat, Inc.  Rafael Aquini <aquini@redhat.com>
+ */
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/balloon_compaction.h>
+
+/*
+ * balloon_devinfo_alloc - allocates a balloon device information descriptor.
+ * @balloon_dev_descriptor: pointer to reference the balloon device which
+ *                          this struct balloon_dev_info will be servicing.
+ *
+ * Driver must call it to properly allocate and initialize an instance of
+ * struct balloon_dev_info which will be used to reference a balloon device
+ * as well as to keep track of the balloon device page list.
+ */
+struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor)
+{
+	struct balloon_dev_info *b_dev_info;
+	b_dev_info = kmalloc(sizeof(*b_dev_info), GFP_KERNEL);
+	if (!b_dev_info)
+		return ERR_PTR(-ENOMEM);
+
+	b_dev_info->balloon_device = balloon_dev_descriptor;
+	b_dev_info->mapping = NULL;
+	b_dev_info->isolated_pages = 0;
+	spin_lock_init(&b_dev_info->pages_lock);
+	INIT_LIST_HEAD(&b_dev_info->pages);
+
+	return b_dev_info;
+}
+EXPORT_SYMBOL_GPL(balloon_devinfo_alloc);
+
+/*
+ * balloon_page_enqueue - allocates a new page and inserts it into the balloon
+ *			  page list.
+ * @b_dev_info: balloon device decriptor where we will insert a new page to
+ *
+ * Driver must call it to properly allocate a new enlisted balloon page
+ * before definetively removing it from the guest system.
+ * This function returns the page address for the recently enqueued page or
+ * NULL in the case we fail to allocate a new page this turn.
+ */
+struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
+{
+	unsigned long flags;
+	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
+					__GFP_NOMEMALLOC | __GFP_NORETRY);
+	if (!page)
+		return NULL;
+
+	/*
+	 * Block others from accessing the 'page' when we get around to
+	 * establishing additional references. We should be the only one
+	 * holding a reference to the 'page' at this point.
+	 */
+	BUG_ON(!trylock_page(page));
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	balloon_page_insert(page, b_dev_info->mapping, &b_dev_info->pages);
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+	unlock_page(page);
+	return page;
+}
+EXPORT_SYMBOL_GPL(balloon_page_enqueue);
+
+/*
+ * balloon_page_dequeue - removes a page from balloon's page list and returns
+ *			  the its address to allow the driver release the page.
+ * @b_dev_info: balloon device decriptor where we will grab a page from.
+ *
+ * Driver must call it to properly de-allocate a previous enlisted balloon page
+ * before definetively releasing it back to the guest system.
+ * This function returns the page address for the recently dequeued page or
+ * NULL in the case we find balloon's page list temporarily empty due to
+ * compaction isolated pages.
+ */
+struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
+{
+	struct page *page, *tmp;
+	unsigned long flags;
+	bool dequeued_page;
+
+	dequeued_page = false;
+	list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
+		/*
+		 * Block others from accessing the 'page' while we get around
+		 * establishing additional references and preparing the 'page'
+		 * to be released by the balloon driver.
+		 */
+		if (trylock_page(page)) {
+			spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+			/*
+			 * Raise the page refcount here to prevent any wrong
+			 * attempt to isolate this page, in case of coliding
+			 * with balloon_page_isolate() just after we release
+			 * the page lock.
+			 *
+			 * balloon_page_free() will take care of dropping
+			 * this extra refcount later.
+			 */
+			get_page(page);
+			balloon_page_delete(page);
+			spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+			unlock_page(page);
+			dequeued_page = true;
+			break;
+		}
+	}
+
+	if (!dequeued_page) {
+		/*
+		 * If we are unable to dequeue a balloon page because the page
+		 * list is empty and there is no isolated pages, then something
+		 * went out of track and some balloon pages are lost.
+		 * BUG() here, otherwise the balloon driver may get stuck into
+		 * an infinite loop while attempting to release all its pages.
+		 */
+		spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+		if (unlikely(list_empty(&b_dev_info->pages) &&
+			     !b_dev_info->isolated_pages))
+			BUG();
+		spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+		page = NULL;
+	}
+	return page;
+}
+EXPORT_SYMBOL_GPL(balloon_page_dequeue);
+
+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * balloon_mapping_alloc - allocates a special ->mapping for ballooned pages.
+ * @b_dev_info: holds the balloon device information descriptor.
+ * @a_ops: balloon_mapping address_space_operations descriptor.
+ *
+ * Driver must call it to properly allocate and initialize an instance of
+ * struct address_space which will be used as the special page->mapping for
+ * balloon device enlisted page instances.
+ */
+struct address_space *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
+				const struct address_space_operations *a_ops)
+{
+	struct address_space *mapping;
+
+	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+	if (!mapping)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Give a clean 'zeroed' status to all elements of this special
+	 * balloon page->mapping struct address_space instance.
+	 */
+	address_space_init_once(mapping);
+
+	/*
+	 * Set mapping->flags appropriately, to allow balloon pages
+	 * ->mapping identification.
+	 */
+	mapping_set_balloon(mapping);
+	mapping_set_gfp_mask(mapping, balloon_mapping_gfp_mask());
+
+	/* balloon's page->mapping->a_ops callback descriptor */
+	mapping->a_ops = a_ops;
+
+	/*
+	 * Establish a pointer reference back to the balloon device descriptor
+	 * this particular page->mapping will be servicing.
+	 * This is used by compaction / migration procedures to identify and
+	 * access the balloon device pageset while isolating / migrating pages.
+	 *
+	 * As some balloon drivers can register multiple balloon devices
+	 * for a single guest, this also helps compaction / migration to
+	 * properly deal with multiple balloon pagesets, when required.
+	 */
+	mapping->private_data = b_dev_info;
+	b_dev_info->mapping = mapping;
+
+	return mapping;
+}
+EXPORT_SYMBOL_GPL(balloon_mapping_alloc);
+
+static inline void __isolate_balloon_page(struct page *page)
+{
+	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+	unsigned long flags;
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	list_del(&page->lru);
+	b_dev_info->isolated_pages++;
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+}
+
+static inline void __putback_balloon_page(struct page *page)
+{
+	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+	unsigned long flags;
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	list_add(&page->lru, &b_dev_info->pages);
+	b_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+}
+
+static inline int __migrate_balloon_page(struct address_space *mapping,
+		struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+	return page->mapping->a_ops->migratepage(mapping, newpage, page, mode);
+}
+
+/* __isolate_lru_page() counterpart for a ballooned page */
+bool balloon_page_isolate(struct page *page)
+{
+	/*
+	 * Avoid burning cycles with pages that are yet under __free_pages(),
+	 * or just got freed under us.
+	 *
+	 * In case we 'win' a race for a balloon page being freed under us and
+	 * raise its refcount preventing __free_pages() from doing its job
+	 * the put_page() at the end of this block will take care of
+	 * release this page, thus avoiding a nasty leakage.
+	 */
+	if (likely(get_page_unless_zero(page))) {
+		/*
+		 * As balloon pages are not isolated from LRU lists, concurrent
+		 * compaction threads can race against page migration functions
+		 * as well as race against the balloon driver releasing a page.
+		 *
+		 * In order to avoid having an already isolated balloon page
+		 * being (wrongly) re-isolated while it is under migration,
+		 * or to avoid attempting to isolate pages being released by
+		 * the balloon driver, lets be sure we have the page lock
+		 * before proceeding with the balloon page isolation steps.
+		 */
+		if (likely(trylock_page(page))) {
+			/*
+			 * A ballooned page, by default, has just one refcount.
+			 * Prevent concurrent compaction threads from isolating
+			 * an already isolated balloon page by refcount check.
+			 */
+			if (__is_movable_balloon_page(page) &&
+			    page_count(page) == 2) {
+				__isolate_balloon_page(page);
+				unlock_page(page);
+				return true;
+			}
+			unlock_page(page);
+		}
+		put_page(page);
+	}
+	return false;
+}
+
+/* putback_lru_page() counterpart for a ballooned page */
+void balloon_page_putback(struct page *page)
+{
+	/*
+	 * 'lock_page()' stabilizes the page and prevents races against
+	 * concurrent isolation threads attempting to re-isolate it.
+	 */
+	lock_page(page);
+
+	if (__is_movable_balloon_page(page)) {
+		__putback_balloon_page(page);
+		/* drop the extra ref count taken for page isolation */
+		put_page(page);
+	} else {
+		WARN_ON(1);
+		dump_page(page);
+	}
+	unlock_page(page);
+}
+
+/* move_to_new_page() counterpart for a ballooned page */
+int balloon_page_migrate(struct page *newpage,
+			 struct page *page, enum migrate_mode mode)
+{
+	struct address_space *mapping;
+	int rc = -EAGAIN;
+
+	/*
+	 * Block others from accessing the 'newpage' when we get around to
+	 * establishing additional references. We should be the only one
+	 * holding a reference to the 'newpage' at this point.
+	 */
+	BUG_ON(!trylock_page(newpage));
+
+	if (WARN_ON(!__is_movable_balloon_page(page))) {
+		dump_page(page);
+		unlock_page(newpage);
+		return rc;
+	}
+
+	mapping = page->mapping;
+	if (mapping)
+		rc = __migrate_balloon_page(mapping, newpage, page, mode);
+
+	unlock_page(newpage);
+	return rc;
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
-- 
cgit v1.2.3-55-g7522


From 5733c7d11dff44e98d2ca16617886a78086b354f Mon Sep 17 00:00:00 2001
From: Rafael Aquini
Date: Tue, 11 Dec 2012 16:02:47 -0800
Subject: mm: introduce putback_movable_pages()

The PATCH "mm: introduce compaction and migration for virtio ballooned pages"
hacks around putback_lru_pages() in order to allow ballooned pages to be
re-inserted on balloon page list as if a ballooned page was like a LRU page.

As ballooned pages are not legitimate LRU pages, this patch introduces
putback_movable_pages() to properly cope with cases where the isolated
pageset contains ballooned pages and LRU pages, thus fixing the mentioned
inelegant hack around putback_lru_pages().

Signed-off-by: Rafael Aquini <aquini@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h |  2 ++
 mm/compaction.c         |  6 +++---
 mm/migrate.c            | 20 ++++++++++++++++++++
 mm/page_alloc.c         |  2 +-
 4 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce42847ed35f..0b5865c61efd 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -27,6 +27,7 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 #ifdef CONFIG_MIGRATION
 
 extern void putback_lru_pages(struct list_head *l);
+extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
@@ -50,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 #else
 
 static inline void putback_lru_pages(struct list_head *l) {}
+static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index 470474c03b61..d24dd2d7bad4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1003,7 +1003,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		switch (isolate_migratepages(zone, cc)) {
 		case ISOLATE_ABORT:
 			ret = COMPACT_PARTIAL;
-			putback_lru_pages(&cc->migratepages);
+			putback_movable_pages(&cc->migratepages);
 			cc->nr_migratepages = 0;
 			goto out;
 		case ISOLATE_NONE:
@@ -1026,9 +1026,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
 						nr_remaining);
 
-		/* Release LRU pages not migrated */
+		/* Release isolated pages not migrated */
 		if (err) {
-			putback_lru_pages(&cc->migratepages);
+			putback_movable_pages(&cc->migratepages);
 			cc->nr_migratepages = 0;
 			if (err == -ENOMEM) {
 				ret = COMPACT_PARTIAL;
diff --git a/mm/migrate.c b/mm/migrate.c
index 427343c0c296..3f675ca08279 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -76,6 +76,26 @@ void putback_lru_pages(struct list_head *l)
 	struct page *page;
 	struct page *page2;
 
+	list_for_each_entry_safe(page, page2, l, lru) {
+		list_del(&page->lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
+			putback_lru_page(page);
+	}
+}
+
+/*
+ * Put previously isolated pages back onto the appropriate lists
+ * from where they were once taken off for compaction/migration.
+ *
+ * This function shall be used instead of putback_lru_pages(),
+ * whenever the isolated pageset has been built by isolate_migratepages_range()
+ */
+void putback_movable_pages(struct list_head *l)
+{
+	struct page *page;
+	struct page *page2;
+
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5a7b7611d4e4..d9fbac1b5030 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5761,7 +5761,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 				    0, false, MIGRATE_SYNC);
 	}
 
-	putback_lru_pages(&cc->migratepages);
+	putback_movable_pages(&cc->migratepages);
 	return ret > 0 ? 0 : ret;
 }
 
-- 
cgit v1.2.3-55-g7522


From fa264375175a382621c5344a6508e02ec4d1c3c0 Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu
Date: Tue, 11 Dec 2012 16:02:52 -0800
Subject: mm: cleanup register_node()

register_node() is defined as extern in include/linux/node.h.  But the
function is only called from register_one_node() in driver/base/node.c.

So the patch defines register_node() as static.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  | 2 +-
 include/linux/node.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 4282e82d9f26..fffed4c96bb3 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -277,7 +277,7 @@ static void node_device_release(struct device *dev)
  *
  * Initialize and register the node device.
  */
-int register_node(struct node *node, int num, struct node *parent)
+static int register_node(struct node *node, int num, struct node *parent)
 {
 	int error;
 
diff --git a/include/linux/node.h b/include/linux/node.h
index 10316f1c68a9..2115ad5d6f19 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -30,7 +30,6 @@ struct memory_block;
 extern struct node *node_devices[];
 typedef  void (*node_registration_func_t)(struct node *);
 
-extern int register_node(struct node *, int, struct node *);
 extern void unregister_node(struct node *node);
 #ifdef CONFIG_NUMA
 extern int register_one_node(int nid);
-- 
cgit v1.2.3-55-g7522


From a9c58b907dbc6821533dfc295b63caf111ff1f16 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Tue, 11 Dec 2012 16:02:54 -0800
Subject: mm, oom: change type of oom_score_adj to short

The maximum oom_score_adj is 1000 and the minimum oom_score_adj is -1000,
so this range can be represented by the signed short type with no
functional change.  The extra space this frees up in struct signal_struct
will be used for per-thread oom kill flags in the next patch.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/staging/android/lowmemorykiller.c | 16 ++++++++--------
 fs/proc/base.c                            | 10 +++++-----
 include/linux/oom.h                       |  4 ++--
 include/linux/sched.h                     |  6 +++---
 include/trace/events/oom.h                |  4 ++--
 include/trace/events/task.h               |  8 ++++----
 mm/ksm.c                                  |  2 +-
 mm/oom_kill.c                             | 10 +++++-----
 mm/swapfile.c                             |  2 +-
 9 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index b91e4bc332a7..3b91b0fd4de3 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -40,7 +40,7 @@
 #include <linux/notifier.h>
 
 static uint32_t lowmem_debug_level = 2;
-static int lowmem_adj[6] = {
+static short lowmem_adj[6] = {
 	0,
 	1,
 	6,
@@ -70,9 +70,9 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
 	int rem = 0;
 	int tasksize;
 	int i;
-	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
+	short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
 	int selected_tasksize = 0;
-	int selected_oom_score_adj;
+	short selected_oom_score_adj;
 	int array_size = ARRAY_SIZE(lowmem_adj);
 	int other_free = global_page_state(NR_FREE_PAGES);
 	int other_file = global_page_state(NR_FILE_PAGES) -
@@ -90,7 +90,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
 		}
 	}
 	if (sc->nr_to_scan > 0)
-		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
+		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n",
 				sc->nr_to_scan, sc->gfp_mask, other_free,
 				other_file, min_score_adj);
 	rem = global_page_state(NR_ACTIVE_ANON) +
@@ -107,7 +107,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
 	rcu_read_lock();
 	for_each_process(tsk) {
 		struct task_struct *p;
-		int oom_score_adj;
+		short oom_score_adj;
 
 		if (tsk->flags & PF_KTHREAD)
 			continue;
@@ -141,11 +141,11 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
 		selected = p;
 		selected_tasksize = tasksize;
 		selected_oom_score_adj = oom_score_adj;
-		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
+		lowmem_print(2, "select %d (%s), adj %hd, size %d, to kill\n",
 			     p->pid, p->comm, oom_score_adj, tasksize);
 	}
 	if (selected) {
-		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
+		lowmem_print(1, "send sigkill to %d (%s), adj %hd, size %d\n",
 			     selected->pid, selected->comm,
 			     selected_oom_score_adj, selected_tasksize);
 		lowmem_deathpending_timeout = jiffies + HZ;
@@ -176,7 +176,7 @@ static void __exit lowmem_exit(void)
 }
 
 module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
-module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size,
+module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
 			 S_IRUGO | S_IWUSR);
 module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
 			 S_IRUGO | S_IWUSR);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9e28356a959a..aa63d25157b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -985,7 +985,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 {
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char buffer[PROC_NUMBUF];
-	int oom_score_adj = OOM_SCORE_ADJ_MIN;
+	short oom_score_adj = OOM_SCORE_ADJ_MIN;
 	unsigned long flags;
 	size_t len;
 
@@ -996,7 +996,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 		unlock_task_sighand(task, &flags);
 	}
 	put_task_struct(task);
-	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
+	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
 	return simple_read_from_buffer(buf, count, ppos, buffer, len);
 }
 
@@ -1043,15 +1043,15 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 		goto err_task_lock;
 	}
 
-	if (oom_score_adj < task->signal->oom_score_adj_min &&
+	if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
 			!capable(CAP_SYS_RESOURCE)) {
 		err = -EACCES;
 		goto err_sighand;
 	}
 
-	task->signal->oom_score_adj = oom_score_adj;
+	task->signal->oom_score_adj = (short)oom_score_adj;
 	if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
-		task->signal->oom_score_adj_min = oom_score_adj;
+		task->signal->oom_score_adj_min = (short)oom_score_adj;
 	trace_oom_score_adj_update(task);
 
 err_sighand:
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 4a4188da4570..922dab164eb0 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -29,8 +29,8 @@ enum oom_scan_t {
 	OOM_SCAN_SELECT,	/* always select this thread first */
 };
 
-extern void compare_swap_oom_score_adj(int old_val, int new_val);
-extern int test_set_oom_score_adj(int new_val);
+extern void compare_swap_oom_score_adj(short old_val, short new_val);
+extern short test_set_oom_score_adj(short new_val);
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..ed30456152da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -631,9 +631,9 @@ struct signal_struct {
 	struct rw_semaphore group_rwsem;
 #endif
 
-	int oom_score_adj;	/* OOM kill score adjustment */
-	int oom_score_adj_min;	/* OOM kill score adjustment minimum value.
-				 * Only settable by CAP_SYS_RESOURCE. */
+	short oom_score_adj;		/* OOM kill score adjustment */
+	short oom_score_adj_min;	/* OOM kill score adjustment min value.
+					 * Only settable by CAP_SYS_RESOURCE. */
 
 	struct mutex cred_guard_mutex;	/* guard against foreign influences on
 					 * credential calculations
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index dd4ba3b92002..1e974983757e 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -14,7 +14,7 @@ TRACE_EVENT(oom_score_adj_update,
 	TP_STRUCT__entry(
 		__field(	pid_t,	pid)
 		__array(	char,	comm,	TASK_COMM_LEN )
-		__field(	 int,	oom_score_adj)
+		__field(	short,	oom_score_adj)
 	),
 
 	TP_fast_assign(
@@ -23,7 +23,7 @@ TRACE_EVENT(oom_score_adj_update,
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
 
-	TP_printk("pid=%d comm=%s oom_score_adj=%d",
+	TP_printk("pid=%d comm=%s oom_score_adj=%hd",
 		__entry->pid, __entry->comm, __entry->oom_score_adj)
 );
 
diff --git a/include/trace/events/task.h b/include/trace/events/task.h
index b53add02e929..102a646e1996 100644
--- a/include/trace/events/task.h
+++ b/include/trace/events/task.h
@@ -15,7 +15,7 @@ TRACE_EVENT(task_newtask,
 		__field(	pid_t,	pid)
 		__array(	char,	comm, TASK_COMM_LEN)
 		__field( unsigned long, clone_flags)
-		__field(	int,    oom_score_adj)
+		__field(	short,	oom_score_adj)
 	),
 
 	TP_fast_assign(
@@ -25,7 +25,7 @@ TRACE_EVENT(task_newtask,
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
 
-	TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%d",
+	TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%hd",
 		__entry->pid, __entry->comm,
 		__entry->clone_flags, __entry->oom_score_adj)
 );
@@ -40,7 +40,7 @@ TRACE_EVENT(task_rename,
 		__field(	pid_t,	pid)
 		__array(	char, oldcomm,  TASK_COMM_LEN)
 		__array(	char, newcomm,  TASK_COMM_LEN)
-		__field(	int, oom_score_adj)
+		__field(	short,	oom_score_adj)
 	),
 
 	TP_fast_assign(
@@ -50,7 +50,7 @@ TRACE_EVENT(task_rename,
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
 
-	TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%d",
+	TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd",
 		__entry->pid, __entry->oldcomm,
 		__entry->newcomm, __entry->oom_score_adj)
 );
diff --git a/mm/ksm.c b/mm/ksm.c
index 31ae5ea1eac0..b4d5a9deb17f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1919,7 +1919,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
-			int oom_score_adj;
+			short oom_score_adj;
 
 			oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 			err = unmerge_and_remove_all_rmap_items();
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7e9e9113bd05..37ab4c5ab6e8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(zone_scan_lock);
  * @old_val.  Usually used to reinstate a previous value to prevent racing with
  * userspacing tuning the value in the interim.
  */
-void compare_swap_oom_score_adj(int old_val, int new_val)
+void compare_swap_oom_score_adj(short old_val, short new_val)
 {
 	struct sighand_struct *sighand = current->sighand;
 
@@ -72,7 +72,7 @@ void compare_swap_oom_score_adj(int old_val, int new_val)
  * synchronization and returns the old value.  Usually used to temporarily
  * set a value, save the old value in the caller, and then reinstate it later.
  */
-int test_set_oom_score_adj(int new_val)
+short test_set_oom_score_adj(short new_val)
 {
 	struct sighand_struct *sighand = current->sighand;
 	int old_val;
@@ -193,7 +193,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	if (!p)
 		return 0;
 
-	adj = p->signal->oom_score_adj;
+	adj = (long)p->signal->oom_score_adj;
 	if (adj == OOM_SCORE_ADJ_MIN) {
 		task_unlock(p);
 		return 0;
@@ -399,7 +399,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu         %5d %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
 			task->mm->nr_ptes,
@@ -415,7 +415,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
-		"oom_score_adj=%d\n",
+		"oom_score_adj=%hd\n",
 		current->comm, gfp_mask, order,
 		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0fbb45283c66..bb6f6a04e92d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1498,7 +1498,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct address_space *mapping;
 	struct inode *inode;
 	struct filename *pathname;
-	int oom_score_adj;
+	short oom_score_adj;
 	int i, type, prev;
 	int err;
 
-- 
cgit v1.2.3-55-g7522


From e1e12d2f3104be886073ac6c5c4678f30b1b9e51 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Tue, 11 Dec 2012 16:02:56 -0800
Subject: mm, oom: fix race when specifying a thread as the oom origin

test_set_oom_score_adj() and compare_swap_oom_score_adj() are used to
specify that current should be killed first if an oom condition occurs in
between the two calls.

The usage is

	short oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
	...
	compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);

to store the thread's oom_score_adj, temporarily change it to the maximum
score possible, and then restore the old value if it is still the same.

This happens to still be racy, however, if the user writes
OOM_SCORE_ADJ_MAX to /proc/pid/oom_score_adj in between the two calls.
The compare_swap_oom_score_adj() will then incorrectly reset the old value
prior to the write of OOM_SCORE_ADJ_MAX.

To fix this, introduce a new oom_flags_t member in struct signal_struct
that will be used for per-thread oom killer flags.  KSM and swapoff can
now use a bit in this member to specify that threads should be killed
first in oom conditions without playing around with oom_score_adj.

This also allows the correct oom_score_adj to always be shown when reading
/proc/pid/oom_score.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h   | 19 +++++++++++++++++--
 include/linux/sched.h |  1 +
 include/linux/types.h |  1 +
 mm/ksm.c              |  7 ++-----
 mm/oom_kill.c         | 49 +++++++------------------------------------------
 mm/swapfile.c         |  5 ++---
 6 files changed, 30 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 922dab164eb0..da60007075b5 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -29,8 +29,23 @@ enum oom_scan_t {
 	OOM_SCAN_SELECT,	/* always select this thread first */
 };
 
-extern void compare_swap_oom_score_adj(short old_val, short new_val);
-extern short test_set_oom_score_adj(short new_val);
+/* Thread is the potential origin of an oom condition; kill first on oom */
+#define OOM_FLAG_ORIGIN		((__force oom_flags_t)0x1)
+
+static inline void set_current_oom_origin(void)
+{
+	current->signal->oom_flags |= OOM_FLAG_ORIGIN;
+}
+
+static inline void clear_current_oom_origin(void)
+{
+	current->signal->oom_flags &= ~OOM_FLAG_ORIGIN;
+}
+
+static inline bool oom_task_origin(const struct task_struct *p)
+{
+	return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN);
+}
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed30456152da..3e387df065fc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -631,6 +631,7 @@ struct signal_struct {
 	struct rw_semaphore group_rwsem;
 #endif
 
+	oom_flags_t oom_flags;
 	short oom_score_adj;		/* OOM kill score adjustment */
 	short oom_score_adj_min;	/* OOM kill score adjustment min value.
 					 * Only settable by CAP_SYS_RESOURCE. */
diff --git a/include/linux/types.h b/include/linux/types.h
index 1cc0e4b9a048..4d118ba11349 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -156,6 +156,7 @@ typedef u32 dma_addr_t;
 #endif
 typedef unsigned __bitwise__ gfp_t;
 typedef unsigned __bitwise__ fmode_t;
+typedef unsigned __bitwise__ oom_flags_t;
 
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 typedef u64 phys_addr_t;
diff --git a/mm/ksm.c b/mm/ksm.c
index b4d5a9deb17f..382d930a0bf1 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1919,12 +1919,9 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
-			short oom_score_adj;
-
-			oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
+			set_current_oom_origin();
 			err = unmerge_and_remove_all_rmap_items();
-			compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX,
-								oom_score_adj);
+			clear_current_oom_origin();
 			if (err) {
 				ksm_run = KSM_RUN_STOP;
 				count = err;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 37ab4c5ab6e8..18f1ae2b45de 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,48 +44,6 @@ int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
 
-/*
- * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj
- * @old_val: old oom_score_adj for compare
- * @new_val: new oom_score_adj for swap
- *
- * Sets the oom_score_adj value for current to @new_val iff its present value is
- * @old_val.  Usually used to reinstate a previous value to prevent racing with
- * userspacing tuning the value in the interim.
- */
-void compare_swap_oom_score_adj(short old_val, short new_val)
-{
-	struct sighand_struct *sighand = current->sighand;
-
-	spin_lock_irq(&sighand->siglock);
-	if (current->signal->oom_score_adj == old_val)
-		current->signal->oom_score_adj = new_val;
-	trace_oom_score_adj_update(current);
-	spin_unlock_irq(&sighand->siglock);
-}
-
-/**
- * test_set_oom_score_adj() - set current's oom_score_adj and return old value
- * @new_val: new oom_score_adj value
- *
- * Sets the oom_score_adj value for current to @new_val with proper
- * synchronization and returns the old value.  Usually used to temporarily
- * set a value, save the old value in the caller, and then reinstate it later.
- */
-short test_set_oom_score_adj(short new_val)
-{
-	struct sighand_struct *sighand = current->sighand;
-	int old_val;
-
-	spin_lock_irq(&sighand->siglock);
-	old_val = current->signal->oom_score_adj;
-	current->signal->oom_score_adj = new_val;
-	trace_oom_score_adj_update(current);
-	spin_unlock_irq(&sighand->siglock);
-
-	return old_val;
-}
-
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -310,6 +268,13 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	if (!task->mm)
 		return OOM_SCAN_CONTINUE;
 
+	/*
+	 * If task is allocating a lot of memory and has been marked to be
+	 * killed first if it triggers an oom, then select it.
+	 */
+	if (oom_task_origin(task))
+		return OOM_SCAN_SELECT;
+
 	if (task->flags & PF_EXITING && !force_kill) {
 		/*
 		 * If this task is not being ptraced on exit, then wait for it
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bb6f6a04e92d..e97a0e5aea91 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1498,7 +1498,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct address_space *mapping;
 	struct inode *inode;
 	struct filename *pathname;
-	short oom_score_adj;
 	int i, type, prev;
 	int err;
 
@@ -1557,9 +1556,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->flags &= ~SWP_WRITEOK;
 	spin_unlock(&swap_lock);
 
-	oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
+	set_current_oom_origin();
 	err = try_to_unuse(type, false, 0); /* force all pages to be unused */
-	compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
+	clear_current_oom_origin();
 
 	if (err) {
 		/* re-insert swap space back into swap_list */
-- 
cgit v1.2.3-55-g7522


From bc357f431c836c6631751e3ef7dfe7882394ad67 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Tue, 11 Dec 2012 16:02:59 -0800
Subject: mm: cma: remove watermark hacks

Commits 2139cbe627b8 ("cma: fix counting of isolated pages") and
d95ea5d18e69 ("cma: fix watermark checking") introduced a reliable
method of free page accounting when memory is being allocated from CMA
regions, so the workaround introduced earlier by commit 49f223a9cd96
("mm: trigger page reclaim in alloc_contig_range() to stabilise
watermarks") can be finally removed.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  9 --------
 mm/page_alloc.c        | 58 --------------------------------------------------
 2 files changed, 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a23923ba8263..0c0b1d608a69 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -63,10 +63,8 @@ enum {
 
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
-#  define cma_wmark_pages(zone)	zone->min_cma_pages
 #else
 #  define is_migrate_cma(migratetype) false
-#  define cma_wmark_pages(zone) 0
 #endif
 
 #define for_each_migratetype_order(order, type) \
@@ -382,13 +380,6 @@ struct zone {
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;
-#endif
-#ifdef CONFIG_CMA
-	/*
-	 * CMA needs to increase watermark levels during the allocation
-	 * process to make sure that the system is not starved.
-	 */
-	unsigned long		min_cma_pages;
 #endif
 	struct free_area	free_area[MAX_ORDER];
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 265fea4fbc81..5a8d339d282a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5218,10 +5218,6 @@ static void __setup_per_zone_wmarks(void)
 		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
 		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
 
-		zone->watermark[WMARK_MIN] += cma_wmark_pages(zone);
-		zone->watermark[WMARK_LOW] += cma_wmark_pages(zone);
-		zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone);
-
 		setup_zone_migrate_reserve(zone);
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
@@ -5766,54 +5762,6 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	return ret > 0 ? 0 : ret;
 }
 
-/*
- * Update zone's cma pages counter used for watermark level calculation.
- */
-static inline void __update_cma_watermarks(struct zone *zone, int count)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&zone->lock, flags);
-	zone->min_cma_pages += count;
-	spin_unlock_irqrestore(&zone->lock, flags);
-	setup_per_zone_wmarks();
-}
-
-/*
- * Trigger memory pressure bump to reclaim some pages in order to be able to
- * allocate 'count' pages in single page units. Does similar work as
- *__alloc_pages_slowpath() function.
- */
-static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
-{
-	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-	struct zonelist *zonelist = node_zonelist(0, gfp_mask);
-	int did_some_progress = 0;
-	int order = 1;
-
-	/*
-	 * Increase level of watermarks to force kswapd do his job
-	 * to stabilise at new watermark level.
-	 */
-	__update_cma_watermarks(zone, count);
-
-	/* Obey watermarks as if the page was being allocated */
-	while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) {
-		wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
-
-		did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
-						      NULL);
-		if (!did_some_progress) {
-			/* Exhausted what can be done so it's blamo time */
-			out_of_memory(zonelist, gfp_mask, order, NULL, false);
-		}
-	}
-
-	/* Restore original watermark levels. */
-	__update_cma_watermarks(zone, -count);
-
-	return count;
-}
-
 /**
  * alloc_contig_range() -- tries to allocate given range of pages
  * @start:	start PFN to allocate
@@ -5837,7 +5785,6 @@ static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
 int alloc_contig_range(unsigned long start, unsigned long end,
 		       unsigned migratetype)
 {
-	struct zone *zone = page_zone(pfn_to_page(start));
 	unsigned long outer_start, outer_end;
 	int ret = 0, order;
 
@@ -5922,11 +5869,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		goto done;
 	}
 
-	/*
-	 * Reclaim enough pages to make sure that contiguous allocation
-	 * will not starve the system.
-	 */
-	__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
 
 	/* Grab isolated pages from freelists. */
 	outer_end = isolate_freepages_range(&cc, outer_start, end);
-- 
cgit v1.2.3-55-g7522


From 81df9bff2609f07cef4690ac2ebda1611b55b05a Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Tue, 11 Dec 2012 16:03:10 -0800
Subject: bootmem: fix wrong call parameter for free_bootmem()

It is strange that alloc_bootmem() returns a virtual address and
free_bootmem() requires a physical address.  Anyway, free_bootmem()'s
first parameter should be physical address.

There are some call sites for free_bootmem() with virtual address.  So fix
them.

[akpm@linux-foundation.org: improve free_bootmem() and free_bootmem_pate() documentation]
Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/celleb_pci.c |  4 ++--
 drivers/macintosh/smu.c                  |  2 +-
 include/linux/bootmem.h                  |  4 ++--
 lib/cpumask.c                            |  2 +-
 mm/bootmem.c                             | 20 ++++++++++----------
 5 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index abc8af43ea7c..173568140a32 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -401,11 +401,11 @@ error:
 	} else {
 		if (config && *config) {
 			size = 256;
-			free_bootmem((unsigned long)(*config), size);
+			free_bootmem(__pa(*config), size);
 		}
 		if (res && *res) {
 			size = sizeof(struct celleb_pci_resource);
-			free_bootmem((unsigned long)(*res), size);
+			free_bootmem(__pa(*res), size);
 		}
 	}
 
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 7d5a6b40b31c..196368009001 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -565,7 +565,7 @@ fail_msg_node:
 fail_db_node:
 	of_node_put(smu->db_node);
 fail_bootmem:
-	free_bootmem((unsigned long)smu, sizeof(struct smu_device));
+	free_bootmem(__pa(smu), sizeof(struct smu_device));
 	smu = NULL;
 fail_np:
 	of_node_put(np);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6d6795d46a75..7b74452c5317 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -51,8 +51,8 @@ extern unsigned long free_all_bootmem(void);
 extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long addr,
 			      unsigned long size);
-extern void free_bootmem(unsigned long addr, unsigned long size);
-extern void free_bootmem_late(unsigned long addr, unsigned long size);
+extern void free_bootmem(unsigned long physaddr, unsigned long size);
+extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 402a54ac35cb..d327b87c99b7 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -161,6 +161,6 @@ EXPORT_SYMBOL(free_cpumask_var);
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-	free_bootmem((unsigned long)mask, cpumask_size());
+	free_bootmem(__pa(mask), cpumask_size());
 }
 #endif
diff --git a/mm/bootmem.c b/mm/bootmem.c
index f468185b3b28..ecc45958ac0c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -147,21 +147,21 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 
 /*
  * free_bootmem_late - free bootmem pages directly to page allocator
- * @addr: starting address of the range
+ * @addr: starting physical address of the range
  * @size: size of the range in bytes
  *
  * This is only useful when the bootmem allocator has already been torn
  * down, but we are still initializing the system.  Pages are given directly
  * to the page allocator, no bootmem metadata is updated because it is gone.
  */
-void __init free_bootmem_late(unsigned long addr, unsigned long size)
+void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
 {
 	unsigned long cursor, end;
 
-	kmemleak_free_part(__va(addr), size);
+	kmemleak_free_part(__va(physaddr), size);
 
-	cursor = PFN_UP(addr);
-	end = PFN_DOWN(addr + size);
+	cursor = PFN_UP(physaddr);
+	end = PFN_DOWN(physaddr + size);
 
 	for (; cursor < end; cursor++) {
 		__free_pages_bootmem(pfn_to_page(cursor), 0);
@@ -377,21 +377,21 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 
 /**
  * free_bootmem - mark a page range as usable
- * @addr: starting address of the range
+ * @addr: starting physical address of the range
  * @size: size of the range in bytes
  *
  * Partial pages will be considered reserved and left as they are.
  *
  * The range must be contiguous but may span node boundaries.
  */
-void __init free_bootmem(unsigned long addr, unsigned long size)
+void __init free_bootmem(unsigned long physaddr, unsigned long size)
 {
 	unsigned long start, end;
 
-	kmemleak_free_part(__va(addr), size);
+	kmemleak_free_part(__va(physaddr), size);
 
-	start = PFN_UP(addr);
-	end = PFN_DOWN(addr + size);
+	start = PFN_UP(physaddr);
+	end = PFN_DOWN(physaddr + size);
 
 	mark_bootmem(start, end, 0, 0);
 }
-- 
cgit v1.2.3-55-g7522


From 511c2aba8f07fc45bdcba548cb63f7b8a450c6dc Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 11 Dec 2012 16:03:16 -0800
Subject: mm, memory-hotplug: dynamic configure movable memory and portion
 memory

Add online_movable and online_kernel for logic memory hotplug.  This is
the dynamic version of "movablecore" & "kernelcore".

We have the same reason to introduce it as to introduce "movablecore" &
"kernelcore".  It has the same motive as "movablecore" & "kernelcore", but
it is dynamic/running-time:

o We can configure memory as kernelcore or movablecore after boot.

  Userspace workload is increased, we need more hugepage, we can't use
  "online_movable" to add memory and allow the system use more
  THP(transparent-huge-page), vice-verse when kernel workload is increase.

  Also help for virtualization to dynamic configure host/guest's memory,
  to save/(reduce waste) memory.

  Memory capacity on Demand

o When a new node is physically online after boot, we need to use
  "online_movable" or "online_kernel" to configure/portion it as we
  expected when we logic-online it.

  This configuration also helps for physically-memory-migrate.

o all benefit as the same as existed "movablecore" & "kernelcore".

o Preparing for movable-node, which is very important for power-saving,
  hardware partitioning and high-available-system(hardware fault
  management).

(Note, we don't introduce movable-node here.)

Action behavior:
When a memoryblock/memorysection is onlined by "online_movable", the kernel
will not have directly reference to the page of the memoryblock,
thus we can remove that memory any time when needed.

When it is online by "online_kernel", the kernel can use it.
When it is online by "online", the zone type doesn't changed.

Current constraints:
Only the memoryblock which is adjacent to the ZONE_MOVABLE
can be online from ZONE_NORMAL to ZONE_MOVABLE.

[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/memory-hotplug.txt |  14 +++++-
 drivers/base/memory.c            |  33 ++++++++-----
 include/linux/memory_hotplug.h   |  13 ++++-
 mm/memory_hotplug.c              | 100 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 146 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 6e6cbc78f329..c6f993d491b5 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -161,7 +161,8 @@ a recent addition and not present on older kernels.
 		    in the memory block.
 'state'           : read-write
                     at read:  contains online/offline state of memory.
-                    at write: user can specify "online", "offline" command
+                    at write: user can specify "online_kernel",
+                    "online_movable", "online", "offline" command
                     which will be performed on al sections in the block.
 'phys_device'     : read-only: designed to show the name of physical memory
                     device.  This is not well implemented now.
@@ -255,6 +256,17 @@ For onlining, you have to write "online" to the section's state file as:
 
 % echo online > /sys/devices/system/memory/memoryXXX/state
 
+This onlining will not change the ZONE type of the target memory section,
+If the memory section is in ZONE_NORMAL, you can change it to ZONE_MOVABLE:
+
+% echo online_movable > /sys/devices/system/memory/memoryXXX/state
+(NOTE: current limit: this memory section must be adjacent to ZONE_MOVABLE)
+
+And if the memory section is in ZONE_MOVABLE, you can change it to ZONE_NORMAL:
+
+% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
+(NOTE: current limit: this memory section must be adjacent to ZONE_NORMAL)
+
 After this, section memoryXXX's state will be 'online' and the amount of
 available memory will be increased.
 
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 7eb1211ab688..987604d56c83 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -254,7 +254,7 @@ static bool pages_correctly_reserved(unsigned long start_pfn,
  * OK to have direct references to sparsemem variables in here.
  */
 static int
-memory_block_action(unsigned long phys_index, unsigned long action)
+memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
 {
 	unsigned long start_pfn;
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
@@ -269,7 +269,7 @@ memory_block_action(unsigned long phys_index, unsigned long action)
 			if (!pages_correctly_reserved(start_pfn, nr_pages))
 				return -EBUSY;
 
-			ret = online_pages(start_pfn, nr_pages);
+			ret = online_pages(start_pfn, nr_pages, online_type);
 			break;
 		case MEM_OFFLINE:
 			ret = offline_pages(start_pfn, nr_pages);
@@ -284,7 +284,8 @@ memory_block_action(unsigned long phys_index, unsigned long action)
 }
 
 static int __memory_block_change_state(struct memory_block *mem,
-		unsigned long to_state, unsigned long from_state_req)
+		unsigned long to_state, unsigned long from_state_req,
+		int online_type)
 {
 	int ret = 0;
 
@@ -296,7 +297,7 @@ static int __memory_block_change_state(struct memory_block *mem,
 	if (to_state == MEM_OFFLINE)
 		mem->state = MEM_GOING_OFFLINE;
 
-	ret = memory_block_action(mem->start_section_nr, to_state);
+	ret = memory_block_action(mem->start_section_nr, to_state, online_type);
 
 	if (ret) {
 		mem->state = from_state_req;
@@ -319,12 +320,14 @@ out:
 }
 
 static int memory_block_change_state(struct memory_block *mem,
-		unsigned long to_state, unsigned long from_state_req)
+		unsigned long to_state, unsigned long from_state_req,
+		int online_type)
 {
 	int ret;
 
 	mutex_lock(&mem->state_mutex);
-	ret = __memory_block_change_state(mem, to_state, from_state_req);
+	ret = __memory_block_change_state(mem, to_state, from_state_req,
+					  online_type);
 	mutex_unlock(&mem->state_mutex);
 
 	return ret;
@@ -338,10 +341,18 @@ store_mem_state(struct device *dev,
 
 	mem = container_of(dev, struct memory_block, dev);
 
-	if (!strncmp(buf, "online", min((int)count, 6)))
-		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
-	else if(!strncmp(buf, "offline", min((int)count, 7)))
-		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+	if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
+		ret = memory_block_change_state(mem, MEM_ONLINE,
+						MEM_OFFLINE, ONLINE_KERNEL);
+	else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
+		ret = memory_block_change_state(mem, MEM_ONLINE,
+						MEM_OFFLINE, ONLINE_MOVABLE);
+	else if (!strncmp(buf, "online", min_t(int, count, 6)))
+		ret = memory_block_change_state(mem, MEM_ONLINE,
+						MEM_OFFLINE, ONLINE_KEEP);
+	else if(!strncmp(buf, "offline", min_t(int, count, 7)))
+		ret = memory_block_change_state(mem, MEM_OFFLINE,
+						MEM_ONLINE, -1);
 
 	if (ret)
 		return ret;
@@ -676,7 +687,7 @@ int offline_memory_block(struct memory_block *mem)
 
 	mutex_lock(&mem->state_mutex);
 	if (mem->state != MEM_OFFLINE)
-		ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+		ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE, -1);
 	mutex_unlock(&mem->state_mutex);
 
 	return ret;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 95573ec4ee6c..4a45c4e50025 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,6 +26,13 @@ enum {
 	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
 };
 
+/* Types for control the zone type of onlined memory */
+enum {
+	ONLINE_KEEP,
+	ONLINE_KERNEL,
+	ONLINE_MOVABLE,
+};
+
 /*
  * pgdat resizing functions
  */
@@ -46,6 +53,10 @@ void pgdat_resize_init(struct pglist_data *pgdat)
 }
 /*
  * Zone resizing functions
+ *
+ * Note: any attempt to resize a zone should has pgdat_resize_lock()
+ * zone_span_writelock() both held. This ensure the size of a zone
+ * can't be changed while pgdat_resize_lock() held.
  */
 static inline unsigned zone_span_seqbegin(struct zone *zone)
 {
@@ -71,7 +82,7 @@ extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
 extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 /* VM interface that may be used by firmware interface */
-extern int online_pages(unsigned long, unsigned long);
+extern int online_pages(unsigned long, unsigned long, int);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
 typedef void (*online_page_callback_t)(struct page *page);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 571130ee66d7..5c1f4959e6b4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -214,6 +214,88 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 	zone_span_writeunlock(zone);
 }
 
+static void resize_zone(struct zone *zone, unsigned long start_pfn,
+		unsigned long end_pfn)
+{
+	zone_span_writelock(zone);
+
+	zone->zone_start_pfn = start_pfn;
+	zone->spanned_pages = end_pfn - start_pfn;
+
+	zone_span_writeunlock(zone);
+}
+
+static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
+		unsigned long end_pfn)
+{
+	enum zone_type zid = zone_idx(zone);
+	int nid = zone->zone_pgdat->node_id;
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++)
+		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
+}
+
+static int move_pfn_range_left(struct zone *z1, struct zone *z2,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long flags;
+
+	pgdat_resize_lock(z1->zone_pgdat, &flags);
+
+	/* can't move pfns which are higher than @z2 */
+	if (end_pfn > z2->zone_start_pfn + z2->spanned_pages)
+		goto out_fail;
+	/* the move out part mast at the left most of @z2 */
+	if (start_pfn > z2->zone_start_pfn)
+		goto out_fail;
+	/* must included/overlap */
+	if (end_pfn <= z2->zone_start_pfn)
+		goto out_fail;
+
+	resize_zone(z1, z1->zone_start_pfn, end_pfn);
+	resize_zone(z2, end_pfn, z2->zone_start_pfn + z2->spanned_pages);
+
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+
+	fix_zone_id(z1, start_pfn, end_pfn);
+
+	return 0;
+out_fail:
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+	return -1;
+}
+
+static int move_pfn_range_right(struct zone *z1, struct zone *z2,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long flags;
+
+	pgdat_resize_lock(z1->zone_pgdat, &flags);
+
+	/* can't move pfns which are lower than @z1 */
+	if (z1->zone_start_pfn > start_pfn)
+		goto out_fail;
+	/* the move out part mast at the right most of @z1 */
+	if (z1->zone_start_pfn + z1->spanned_pages >  end_pfn)
+		goto out_fail;
+	/* must included/overlap */
+	if (start_pfn >= z1->zone_start_pfn + z1->spanned_pages)
+		goto out_fail;
+
+	resize_zone(z1, z1->zone_start_pfn, start_pfn);
+	resize_zone(z2, start_pfn, z2->zone_start_pfn + z2->spanned_pages);
+
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+
+	fix_zone_id(z2, start_pfn, end_pfn);
+
+	return 0;
+out_fail:
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+	return -1;
+}
+
 static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
 			    unsigned long end_pfn)
 {
@@ -508,7 +590,7 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 }
 
 
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
 {
 	unsigned long onlined_pages = 0;
 	struct zone *zone;
@@ -525,6 +607,22 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	 */
 	zone = page_zone(pfn_to_page(pfn));
 
+	if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
+			unlock_memory_hotplug();
+			return -1;
+		}
+	}
+	if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
+			unlock_memory_hotplug();
+			return -1;
+		}
+	}
+
+	/* Previous code may changed the zone of the pfn range */
+	zone = page_zone(pfn_to_page(pfn));
+
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	node_states_check_changes_online(nr_pages, zone, &arg);
-- 
cgit v1.2.3-55-g7522


From 1f53ef17d3ed6c34868cc8e7aa7c1d351c2fdc95 Mon Sep 17 00:00:00 2001
From: Zhang Rui
Date: Wed, 12 Dec 2012 15:31:37 +0800
Subject: Thermal: Fix DEFAULT_THERMAL_GOVERNOR

Fix DEFAULT_THERMAL_GOVERNOR to be consistant with the
default governor selected in kernel config file.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/step_wise.c |  2 +-
 include/linux/thermal.h     | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 1242cffed8b0..0cd5e9fbab1c 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -170,7 +170,7 @@ static int step_wise_throttle(struct thermal_zone_device *tz, int trip)
 }
 
 static struct thermal_governor thermal_gov_step_wise = {
-	.name		= DEFAULT_THERMAL_GOVERNOR,
+	.name		= "step_wise",
 	.throttle	= step_wise_throttle,
 	.owner		= THIS_MODULE,
 };
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 807f2146fe35..fe82022478e7 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -46,8 +46,14 @@
 #define THERMAL_GENL_VERSION                    0x01
 #define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_group"
 
-/* Default Thermal Governor: Does Linear Throttling */
-#define DEFAULT_THERMAL_GOVERNOR	"step_wise"
+/* Default Thermal Governor */
+#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
+#define DEFAULT_THERMAL_GOVERNOR       "step_wise"
+#elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE)
+#define DEFAULT_THERMAL_GOVERNOR       "fair_share"
+#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE)
+#define DEFAULT_THERMAL_GOVERNOR       "user_space"
+#endif
 
 struct thermal_zone_device;
 struct thermal_cooling_device;
-- 
cgit v1.2.3-55-g7522


From 83499b52c61f50292f0aae36499de8a8fc3e37c3 Mon Sep 17 00:00:00 2001
From: Alexander Holler
Date: Sun, 9 Dec 2012 12:44:30 +0100
Subject: HID: sensors: autodetect USB HID sensor hubs

It should not be necessary to add IDs for HID sensor hubs to lists in
hid-core.c and hid-sensor-hub.c. So instead of a whitelist, autodetect such USB
HID sensor hubs, based on a collection of type physical inside a useage page of
type sensor. If some sensor hubs stil must be usable as raw devices, a
blacklist might be created.

Signed-off-by: Alexander Holler <holler@ahsoftware.de>
Acked-by: "Pandruvada, Srinivas" <srinivas.pandruvada@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c         | 11 ++++++++++-
 drivers/hid/hid-sensor-hub.c   | 32 +-------------------------------
 include/linux/hid-sensor-ids.h |  1 -
 include/linux/hid.h            |  2 ++
 4 files changed, 13 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f4109fd657ff..7df5399a9386 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -713,7 +713,12 @@ static int hid_scan_report(struct hid_device *hid)
 					hid_scan_usage(hid, u);
 				break;
 			}
-		}
+		} else if (page == HID_UP_SENSOR &&
+			item.type == HID_ITEM_TYPE_MAIN &&
+			item.tag == HID_MAIN_ITEM_TAG_BEGIN_COLLECTION &&
+			(item_udata(&item) & 0xff) == HID_COLLECTION_PHYSICAL &&
+			hid->bus == BUS_USB)
+			hid->group = HID_GROUP_SENSOR_HUB;
 	}
 
 	return 0;
@@ -1465,6 +1470,10 @@ EXPORT_SYMBOL_GPL(hid_disconnect);
  * there is a proper autodetection and autoloading in place (based on presence
  * of HID_DG_CONTACTID), so those devices don't need to be added to this list,
  * as we are doing the right thing in hid_scan_usage().
+ *
+ * Autodetection for (USB) HID sensor hubs exists too. If a collection of type
+ * physical is found inside a usage page of type sensor, hid-sensor-hub will be
+ * used as a driver. See hid_scan_report().
  */
 static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index d9d73e9163eb..ca88ddc2d78f 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -82,23 +82,6 @@ struct hid_sensor_hub_callbacks_list {
 	void *priv;
 };
 
-static int sensor_hub_check_for_sensor_page(struct hid_device *hdev)
-{
-	int i;
-	int ret = -EINVAL;
-
-	for (i = 0; i < hdev->maxcollection; i++) {
-		struct hid_collection *col = &hdev->collection[i];
-		if (col->type == HID_COLLECTION_PHYSICAL &&
-		   (col->usage & HID_USAGE_PAGE) == HID_UP_SENSOR) {
-			ret = 0;
-			break;
-		}
-	}
-
-	return ret;
-}
-
 static struct hid_report *sensor_hub_report(int id, struct hid_device *hdev,
 						int dir)
 {
@@ -524,10 +507,6 @@ static int sensor_hub_probe(struct hid_device *hdev,
 		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
-	if (sensor_hub_check_for_sensor_page(hdev) < 0) {
-		hid_err(hdev, "sensor page not found\n");
-		goto err_free;
-	}
 	INIT_LIST_HEAD(&hdev->inputs);
 
 	ret = hid_hw_start(hdev, 0);
@@ -630,16 +609,7 @@ static void sensor_hub_remove(struct hid_device *hdev)
 }
 
 static const struct hid_device_id sensor_hub_devices[] = {
-	{ HID_USB_DEVICE(USB_VENDOR_ID_INTEL_8086,
-			USB_DEVICE_ID_SENSOR_HUB_1020) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_INTEL_8087,
-			USB_DEVICE_ID_SENSOR_HUB_1020) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_INTEL_8086,
-			USB_DEVICE_ID_SENSOR_HUB_09FA) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_INTEL_8087,
-			USB_DEVICE_ID_SENSOR_HUB_09FA) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM,
-			USB_DEVICE_ID_SENSOR_HUB_7014) },
+	{ HID_DEVICE(BUS_USB, HID_GROUP_SENSOR_HUB, HID_ANY_ID, HID_ANY_ID) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, sensor_hub_devices);
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index ca8d7e94eb3c..55f277372fed 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -19,7 +19,6 @@
 #ifndef _HID_SENSORS_IDS_H
 #define _HID_SENSORS_IDS_H
 
-#define HID_UP_SENSOR						0x00200000
 #define HID_MAX_PHY_DEVICES					0xFF
 
 /* Accel 3D (200073) */
diff --git a/include/linux/hid.h b/include/linux/hid.h
index c076041a069e..c5f6ec2b15c2 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -167,6 +167,7 @@ struct hid_item {
 #define HID_UP_MSVENDOR		0xff000000
 #define HID_UP_CUSTOM		0x00ff0000
 #define HID_UP_LOGIVENDOR	0xffbc0000
+#define HID_UP_SENSOR		0x00200000
 
 #define HID_USAGE		0x0000ffff
 
@@ -292,6 +293,7 @@ struct hid_item {
  */
 #define HID_GROUP_GENERIC			0x0001
 #define HID_GROUP_MULTITOUCH			0x0002
+#define HID_GROUP_SENSOR_HUB			0x0003
 
 /*
  * This is the global environment of the parser. This information is
-- 
cgit v1.2.3-55-g7522


From 93b4796dede916de74b21fbd637588da6a99a7ec Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Wed, 12 Dec 2012 13:50:54 -0800
Subject: thp: do_huge_pmd_wp_page(): handle huge zero page

On write access to huge zero page we alloc a new huge page and clear it.

If ENOMEM, graceful fallback: we create a new pmd table and set pte around
fault address to newly allocated normal (4k) page.  All other ptes in the
pmd set to normal zero page.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |   8 ++++
 mm/huge_memory.c   | 111 +++++++++++++++++++++++++++++++++++++++++++++--------
 mm/memory.c        |   7 ----
 3 files changed, 104 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4af4f0b1be4c..4b80bad4a367 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -516,6 +516,14 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 #endif
 
+#ifndef my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	extern unsigned long zero_pfn;
+	return zero_pfn;
+}
+#endif
+
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 650625390f61..a959b3a4ddd5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -858,6 +858,70 @@ unlock:
 	spin_unlock(&mm->page_table_lock);
 }
 
+static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd, unsigned long haddr)
+{
+	pgtable_t pgtable;
+	pmd_t _pmd;
+	struct page *page;
+	int i, ret = 0;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+
+	page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (!page) {
+		ret |= VM_FAULT_OOM;
+		goto out;
+	}
+
+	if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
+		put_page(page);
+		ret |= VM_FAULT_OOM;
+		goto out;
+	}
+
+	clear_user_highpage(page, address);
+	__SetPageUptodate(page);
+
+	mmun_start = haddr;
+	mmun_end   = haddr + HPAGE_PMD_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	spin_lock(&mm->page_table_lock);
+	pmdp_clear_flush(vma, haddr, pmd);
+	/* leave pmd empty until pte is filled */
+
+	pgtable = pgtable_trans_huge_withdraw(mm);
+	pmd_populate(mm, &_pmd, pgtable);
+
+	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+		pte_t *pte, entry;
+		if (haddr == (address & PAGE_MASK)) {
+			entry = mk_pte(page, vma->vm_page_prot);
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+			page_add_new_anon_rmap(page, vma, haddr);
+		} else {
+			entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+			entry = pte_mkspecial(entry);
+		}
+		pte = pte_offset_map(&_pmd, haddr);
+		VM_BUG_ON(!pte_none(*pte));
+		set_pte_at(mm, haddr, pte, entry);
+		pte_unmap(pte);
+	}
+	smp_wmb(); /* make pte visible before pmd */
+	pmd_populate(mm, pmd, pgtable);
+	spin_unlock(&mm->page_table_lock);
+	inc_mm_counter(mm, MM_ANONPAGES);
+
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
+	ret |= VM_FAULT_WRITE;
+out:
+	return ret;
+}
+
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long address,
@@ -964,19 +1028,21 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 {
 	int ret = 0;
-	struct page *page, *new_page;
+	struct page *page = NULL, *new_page;
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	VM_BUG_ON(!vma->anon_vma);
+	haddr = address & HPAGE_PMD_MASK;
+	if (is_huge_zero_pmd(orig_pmd))
+		goto alloc;
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_unlock;
 
 	page = pmd_page(orig_pmd);
 	VM_BUG_ON(!PageCompound(page) || !PageHead(page));
-	haddr = address & HPAGE_PMD_MASK;
 	if (page_mapcount(page) == 1) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
@@ -988,7 +1054,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-
+alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
 		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
@@ -998,24 +1064,34 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (unlikely(!new_page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
-		ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
-						   pmd, orig_pmd, page, haddr);
-		if (ret & VM_FAULT_OOM)
-			split_huge_page(page);
-		put_page(page);
+		if (is_huge_zero_pmd(orig_pmd)) {
+			ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
+					address, pmd, haddr);
+		} else {
+			ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
+					pmd, orig_pmd, page, haddr);
+			if (ret & VM_FAULT_OOM)
+				split_huge_page(page);
+			put_page(page);
+		}
 		goto out;
 	}
 	count_vm_event(THP_FAULT_ALLOC);
 
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		put_page(new_page);
-		split_huge_page(page);
-		put_page(page);
+		if (page) {
+			split_huge_page(page);
+			put_page(page);
+		}
 		ret |= VM_FAULT_OOM;
 		goto out;
 	}
 
-	copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
+	if (is_huge_zero_pmd(orig_pmd))
+		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
+	else
+		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
 	__SetPageUptodate(new_page);
 
 	mmun_start = haddr;
@@ -1023,7 +1099,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
 	spin_lock(&mm->page_table_lock);
-	put_page(page);
+	if (page)
+		put_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 		spin_unlock(&mm->page_table_lock);
 		mem_cgroup_uncharge_page(new_page);
@@ -1031,14 +1108,18 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_mn;
 	} else {
 		pmd_t entry;
-		VM_BUG_ON(!PageHead(page));
 		entry = mk_huge_pmd(new_page, vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache_pmd(vma, address, pmd);
-		page_remove_rmap(page);
-		put_page(page);
+		if (is_huge_zero_pmd(orig_pmd))
+			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		else {
+			VM_BUG_ON(!PageHead(page));
+			page_remove_rmap(page);
+			put_page(page);
+		}
 		ret |= VM_FAULT_WRITE;
 	}
 	spin_unlock(&mm->page_table_lock);
diff --git a/mm/memory.c b/mm/memory.c
index 765377385632..259b34fe1347 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -724,13 +724,6 @@ static inline int is_zero_pfn(unsigned long pfn)
 }
 #endif
 
-#ifndef my_zero_pfn
-static inline unsigned long my_zero_pfn(unsigned long addr)
-{
-	return zero_pfn;
-}
-#endif
-
 /*
  * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
-- 
cgit v1.2.3-55-g7522


From e180377f1ae48b3cbc559c9875d9b038f7f000c6 Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Wed, 12 Dec 2012 13:50:59 -0800
Subject: thp: change split_huge_page_pmd() interface

Pass vma instead of mm and add address parameter.

In most cases we already have vma on the stack. We provides
split_huge_page_pmd_mm() for few cases when we have mm, but not vma.

This change is preparation to huge zero pmd splitting implementation.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/transhuge.txt |  4 ++--
 arch/x86/kernel/vm86_32.c      |  2 +-
 fs/proc/task_mmu.c             |  2 +-
 include/linux/huge_mm.h        | 14 ++++++++++----
 mm/huge_memory.c               | 19 +++++++++++++++++--
 mm/memory.c                    |  4 ++--
 mm/mempolicy.c                 |  2 +-
 mm/mprotect.c                  |  2 +-
 mm/mremap.c                    |  2 +-
 mm/pagewalk.c                  |  2 +-
 10 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index f734bb2a78dc..8f5b41db314c 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -276,7 +276,7 @@ unaffected. libhugetlbfs will also work fine as usual.
 == Graceful fallback ==
 
 Code walking pagetables but unware about huge pmds can simply call
-split_huge_page_pmd(mm, pmd) where the pmd is the one returned by
+split_huge_page_pmd(vma, addr, pmd) where the pmd is the one returned by
 pmd_offset. It's trivial to make the code transparent hugepage aware
 by just grepping for "pmd_offset" and adding split_huge_page_pmd where
 missing after pmd_offset returns the pmd. Thanks to the graceful
@@ -299,7 +299,7 @@ diff --git a/mm/mremap.c b/mm/mremap.c
 		return NULL;
 
 	pmd = pmd_offset(pud, addr);
-+	split_huge_page_pmd(mm, pmd);
++	split_huge_page_pmd(vma, addr, pmd);
 	if (pmd_none_or_clear_bad(pmd))
 		return NULL;
 
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5c9687b1bde6..1dfe69cc78a8 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 	if (pud_none_or_clear_bad(pud))
 		goto out;
 	pmd = pmd_offset(pud, 0xA0000);
-	split_huge_page_pmd(mm, pmd);
+	split_huge_page_pmd_mm(mm, 0xA0000, pmd);
 	if (pmd_none_or_clear_bad(pmd))
 		goto out;
 	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 90c63f9392a5..291a0d15a0be 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -643,7 +643,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	spinlock_t *ptl;
 	struct page *page;
 
-	split_huge_page_pmd(walk->mm, pmd);
+	split_huge_page_pmd(vma, addr, pmd);
 	if (pmd_trans_unstable(pmd))
 		return 0;
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1af477552459..3132ea788581 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -95,12 +95,14 @@ extern int handle_pte_fault(struct mm_struct *mm,
 			    struct vm_area_struct *vma, unsigned long address,
 			    pte_t *pte, pmd_t *pmd, unsigned int flags);
 extern int split_huge_page(struct page *page);
-extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
-#define split_huge_page_pmd(__mm, __pmd)				\
+extern void __split_huge_page_pmd(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmd);
+#define split_huge_page_pmd(__vma, __address, __pmd)			\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
 		if (unlikely(pmd_trans_huge(*____pmd)))			\
-			__split_huge_page_pmd(__mm, ____pmd);		\
+			__split_huge_page_pmd(__vma, __address,		\
+					____pmd);			\
 	}  while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
@@ -110,6 +112,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
 	} while (0)
+extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd);
 #if HPAGE_PMD_ORDER > MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
@@ -177,10 +181,12 @@ static inline int split_huge_page(struct page *page)
 {
 	return 0;
 }
-#define split_huge_page_pmd(__mm, __pmd)	\
+#define split_huge_page_pmd(__vma, __address, __pmd)	\
 	do { } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)	\
 	do { } while (0)
+#define split_huge_page_pmd_mm(__mm, __address, __pmd)	\
+	do { } while (0)
 #define compound_trans_head(page) compound_head(page)
 static inline int hugepage_madvise(struct vm_area_struct *vma,
 				   unsigned long *vm_flags, int advice)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7742fb36eb4d..de6aa5f3fdd2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2475,9 +2475,14 @@ static int khugepaged(void *none)
 	return 0;
 }
 
-void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
+void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd)
 {
 	struct page *page;
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	struct mm_struct *mm = vma->vm_mm;
+
+	BUG_ON(vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE);
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_trans_huge(*pmd))) {
@@ -2495,6 +2500,16 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
 	BUG_ON(pmd_trans_huge(*pmd));
 }
 
+void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(mm, address);
+	BUG_ON(vma == NULL);
+	split_huge_page_pmd(vma, address, pmd);
+}
+
 static void split_huge_page_address(struct mm_struct *mm,
 				    unsigned long address)
 {
@@ -2509,7 +2524,7 @@ static void split_huge_page_address(struct mm_struct *mm,
 	 * Caller holds the mmap_sem write mode, so a huge pmd cannot
 	 * materialize from under us.
 	 */
-	split_huge_page_pmd(mm, pmd);
+	split_huge_page_pmd_mm(mm, address, pmd);
 }
 
 void __vma_adjust_trans_huge(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c
index 259b34fe1347..f9a4b0cb8623 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1243,7 +1243,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 					BUG();
 				}
 #endif
-				split_huge_page_pmd(vma->vm_mm, pmd);
+				split_huge_page_pmd(vma, addr, pmd);
 			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				goto next;
 			/* fall through */
@@ -1512,7 +1512,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	}
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
-			split_huge_page_pmd(mm, pmd);
+			split_huge_page_pmd(vma, address, pmd);
 			goto split_fallthrough;
 		}
 		spin_lock(&mm->page_table_lock);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 05b28361a39b..0719e8dd4945 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -511,7 +511,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		split_huge_page_pmd(vma->vm_mm, pmd);
+		split_huge_page_pmd(vma, addr, pmd);
 		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 			continue;
 		if (check_pte_range(vma, pmd, addr, next, nodes,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a40992610ab6..e8c3938db6fa 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -90,7 +90,7 @@ static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
-				split_huge_page_pmd(vma->vm_mm, pmd);
+				split_huge_page_pmd(vma, addr, pmd);
 			else if (change_huge_pmd(vma, pmd, addr, newprot))
 				continue;
 			/* fall through */
diff --git a/mm/mremap.c b/mm/mremap.c
index 1b61c2d3307a..eabb24da6c9e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -182,7 +182,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 				need_flush = true;
 				continue;
 			} else if (!err) {
-				split_huge_page_pmd(vma->vm_mm, old_pmd);
+				split_huge_page_pmd(vma, old_addr, old_pmd);
 			}
 			VM_BUG_ON(pmd_trans_huge(*old_pmd));
 		}
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 6c118d012bb5..35aa294656cd 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -58,7 +58,7 @@ again:
 		if (!walk->pte_entry)
 			continue;
 
-		split_huge_page_pmd(walk->mm, pmd);
+		split_huge_page_pmd_mm(walk->mm, addr, pmd);
 		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 			goto again;
 		err = walk_pte_range(pmd, addr, next, walk);
-- 
cgit v1.2.3-55-g7522


From d8a8e1f0da3d29d7268b3300c96a059d63901b76 Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Wed, 12 Dec 2012 13:51:09 -0800
Subject: thp, vmstat: implement HZP_ALLOC and HZP_ALLOC_FAILED events

hzp_alloc is incremented every time a huge zero page is successfully
	allocated. It includes allocations which where dropped due
	race with other allocation. Note, it doesn't count every map
	of the huge zero page, only its allocation.

hzp_alloc_failed is incremented if kernel fails to allocate huge zero
	page and falls back to using small pages.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/transhuge.txt | 8 ++++++++
 include/linux/vm_event_item.h  | 2 ++
 mm/huge_memory.c               | 5 ++++-
 mm/vmstat.c                    | 2 ++
 4 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index 8f5b41db314c..60aeedd54615 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -197,6 +197,14 @@ thp_split is incremented every time a huge page is split into base
 	pages. This can happen for a variety of reasons but a common
 	reason is that a huge page is old and is being reclaimed.
 
+thp_zero_page_alloc is incremented every time a huge zero page is
+	successfully allocated. It includes allocations which where
+	dropped due race with other allocation. Note, it doesn't count
+	every map of the huge zero page, only its allocation.
+
+thp_zero_page_alloc_failed is incremented if kernel fails to allocate
+	huge zero page and falls back to using small pages.
+
 As the system ages, allocating huge pages may be expensive as the
 system uses memory compaction to copy data around memory to free a
 huge page for use. There are some counters in /proc/vmstat to help
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3d3114594370..fe786f07d2bd 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -58,6 +58,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_COLLAPSE_ALLOC,
 		THP_COLLAPSE_ALLOC_FAILED,
 		THP_SPLIT,
+		THP_ZERO_PAGE_ALLOC,
+		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d89220cb1d9f..9a5d45dfad44 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -184,8 +184,11 @@ retry:
 
 	zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
 			HPAGE_PMD_ORDER);
-	if (!zero_page)
+	if (!zero_page) {
+		count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
 		return 0;
+	}
+	count_vm_event(THP_ZERO_PAGE_ALLOC);
 	preempt_disable();
 	if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) {
 		preempt_enable();
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c7370579111b..5da4b19023c3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -801,6 +801,8 @@ const char * const vmstat_text[] = {
 	"thp_collapse_alloc",
 	"thp_collapse_alloc_failed",
 	"thp_split",
+	"thp_zero_page_alloc",
+	"thp_zero_page_alloc_failed",
 #endif
 
 #endif /* CONFIG_VM_EVENTS_COUNTERS */
-- 
cgit v1.2.3-55-g7522


From 79da5407eeadc740fbf4b45d6df7d7f8e6adaf2c Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Wed, 12 Dec 2012 13:51:12 -0800
Subject: thp: introduce sysfs knob to disable huge zero page

By default kernel tries to use huge zero page on read page fault.  It's
possible to disable huge zero page by writing 0 or enable it back by
writing 1:

echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page
echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/transhuge.txt |  7 +++++++
 include/linux/huge_mm.h        |  4 ++++
 mm/huge_memory.c               | 21 +++++++++++++++++++--
 3 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index 60aeedd54615..8785fb87d9c7 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -116,6 +116,13 @@ echo always >/sys/kernel/mm/transparent_hugepage/defrag
 echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
 echo never >/sys/kernel/mm/transparent_hugepage/defrag
 
+By default kernel tries to use huge zero page on read page fault.
+It's possible to disable huge zero page by writing 0 or enable it
+back by writing 1:
+
+echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page
+echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page
+
 khugepaged will be automatically started when
 transparent_hugepage/enabled is set to "always" or "madvise, and it'll
 be automatically shutdown if it's set to "never".
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3132ea788581..092dc5305a32 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,7 @@ enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+	TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
 #ifdef CONFIG_DEBUG_VM
 	TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
 #endif
@@ -78,6 +79,9 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 	 (transparent_hugepage_flags &					\
 	  (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) &&		\
 	  (__vma)->vm_flags & VM_HUGEPAGE))
+#define transparent_hugepage_use_zero_page()				\
+	(transparent_hugepage_flags &					\
+	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
 #ifdef CONFIG_DEBUG_VM
 #define transparent_hugepage_debug_cow()				\
 	(transparent_hugepage_flags &					\
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9a5d45dfad44..72835cea0b0f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -39,7 +39,8 @@ unsigned long transparent_hugepage_flags __read_mostly =
 	(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
 #endif
 	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)|
-	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
+	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
+	(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
 
 /* default scan 8*512 pte (or vmas) every 30 second */
 static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8;
@@ -357,6 +358,20 @@ static ssize_t defrag_store(struct kobject *kobj,
 static struct kobj_attribute defrag_attr =
 	__ATTR(defrag, 0644, defrag_show, defrag_store);
 
+static ssize_t use_zero_page_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return single_flag_show(kobj, attr, buf,
+				TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+}
+static ssize_t use_zero_page_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	return single_flag_store(kobj, attr, buf, count,
+				 TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+}
+static struct kobj_attribute use_zero_page_attr =
+	__ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
 #ifdef CONFIG_DEBUG_VM
 static ssize_t debug_cow_show(struct kobject *kobj,
 				struct kobj_attribute *attr, char *buf)
@@ -378,6 +393,7 @@ static struct kobj_attribute debug_cow_attr =
 static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
 	&defrag_attr.attr,
+	&use_zero_page_attr.attr,
 #ifdef CONFIG_DEBUG_VM
 	&debug_cow_attr.attr,
 #endif
@@ -779,7 +795,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			return VM_FAULT_OOM;
 		if (unlikely(khugepaged_enter(vma)))
 			return VM_FAULT_OOM;
-		if (!(flags & FAULT_FLAG_WRITE)) {
+		if (!(flags & FAULT_FLAG_WRITE) &&
+				transparent_hugepage_use_zero_page()) {
 			pgtable_t pgtable;
 			unsigned long zero_pfn;
 			pgtable = pte_alloc_one(mm, haddr);
-- 
cgit v1.2.3-55-g7522


From 8219fc48adb3b09eabf502c560bf13f273ea69a3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:51:21 -0800
Subject: mm: node_states: introduce N_MEMORY

We have N_NORMAL_MEMORY for standing for the nodes that have normal memory
with zone_type <= ZONE_NORMAL.

And we have N_HIGH_MEMORY for standing for the nodes that have normal or
high memory.

But we don't have any word to stand for the nodes that have *any* memory.

And we have N_CPU but without N_MEMORY.

Current code reuse the N_HIGH_MEMORY for this purpose because any node
which has memory must have high memory or normal memory currently.

A)	But this reusing is bad for *readability*. Because the name
	N_HIGH_MEMORY just stands for high or normal:

A.example 1)
	mem_cgroup_nr_lru_pages():
		for_each_node_state(nid, N_HIGH_MEMORY)

	The user will be confused(why this function just counts for high or
	normal memory node? does it counts for ZONE_MOVABLE's lru pages?)
	until someone else tell them N_HIGH_MEMORY is reused to stand for
	nodes that have any memory.

A.cont) If we introduce N_MEMORY, we can reduce this confusing
	AND make the code more clearly:

A.example 2) mm/page_cgroup.c use N_HIGH_MEMORY twice:

	One is in page_cgroup_init(void):
		for_each_node_state(nid, N_HIGH_MEMORY) {

	It means if the node have memory, we will allocate page_cgroup map for
	the node. We should use N_MEMORY instead here to gaim more clearly.

	The second using is in alloc_page_cgroup():
		if (node_state(nid, N_HIGH_MEMORY))
			addr = vzalloc_node(size, nid);

	It means if the node has high or normal memory that can be allocated
	from kernel. We should keep N_HIGH_MEMORY here, and it will be better
	if the "any memory" semantic of N_HIGH_MEMORY is removed.

B)	This reusing is out-dated if we introduce MOVABLE-dedicated node.
	The MOVABLE-dedicated node should not appear in
	node_stats[N_HIGH_MEMORY] nor node_stats[N_NORMAL_MEMORY],
	because MOVABLE-dedicated node has no high or normal memory.

	In x86_64, N_HIGH_MEMORY=N_NORMAL_MEMORY, if a MOVABLE-dedicated node
	is in node_stats[N_HIGH_MEMORY], it is also means it is in
	node_stats[N_NORMAL_MEMORY], it causes SLUB wrong.

	The slub uses
		for_each_node_state(nid, N_NORMAL_MEMORY)
	and creates kmem_cache_node for MOVABLE-dedicated node and cause problem.

In one word, we need a N_MEMORY.  We just intrude it as an alias to
N_HIGH_MEMORY and fix all im-proper usages of N_HIGH_MEMORY in late
patches.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Hillf Danton <dhillf@gmail.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Lin Feng <linfeng@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 7afc36334d52..c6ebdc97a428 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -380,6 +380,7 @@ enum node_states {
 #else
 	N_HIGH_MEMORY = N_NORMAL_MEMORY,
 #endif
+	N_MEMORY = N_HIGH_MEMORY,
 	N_CPU,		/* The node has one or more cpus */
 	NR_NODE_STATES
 };
-- 
cgit v1.2.3-55-g7522


From 38d7bee9d24adf4c95676a3dc902827c72930ebb Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:51:24 -0800
Subject: cpuset: use N_MEMORY instead N_HIGH_MEMORY

N_HIGH_MEMORY stands for the nodes that has normal or high memory.
N_MEMORY stands for the nodes that has any memory.

The code here need to handle with the nodes which have memory, we should
use N_MEMORY instead.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Hillf Danton <dhillf@gmail.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Lin Feng <linfeng@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/cpusets.txt |  2 +-
 include/linux/cpuset.h            |  2 +-
 kernel/cpuset.c                   | 32 ++++++++++++++++----------------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index cefd3d8bbd11..12e01d432bfe 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -218,7 +218,7 @@ and name space for cpusets, with a minimum of additional kernel code.
 The cpus and mems files in the root (top_cpuset) cpuset are
 read-only.  The cpus file automatically tracks the value of
 cpu_online_mask using a CPU hotplug notifier, and the mems file
-automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
+automatically tracks the value of node_states[N_MEMORY]--i.e.,
 nodes with memory--using the cpuset_track_online_nodes() hook.
 
 
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 838320fc3d1d..8c8a60d29407 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -144,7 +144,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 	return node_possible_map;
 }
 
-#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
+#define cpuset_current_mems_allowed (node_states[N_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
 
 static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b017887d632f..7bb63eea6eb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -302,10 +302,10 @@ static void guarantee_online_cpus(const struct cpuset *cs,
  * are online, with memory.  If none are online with memory, walk
  * up the cpuset hierarchy until we find one that does have some
  * online mems.  If we get all the way to the top and still haven't
- * found any online mems, return node_states[N_HIGH_MEMORY].
+ * found any online mems, return node_states[N_MEMORY].
  *
  * One way or another, we guarantee to return some non-empty subset
- * of node_states[N_HIGH_MEMORY].
+ * of node_states[N_MEMORY].
  *
  * Call with callback_mutex held.
  */
@@ -313,14 +313,14 @@ static void guarantee_online_cpus(const struct cpuset *cs,
 static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
 {
 	while (cs && !nodes_intersects(cs->mems_allowed,
-					node_states[N_HIGH_MEMORY]))
+					node_states[N_MEMORY]))
 		cs = cs->parent;
 	if (cs)
 		nodes_and(*pmask, cs->mems_allowed,
-					node_states[N_HIGH_MEMORY]);
+					node_states[N_MEMORY]);
 	else
-		*pmask = node_states[N_HIGH_MEMORY];
-	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
+		*pmask = node_states[N_MEMORY];
+	BUG_ON(!nodes_intersects(*pmask, node_states[N_MEMORY]));
 }
 
 /*
@@ -1100,7 +1100,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 		return -ENOMEM;
 
 	/*
-	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+	 * top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
 	 * it's read-only
 	 */
 	if (cs == &top_cpuset) {
@@ -1122,7 +1122,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 			goto done;
 
 		if (!nodes_subset(trialcs->mems_allowed,
-				node_states[N_HIGH_MEMORY])) {
+				node_states[N_MEMORY])) {
 			retval =  -EINVAL;
 			goto done;
 		}
@@ -2026,7 +2026,7 @@ static struct cpuset *cpuset_next(struct list_head *queue)
  * before dropping down to the next.  It always processes a node before
  * any of its children.
  *
- * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY
+ * In the case of memory hot-unplug, it will remove nodes from N_MEMORY
  * if all present pages from a node are offlined.
  */
 static void
@@ -2065,7 +2065,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 
 			/* Continue past cpusets with all mems online */
 			if (nodes_subset(cp->mems_allowed,
-					node_states[N_HIGH_MEMORY]))
+					node_states[N_MEMORY]))
 				continue;
 
 			oldmems = cp->mems_allowed;
@@ -2073,7 +2073,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 			/* Remove offline mems from this cpuset. */
 			mutex_lock(&callback_mutex);
 			nodes_and(cp->mems_allowed, cp->mems_allowed,
-						node_states[N_HIGH_MEMORY]);
+						node_states[N_MEMORY]);
 			mutex_unlock(&callback_mutex);
 
 			/* Move tasks from the empty cpuset to a parent */
@@ -2126,8 +2126,8 @@ void cpuset_update_active_cpus(bool cpu_online)
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
- * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
- * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
+ * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
+ * Call this routine anytime after node_states[N_MEMORY] changes.
  * See cpuset_update_active_cpus() for CPU hotplug handling.
  */
 static int cpuset_track_online_nodes(struct notifier_block *self,
@@ -2140,7 +2140,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	case MEM_ONLINE:
 		oldmems = top_cpuset.mems_allowed;
 		mutex_lock(&callback_mutex);
-		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+		top_cpuset.mems_allowed = node_states[N_MEMORY];
 		mutex_unlock(&callback_mutex);
 		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
 		break;
@@ -2169,7 +2169,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 void __init cpuset_init_smp(void)
 {
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
@@ -2237,7 +2237,7 @@ void cpuset_init_current_mems_allowed(void)
  *
  * Description: Returns the nodemask_t mems_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
+ * subset of node_states[N_MEMORY], even if this means going outside the
  * tasks cpuset.
  **/
 
-- 
cgit v1.2.3-55-g7522


From 6715ddf94576ff39f5d1cda8d4c568d3b79e82ec Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:51:49 -0800
Subject: hotplug: update nodemasks management

Update nodemasks management for N_MEMORY.

[lliubbo@gmail.com: fix build]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Lin Feng <linfeng@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Bob Liu <lliubbo@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/memory-hotplug.txt |  5 ++-
 include/linux/memory.h           |  1 +
 mm/memory_hotplug.c              | 87 +++++++++++++++++++++++++++++++++-------
 3 files changed, 77 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index c6f993d491b5..8e5eacbdcfa3 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -390,6 +390,7 @@ struct memory_notify {
        unsigned long start_pfn;
        unsigned long nr_pages;
        int status_change_nid_normal;
+       int status_change_nid_high;
        int status_change_nid;
 }
 
@@ -397,7 +398,9 @@ start_pfn is start_pfn of online/offline memory.
 nr_pages is # of pages of online/offline memory.
 status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
 is (will be) set/clear, if this is -1, then nodemask status is not changed.
-status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be)
+status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
+is (will be) set/clear, if this is -1, then nodemask status is not changed.
+status_change_nid is set node id when N_MEMORY of nodemask is (will be)
 set/clear. It means a new(memoryless) node gets new memory by online and a
 node loses all memory. If this is -1, then nodemask status is not changed.
 If status_changed_nid* >= 0, callback should create/discard structures for the
diff --git a/include/linux/memory.h b/include/linux/memory.h
index a09216d0dcc7..45e93b468878 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -54,6 +54,7 @@ struct memory_notify {
 	unsigned long start_pfn;
 	unsigned long nr_pages;
 	int status_change_nid_normal;
+	int status_change_nid_high;
 	int status_change_nid;
 };
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index de9cb14ae753..eca4aac1a83b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -595,13 +595,15 @@ static void node_states_check_changes_online(unsigned long nr_pages,
 	enum zone_type zone_last = ZONE_NORMAL;
 
 	/*
-	 * If we have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
-	 * which have 0...ZONE_NORMAL, set zone_last to ZONE_NORMAL.
+	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
+	 * contains nodes which have zones of 0...ZONE_NORMAL,
+	 * set zone_last to ZONE_NORMAL.
 	 *
-	 * If we don't have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
-	 * which have 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
+	 * If we don't have HIGHMEM nor movable node,
+	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
+	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
 	 */
-	if (N_HIGH_MEMORY == N_NORMAL_MEMORY)
+	if (N_MEMORY == N_NORMAL_MEMORY)
 		zone_last = ZONE_MOVABLE;
 
 	/*
@@ -615,12 +617,34 @@ static void node_states_check_changes_online(unsigned long nr_pages,
 	else
 		arg->status_change_nid_normal = -1;
 
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * If we have movable node, node_states[N_HIGH_MEMORY]
+	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
+	 * set zone_last to ZONE_HIGHMEM.
+	 *
+	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
+	 * contains nodes which have zones of 0...ZONE_MOVABLE,
+	 * set zone_last to ZONE_MOVABLE.
+	 */
+	zone_last = ZONE_HIGHMEM;
+	if (N_MEMORY == N_HIGH_MEMORY)
+		zone_last = ZONE_MOVABLE;
+
+	if (zone_idx(zone) <= zone_last && !node_state(nid, N_HIGH_MEMORY))
+		arg->status_change_nid_high = nid;
+	else
+		arg->status_change_nid_high = -1;
+#else
+	arg->status_change_nid_high = arg->status_change_nid_normal;
+#endif
+
 	/*
 	 * if the node don't have memory befor online, we will need to
-	 * set the node to node_states[N_HIGH_MEMORY] after the memory
+	 * set the node to node_states[N_MEMORY] after the memory
 	 * is online.
 	 */
-	if (!node_state(nid, N_HIGH_MEMORY))
+	if (!node_state(nid, N_MEMORY))
 		arg->status_change_nid = nid;
 	else
 		arg->status_change_nid = -1;
@@ -631,7 +655,10 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 	if (arg->status_change_nid_normal >= 0)
 		node_set_state(node, N_NORMAL_MEMORY);
 
-	node_set_state(node, N_HIGH_MEMORY);
+	if (arg->status_change_nid_high >= 0)
+		node_set_state(node, N_HIGH_MEMORY);
+
+	node_set_state(node, N_MEMORY);
 }
 
 
@@ -1099,13 +1126,15 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
 	enum zone_type zt, zone_last = ZONE_NORMAL;
 
 	/*
-	 * If we have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
-	 * which have 0...ZONE_NORMAL, set zone_last to ZONE_NORMAL.
+	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
+	 * contains nodes which have zones of 0...ZONE_NORMAL,
+	 * set zone_last to ZONE_NORMAL.
 	 *
-	 * If we don't have HIGHMEM, node_states[N_NORMAL_MEMORY] contains nodes
-	 * which have 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
+	 * If we don't have HIGHMEM nor movable node,
+	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
+	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
 	 */
-	if (N_HIGH_MEMORY == N_NORMAL_MEMORY)
+	if (N_MEMORY == N_NORMAL_MEMORY)
 		zone_last = ZONE_MOVABLE;
 
 	/*
@@ -1122,6 +1151,30 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
 	else
 		arg->status_change_nid_normal = -1;
 
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * If we have movable node, node_states[N_HIGH_MEMORY]
+	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
+	 * set zone_last to ZONE_HIGHMEM.
+	 *
+	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
+	 * contains nodes which have zones of 0...ZONE_MOVABLE,
+	 * set zone_last to ZONE_MOVABLE.
+	 */
+	zone_last = ZONE_HIGHMEM;
+	if (N_MEMORY == N_HIGH_MEMORY)
+		zone_last = ZONE_MOVABLE;
+
+	for (; zt <= zone_last; zt++)
+		present_pages += pgdat->node_zones[zt].present_pages;
+	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
+		arg->status_change_nid_high = zone_to_nid(zone);
+	else
+		arg->status_change_nid_high = -1;
+#else
+	arg->status_change_nid_high = arg->status_change_nid_normal;
+#endif
+
 	/*
 	 * node_states[N_HIGH_MEMORY] contains nodes which have 0...ZONE_MOVABLE
 	 */
@@ -1146,9 +1199,13 @@ static void node_states_clear_node(int node, struct memory_notify *arg)
 	if (arg->status_change_nid_normal >= 0)
 		node_clear_state(node, N_NORMAL_MEMORY);
 
-	if ((N_HIGH_MEMORY != N_NORMAL_MEMORY) &&
-	    (arg->status_change_nid >= 0))
+	if ((N_MEMORY != N_NORMAL_MEMORY) &&
+	    (arg->status_change_nid_high >= 0))
 		node_clear_state(node, N_HIGH_MEMORY);
+
+	if ((N_MEMORY != N_HIGH_MEMORY) &&
+	    (arg->status_change_nid >= 0))
+		node_clear_state(node, N_MEMORY);
 }
 
 static int __ref __offline_pages(unsigned long start_pfn,
-- 
cgit v1.2.3-55-g7522


From 44e33e8f95171b93968c3ac3cf8516998b1db65d Mon Sep 17 00:00:00 2001
From: Greg Thelen
Date: Wed, 12 Dec 2012 13:51:52 -0800
Subject: res_counter: delete res_counter_write()

Since commit 628f42355389 ("memcg: limit change shrink usage") both
res_counter_write() and write_strategy_fn have been unused.  This patch
deletes them both.

Signed-off-by: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h |  5 -----
 kernel/res_counter.c        | 22 ----------------------
 2 files changed, 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 7d7fbe2ef782..6f54e40fa218 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -74,14 +74,9 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
 		const char __user *buf, size_t nbytes, loff_t *pos,
 		int (*read_strategy)(unsigned long long val, char *s));
 
-typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
-
 int res_counter_memparse_write_strategy(const char *buf,
 					unsigned long long *res);
 
-int res_counter_write(struct res_counter *counter, int member,
-		      const char *buffer, write_strategy_fn write_strategy);
-
 /*
  * the field descriptors. one for each member of res_counter
  */
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index ad581aa2369a..3920d593e63c 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -192,25 +192,3 @@ int res_counter_memparse_write_strategy(const char *buf,
 	*res = PAGE_ALIGN(*res);
 	return 0;
 }
-
-int res_counter_write(struct res_counter *counter, int member,
-		      const char *buf, write_strategy_fn write_strategy)
-{
-	char *end;
-	unsigned long flags;
-	unsigned long long tmp, *val;
-
-	if (write_strategy) {
-		if (write_strategy(buf, &tmp))
-			return -EINVAL;
-	} else {
-		tmp = simple_strtoull(buf, &end, 10);
-		if (*end != '\0')
-			return -EINVAL;
-	}
-	spin_lock_irqsave(&counter->lock, flags);
-	val = res_counter_member(counter, member);
-	*val = tmp;
-	spin_unlock_irqrestore(&counter->lock, flags);
-	return 0;
-}
-- 
cgit v1.2.3-55-g7522


From 05b0afd73d04109d87f00ccd39f099e217c37263 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 12 Dec 2012 13:51:56 -0800
Subject: mm: add a reminder comment for __GFP_BITS_SHIFT

Cc: Glauber Costa <glommer@parallels.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 31e8041274f6..f74856e17e48 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -34,6 +34,7 @@ struct vm_area_struct;
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
 #define ___GFP_WRITE		0x1000000u
+/* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
  * GFP bitmasks..
-- 
cgit v1.2.3-55-g7522


From 68ae564bbac8eb9ed54ddd2529b0e29ee190b355 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Wed, 12 Dec 2012 13:51:57 -0800
Subject: mm, memcg: avoid unnecessary function call when memcg is disabled

While profiling numa/core v16 with cgroup_disable=memory on the command
line, I noticed mem_cgroup_count_vm_event() still showed up as high as
0.60% in perftop.

This occurs because the function is called extremely often even when memcg
is disabled.

To fix this, inline the check for mem_cgroup_disabled() so we avoid the
unnecessary function call if memcg is disabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 9 ++++++++-
 mm/memcontrol.c            | 6 ++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 11ddc7ffeba8..e98a74c0c9c0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -181,7 +181,14 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
-void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
+void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
+static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
+					     enum vm_event_item idx)
+{
+	if (mem_cgroup_disabled())
+		return;
+	__mem_cgroup_count_vm_event(mm, idx);
+}
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 49d86d06e1dd..7f0e3571df7e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,6 +59,8 @@
 #include <trace/events/vmscan.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+EXPORT_SYMBOL(mem_cgroup_subsys);
+
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 static struct mem_cgroup *root_mem_cgroup __read_mostly;
 
@@ -1015,7 +1017,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
 	     iter != NULL;				\
 	     iter = mem_cgroup_iter(NULL, iter, NULL))
 
-void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
+void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
 	struct mem_cgroup *memcg;
 
@@ -1040,7 +1042,7 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 out:
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL(mem_cgroup_count_vm_event);
+EXPORT_SYMBOL(__mem_cgroup_count_vm_event);
 
 /**
  * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
-- 
cgit v1.2.3-55-g7522


From 20b2f52b73febce476fc9376f0296c1aa0e4f5a7 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:52:00 -0800
Subject: numa: add CONFIG_MOVABLE_NODE for movable-dedicated node

We need a node which only contains movable memory.  This feature is very
important for node hotplug.  If a node has normal/highmem, the memory may
be used by the kernel and can't be offlined.  If the node only contains
movable memory, we can offline the memory and the node.

All are prepared, we can actually introduce N_MEMORY.
add CONFIG_MOVABLE_NODE make we can use it for movable-dedicated node

[akpm@linux-foundation.org: fix Kconfig text]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Tested-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c      | 6 ++++++
 include/linux/nodemask.h | 4 ++++
 mm/Kconfig               | 8 ++++++++
 mm/page_alloc.c          | 3 +++
 4 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 49dbe7dc9ada..fac124a7e1c5 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -643,6 +643,9 @@ static struct node_attr node_state_attr[] = {
 	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
+#endif
+#ifdef CONFIG_MOVABLE_NODE
+	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
 #endif
 	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
 };
@@ -653,6 +656,9 @@ static struct attribute *node_state_attrs[] = {
 	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
 #ifdef CONFIG_HIGHMEM
 	&node_state_attr[N_HIGH_MEMORY].attr.attr,
+#endif
+#ifdef CONFIG_MOVABLE_NODE
+	&node_state_attr[N_MEMORY].attr.attr,
 #endif
 	&node_state_attr[N_CPU].attr.attr,
 	NULL
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index c6ebdc97a428..4e2cbfa640b7 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -380,7 +380,11 @@ enum node_states {
 #else
 	N_HIGH_MEMORY = N_NORMAL_MEMORY,
 #endif
+#ifdef CONFIG_MOVABLE_NODE
+	N_MEMORY,		/* The node has memory(regular, high, movable) */
+#else
 	N_MEMORY = N_HIGH_MEMORY,
+#endif
 	N_CPU,		/* The node has one or more cpus */
 	NR_NODE_STATES
 };
diff --git a/mm/Kconfig b/mm/Kconfig
index e6651c5de14f..1680a0123a13 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -143,6 +143,14 @@ config NO_BOOTMEM
 config MEMORY_ISOLATION
 	boolean
 
+config MOVABLE_NODE
+	boolean "Enable to assign a node which has only movable memory"
+	depends on HAVE_MEMBLOCK
+	depends on NO_BOOTMEM
+	depends on X86_64
+	depends on NUMA
+	default y
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 35727168896b..2bf0d43d646b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -89,6 +89,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = { { [0] = 1UL } },
+#endif
+#ifdef CONFIG_MOVABLE_NODE
+	[N_MEMORY] = { { [0] = 1UL } },
 #endif
 	[N_CPU] = { { [0] = 1UL } },
 #endif	/* NUMA */
-- 
cgit v1.2.3-55-g7522


From 9feedc9d831e18ae6d0d15aa562e5e46ba53647b Mon Sep 17 00:00:00 2001
From: Jiang Liu
Date: Wed, 12 Dec 2012 13:52:12 -0800
Subject: mm: introduce new field "managed_pages" to struct zone

Currently a zone's present_pages is calcuated as below, which is
inaccurate and may cause trouble to memory hotplug.

	spanned_pages - absent_pages - memmap_pages - dma_reserve.

During fixing bugs caused by inaccurate zone->present_pages, we found
zone->present_pages has been abused.  The field zone->present_pages may
have different meanings in different contexts:

1) pages existing in a zone.
2) pages managed by the buddy system.

For more discussions about the issue, please refer to:
  http://lkml.org/lkml/2012/11/5/866
  https://patchwork.kernel.org/patch/1346751/

This patchset tries to introduce a new field named "managed_pages" to
struct zone, which counts "pages managed by the buddy system".  And revert
zone->present_pages to count "physical pages existing in a zone", which
also keep in consistence with pgdat->node_present_pages.

We will set an initial value for zone->managed_pages in function
free_area_init_core() and will adjust it later if the initial value is
inaccurate.

For DMA/normal zones, the initial value is set to:

	(spanned_pages - absent_pages - memmap_pages - dma_reserve)

Later zone->managed_pages will be adjusted to the accurate value when the
bootmem allocator frees all free pages to the buddy system in function
free_all_bootmem_node() and free_all_bootmem().

The bootmem allocator doesn't touch highmem pages, so highmem zones'
managed_pages is set to the accurate value "spanned_pages - absent_pages"
in function free_area_init_core() and won't be updated anymore.

This patch also adds a new field "managed_pages" to /proc/zoneinfo
and sysrq showmem.

[akpm@linux-foundation.org: small comment tweaks]
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Maciej Rutecki <maciej.rutecki@gmail.com>
Tested-by: Chris Clayton <chris2553@googlemail.com>
Cc: "Rafael J . Wysocki" <rjw@sisk.pl>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 41 ++++++++++++++++++++++++++++++++++-------
 mm/bootmem.c           | 21 +++++++++++++++++++++
 mm/memory_hotplug.c    | 10 ++++++++++
 mm/nobootmem.c         | 22 ++++++++++++++++++++++
 mm/page_alloc.c        | 44 ++++++++++++++++++++++++++++++--------------
 mm/vmstat.c            |  6 ++++--
 6 files changed, 121 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0c0b1d608a69..cd55dad56aac 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -460,17 +460,44 @@ struct zone {
 	unsigned long		zone_start_pfn;
 
 	/*
-	 * zone_start_pfn, spanned_pages and present_pages are all
-	 * protected by span_seqlock.  It is a seqlock because it has
-	 * to be read outside of zone->lock, and it is done in the main
-	 * allocator path.  But, it is written quite infrequently.
+	 * spanned_pages is the total pages spanned by the zone, including
+	 * holes, which is calculated as:
+	 * 	spanned_pages = zone_end_pfn - zone_start_pfn;
 	 *
-	 * The lock is declared along with zone->lock because it is
+	 * present_pages is physical pages existing within the zone, which
+	 * is calculated as:
+	 *	present_pages = spanned_pages - absent_pages(pags in holes);
+	 *
+	 * managed_pages is present pages managed by the buddy system, which
+	 * is calculated as (reserved_pages includes pages allocated by the
+	 * bootmem allocator):
+	 *	managed_pages = present_pages - reserved_pages;
+	 *
+	 * So present_pages may be used by memory hotplug or memory power
+	 * management logic to figure out unmanaged pages by checking
+	 * (present_pages - managed_pages). And managed_pages should be used
+	 * by page allocator and vm scanner to calculate all kinds of watermarks
+	 * and thresholds.
+	 *
+	 * Locking rules:
+	 *
+	 * zone_start_pfn and spanned_pages are protected by span_seqlock.
+	 * It is a seqlock because it has to be read outside of zone->lock,
+	 * and it is done in the main allocator path.  But, it is written
+	 * quite infrequently.
+	 *
+	 * The span_seq lock is declared along with zone->lock because it is
 	 * frequently read in proximity to zone->lock.  It's good to
 	 * give them a chance of being in the same cacheline.
+	 *
+	 * Write access to present_pages and managed_pages at runtime should
+	 * be protected by lock_memory_hotplug()/unlock_memory_hotplug().
+	 * Any reader who can't tolerant drift of present_pages and
+	 * managed_pages should hold memory hotplug lock to get a stable value.
 	 */
-	unsigned long		spanned_pages;	/* total size, including holes */
-	unsigned long		present_pages;	/* amount of memory (excluding holes) */
+	unsigned long		spanned_pages;
+	unsigned long		present_pages;
+	unsigned long		managed_pages;
 
 	/*
 	 * rarely used fields:
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 26d057a8b552..19262ac05dd2 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -229,6 +229,22 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	return count;
 }
 
+static void reset_node_lowmem_managed_pages(pg_data_t *pgdat)
+{
+	struct zone *z;
+
+	/*
+	 * In free_area_init_core(), highmem zone's managed_pages is set to
+	 * present_pages, and bootmem allocator doesn't allocate from highmem
+	 * zones. So there's no need to recalculate managed_pages because all
+	 * highmem pages will be managed by the buddy system. Here highmem
+	 * zone also includes highmem movable zone.
+	 */
+	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+		if (!is_highmem(z))
+			z->managed_pages = 0;
+}
+
 /**
  * free_all_bootmem_node - release a node's free pages to the buddy allocator
  * @pgdat: node to be released
@@ -238,6 +254,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
 	register_page_bootmem_info_node(pgdat);
+	reset_node_lowmem_managed_pages(pgdat);
 	return free_all_bootmem_core(pgdat->bdata);
 }
 
@@ -250,6 +267,10 @@ unsigned long __init free_all_bootmem(void)
 {
 	unsigned long total_pages = 0;
 	bootmem_data_t *bdata;
+	struct pglist_data *pgdat;
+
+	for_each_online_pgdat(pgdat)
+		reset_node_lowmem_managed_pages(pgdat);
 
 	list_for_each_entry(bdata, &bdata_list, list)
 		total_pages += free_all_bootmem_core(bdata);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c6cd8b515424..b7c93ca896d6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -106,6 +106,7 @@ static void get_page_bootmem(unsigned long info,  struct page *page,
 void __ref put_page_bootmem(struct page *page)
 {
 	unsigned long type;
+	static DEFINE_MUTEX(ppb_lock);
 
 	type = (unsigned long) page->lru.next;
 	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
@@ -115,7 +116,14 @@ void __ref put_page_bootmem(struct page *page)
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
 		INIT_LIST_HEAD(&page->lru);
+
+		/*
+		 * Please refer to comment for __free_pages_bootmem()
+		 * for why we serialize here.
+		 */
+		mutex_lock(&ppb_lock);
 		__free_pages_bootmem(page, 0);
+		mutex_unlock(&ppb_lock);
 	}
 
 }
@@ -748,6 +756,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 		return ret;
 	}
 
+	zone->managed_pages += onlined_pages;
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 	if (onlined_pages) {
@@ -1321,6 +1330,7 @@ repeat:
 	/* reset pagetype flags and makes migrate type to be MOVABLE */
 	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 	/* removal success */
+	zone->managed_pages -= offlined_pages;
 	zone->present_pages -= offlined_pages;
 	zone->zone_pgdat->node_present_pages -= offlined_pages;
 	totalram_pages -= offlined_pages;
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index bd82f6b31411..b8294fc03df8 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -137,6 +137,22 @@ unsigned long __init free_low_memory_core_early(int nodeid)
 	return count;
 }
 
+static void reset_node_lowmem_managed_pages(pg_data_t *pgdat)
+{
+	struct zone *z;
+
+	/*
+	 * In free_area_init_core(), highmem zone's managed_pages is set to
+	 * present_pages, and bootmem allocator doesn't allocate from highmem
+	 * zones. So there's no need to recalculate managed_pages because all
+	 * highmem pages will be managed by the buddy system. Here highmem
+	 * zone also includes highmem movable zone.
+	 */
+	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+		if (!is_highmem(z))
+			z->managed_pages = 0;
+}
+
 /**
  * free_all_bootmem_node - release a node's free pages to the buddy allocator
  * @pgdat: node to be released
@@ -146,6 +162,7 @@ unsigned long __init free_low_memory_core_early(int nodeid)
 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
 	register_page_bootmem_info_node(pgdat);
+	reset_node_lowmem_managed_pages(pgdat);
 
 	/* free_low_memory_core_early(MAX_NUMNODES) will be called later */
 	return 0;
@@ -158,6 +175,11 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
  */
 unsigned long __init free_all_bootmem(void)
 {
+	struct pglist_data *pgdat;
+
+	for_each_online_pgdat(pgdat)
+		reset_node_lowmem_managed_pages(pgdat);
+
 	/*
 	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
 	 *  because in some case like Node0 doesn't have RAM installed
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bf0d43d646b..0b6a6d04300a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -735,6 +735,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
+/*
+ * Read access to zone->managed_pages is safe because it's unsigned long,
+ * but we still need to serialize writers. Currently all callers of
+ * __free_pages_bootmem() except put_page_bootmem() should only be used
+ * at boot time. So for shorter boot time, we shift the burden to
+ * put_page_bootmem() to serialize writers.
+ */
 void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
@@ -750,6 +757,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 		set_page_count(p, 0);
 	}
 
+	page_zone(page)->managed_pages += 1 << order;
 	set_page_refcounted(page);
 	__free_pages(page, order);
 }
@@ -2984,6 +2992,7 @@ void show_free_areas(unsigned int filter)
 			" isolated(anon):%lukB"
 			" isolated(file):%lukB"
 			" present:%lukB"
+			" managed:%lukB"
 			" mlocked:%lukB"
 			" dirty:%lukB"
 			" writeback:%lukB"
@@ -3013,6 +3022,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_ISOLATED_ANON)),
 			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_FILE_DIRTY)),
 			K(zone_page_state(zone, NR_WRITEBACK)),
@@ -4502,48 +4512,54 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize, memmap_pages;
+		unsigned long size, realsize, freesize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
-		realsize = size - zone_absent_pages_in_node(nid, j,
+		realsize = freesize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
 		/*
-		 * Adjust realsize so that it accounts for how much memory
+		 * Adjust freesize so that it accounts for how much memory
 		 * is used by this zone for memmap. This affects the watermark
 		 * and per-cpu initialisations
 		 */
 		memmap_pages =
 			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
-		if (realsize >= memmap_pages) {
-			realsize -= memmap_pages;
+		if (freesize >= memmap_pages) {
+			freesize -= memmap_pages;
 			if (memmap_pages)
 				printk(KERN_DEBUG
 				       "  %s zone: %lu pages used for memmap\n",
 				       zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
-				"  %s zone: %lu pages exceeds realsize %lu\n",
-				zone_names[j], memmap_pages, realsize);
+				"  %s zone: %lu pages exceeds freesize %lu\n",
+				zone_names[j], memmap_pages, freesize);
 
 		/* Account for reserved pages */
-		if (j == 0 && realsize > dma_reserve) {
-			realsize -= dma_reserve;
+		if (j == 0 && freesize > dma_reserve) {
+			freesize -= dma_reserve;
 			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
 		if (!is_highmem_idx(j))
-			nr_kernel_pages += realsize;
-		nr_all_pages += realsize;
+			nr_kernel_pages += freesize;
+		nr_all_pages += freesize;
 
 		zone->spanned_pages = size;
-		zone->present_pages = realsize;
+		zone->present_pages = freesize;
+		/*
+		 * Set an approximate value for lowmem here, it will be adjusted
+		 * when the bootmem allocator frees pages into the buddy system.
+		 * And all highmem pages will be managed by the buddy system.
+		 */
+		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
+		zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
 						/ 100;
-		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9a4a522c0b0f..df14808f0a36 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -994,14 +994,16 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        high     %lu"
 		   "\n        scanned  %lu"
 		   "\n        spanned  %lu"
-		   "\n        present  %lu",
+		   "\n        present  %lu"
+		   "\n        managed  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
 		   min_wmark_pages(zone),
 		   low_wmark_pages(zone),
 		   high_wmark_pages(zone),
 		   zone->pages_scanned,
 		   zone->spanned_pages,
-		   zone->present_pages);
+		   zone->present_pages,
+		   zone->managed_pages);
 
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
-- 
cgit v1.2.3-55-g7522


From 816422ad76474fed8052b6f7b905a054d082e59a Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Wed, 12 Dec 2012 13:52:36 -0800
Subject: asm-generic, mm: pgtable: consolidate zero page helpers

We have two different implementation of is_zero_pfn() and my_zero_pfn()
helpers: for architectures with and without zero page coloring.

Let's consolidate them in <asm-generic/pgtable.h>.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/pgtable.h | 11 +----------
 arch/s390/include/asm/pgtable.h | 11 +----------
 include/asm-generic/pgtable.h   | 26 ++++++++++++++++++++++++++
 include/linux/mm.h              |  8 --------
 mm/memory.c                     |  7 -------
 5 files changed, 28 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index c02158be836c..14490e9443af 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -76,16 +76,7 @@ extern unsigned long zero_page_mask;
 
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-
-#define is_zero_pfn is_zero_pfn
-static inline int is_zero_pfn(unsigned long pfn)
-{
-	extern unsigned long zero_pfn;
-	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
-}
-
-#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+#define __HAVE_COLOR_ZERO_PAGE
 
 extern void paging_init(void);
 
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2d3b7cb26005..c814e6f5b57d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -55,16 +55,7 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + \
 	 (((unsigned long)(vaddr)) &zero_page_mask))))
-
-#define is_zero_pfn is_zero_pfn
-static inline int is_zero_pfn(unsigned long pfn)
-{
-	extern unsigned long zero_pfn;
-	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
-}
-
-#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+#define __HAVE_COLOR_ZERO_PAGE
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b36ce40bd1c6..284e80831d2c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -449,6 +449,32 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 			unsigned long size);
 #endif
 
+#ifdef __HAVE_COLOR_ZERO_PAGE
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long zero_pfn;
+	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	return page_to_pfn(ZERO_PAGE(addr));
+}
+#else
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long zero_pfn;
+	return pfn == zero_pfn;
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	extern unsigned long zero_pfn;
+	return zero_pfn;
+}
+#endif
+
 #ifdef CONFIG_MMU
 
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4b80bad4a367..4af4f0b1be4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -516,14 +516,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 #endif
 
-#ifndef my_zero_pfn
-static inline unsigned long my_zero_pfn(unsigned long addr)
-{
-	extern unsigned long zero_pfn;
-	return zero_pfn;
-}
-#endif
-
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
diff --git a/mm/memory.c b/mm/memory.c
index f9a4b0cb8623..3f680e7c7645 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -717,13 +717,6 @@ static inline bool is_cow_mapping(vm_flags_t flags)
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
-#ifndef is_zero_pfn
-static inline int is_zero_pfn(unsigned long pfn)
-{
-	return pfn == zero_pfn;
-}
-#endif
-
 /*
  * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
-- 
cgit v1.2.3-55-g7522


From 98870901cce098bbe94d90d2c41d8d1fa8d94392 Mon Sep 17 00:00:00 2001
From: Lin Feng
Date: Wed, 12 Dec 2012 13:52:39 -0800
Subject: mm/bootmem.c: remove unused wrapper function
 reserve_bootmem_generic()

reserve_bootmem_generic() has no caller,

Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 3 ---
 mm/bootmem.c            | 6 ------
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7b74452c5317..3f778c27f825 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -137,9 +137,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
-				   int flags);
-
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 19262ac05dd2..1324cd74faec 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -460,12 +460,6 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 	return mark_bootmem(start, end, 1, flags);
 }
 
-int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
-				   int flags)
-{
-	return reserve_bootmem(phys, len, flags);
-}
-
 static unsigned long __init align_idx(struct bootmem_data *bdata,
 				      unsigned long idx, unsigned long step)
 {
-- 
cgit v1.2.3-55-g7522


From 7056741fd9fc14a65608549a4657cf5178f05f63 Mon Sep 17 00:00:00 2001
From: Jim Kukunas
Date: Thu, 8 Nov 2012 13:47:44 -0800
Subject: lib/raid6: Add AVX2 optimized recovery functions

Optimize RAID6 recovery functions to take advantage of
the 256-bit YMM integer instructions introduced in AVX2.

The patch was tested and benchmarked before submission.
However hardware is not yet released so benchmark numbers
cannot be reported.

Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 arch/x86/Makefile       |   5 +-
 include/linux/raid/pq.h |   1 +
 lib/raid6/Makefile      |   2 +-
 lib/raid6/algos.c       |   3 +
 lib/raid6/recov_avx2.c  | 327 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/raid6/test/Makefile |   2 +-
 lib/raid6/x86.h         |  14 ++-
 7 files changed, 345 insertions(+), 9 deletions(-)
 create mode 100644 lib/raid6/recov_avx2.c

(limited to 'include/linux')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 58790bd85c1d..95477aae9ff7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -123,9 +123,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
+avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 640c69ceec96..3156347452b9 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -109,6 +109,7 @@ struct raid6_recov_calls {
 
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
+extern const struct raid6_recov_calls raid6_recov_avx2;
 
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index de06dfe165b8..8c2e22bef661 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o
 
-raid6_pq-y	+= algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
+raid6_pq-y	+= algos.o recov.o recov_ssse3.o recov_avx2.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
 		   altivec8.o mmx.o sse1.o sse2.o
 hostprogs-y	+= mktables
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 589f5f50ad2e..8b7f55cadb45 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -72,6 +72,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov);
 
 const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#ifdef CONFIG_AS_AVX2
+	&raid6_recov_avx2,
+#endif
 	&raid6_recov_ssse3,
 #endif
 	&raid6_recov_intx1,
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c
new file mode 100644
index 000000000000..43a9bab91879
--- /dev/null
+++ b/lib/raid6/recov_avx2.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+#if CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_avx2(void)
+{
+	return boot_cpu_has(X86_FEATURE_AVX2) &&
+		boot_cpu_has(X86_FEATURE_AVX);
+}
+
+static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
+		int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	const u8 x0f = 0x0f;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+		raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	/* ymm0 = x0f[16] */
+	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
+		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
+		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
+		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
+		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
+		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
+		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
+		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
+
+		/*
+		 * 1 = dq[0]  ^ q[0]
+		 * 9 = dq[32] ^ q[32]
+		 * 0 = dp[0]  ^ p[0]
+		 * 8 = dp[32] ^ p[32]
+		 */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+		asm volatile("vpsraw $4, %ymm1, %ymm3");
+		asm volatile("vpsraw $4, %ymm9, %ymm12");
+		asm volatile("vpand %ymm7, %ymm1, %ymm1");
+		asm volatile("vpand %ymm7, %ymm9, %ymm9");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm12, %ymm12");
+		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
+		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
+		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
+		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+		/*
+		 * 5 = qx[0]
+		 * 15 = qx[32]
+		 */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+		asm volatile("vpsraw $4, %ymm0, %ymm2");
+		asm volatile("vpsraw $4, %ymm8, %ymm6");
+		asm volatile("vpand %ymm7, %ymm0, %ymm3");
+		asm volatile("vpand %ymm7, %ymm8, %ymm14");
+		asm volatile("vpand %ymm7, %ymm2, %ymm2");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
+		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
+		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm12, %ymm13, %ymm13");
+
+		/*
+		 * 1  = pbmul[px[0]]
+		 * 13 = pbmul[px[32]]
+		 */
+		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm15, %ymm13, %ymm13");
+
+		/*
+		 * 1 = db = DQ
+		 * 13 = db[32] = DQ[32]
+		 */
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
+		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+		asm volatile("vpxor %ymm13, %ymm8, %ymm8");
+
+		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dp += 64;
+		dq += 64;
+#else
+		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
+		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
+		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
+		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
+
+		/* 1 = dq ^ q;  0 = dp ^ p */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+		/*
+		 * 1 = dq ^ q
+		 * 3 = dq ^ p >> 4
+		 */
+		asm volatile("vpsraw $4, %ymm1, %ymm3");
+		asm volatile("vpand %ymm7, %ymm1, %ymm1");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+		/* 5 = qx */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+
+		asm volatile("vpsraw $4, %ymm0, %ymm2");
+		asm volatile("vpand %ymm7, %ymm0, %ymm3");
+		asm volatile("vpand %ymm7, %ymm2, %ymm2");
+		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+
+		/* 1 = pbmul[px] */
+		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+		/* 1 = db = DQ */
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+
+		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dp += 32;
+		dq += 32;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
+		void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	const u8 x0f = 0x0f;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
+		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
+
+		/*
+		 * 3 = q[0] ^ dq[0]
+		 * 8 = q[32] ^ dq[32]
+		 */
+		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+		asm volatile("vmovapd %ymm0, %ymm13");
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+		asm volatile("vmovapd %ymm1, %ymm14");
+
+		asm volatile("vpsraw $4, %ymm3, %ymm6");
+		asm volatile("vpsraw $4, %ymm8, %ymm12");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm8, %ymm8");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpand %ymm7, %ymm12, %ymm12");
+		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
+		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm13, %ymm14, %ymm14");
+
+		/*
+		 * 1  = qmul[q[0]  ^ dq[0]]
+		 * 14 = qmul[q[32] ^ dq[32]]
+		 */
+		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
+		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+		asm volatile("vpxor %ymm14, %ymm12, %ymm12");
+
+		/*
+		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
+		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
+		 */
+
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
+		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dq += 64;
+#else
+		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+
+		/* 3 = q ^ dq */
+
+		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+
+		asm volatile("vpsraw $4, %ymm3, %ymm6");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+
+		/* 1 = qmul[q ^ dq] */
+
+		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+
+		/* 2 = p ^ qmul[q ^ dq] */
+
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dq += 32;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_avx2 = {
+	.data2 = raid6_2data_recov_avx2,
+	.datap = raid6_datap_recov_avx2,
+	.valid = raid6_has_avx2,
+#ifdef CONFIG_X86_64
+	.name = "avx2x2",
+#else
+	.name = "avx2x1",
+#endif
+	.priority = 2,
+};
+
+#else
+#warning "your version of binutils lacks AVX2 support"
+#endif
+
+#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index c76151d94764..d919c98ce266 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -23,7 +23,7 @@ RANLIB	 = ranlib
 all:	raid6.a raid6test
 
 raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
-	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
+	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o recov_avx2.o algos.o \
 	 tables.o
 	 rm -f $@
 	 $(AR) cq $@ $^
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d63232c55..b7595484a815 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void)
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
 #define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
 #define X86_FEATURE_AVX	(4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_AVX2        (9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
 
 /* Should work well enough on modern CPUs for testing */
 static inline int boot_cpu_has(int flag)
 {
-	u32 eax = (flag & 0x20) ? 0x80000001 : 1;
-	u32 ecx, edx;
+	u32 eax, ebx, ecx, edx;
+
+	eax = (flag & 0x100) ? 7 :
+		(flag & 0x20) ? 0x80000001 : 1;
+	ecx = 0;
 
 	asm volatile("cpuid"
-		     : "+a" (eax), "=d" (edx), "=c" (ecx)
-		     : : "ebx");
+		     : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
 
-	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
+	return ((flag & 0x100 ? ebx :
+		(flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
 }
 
 #endif /* ndef __KERNEL__ */
-- 
cgit v1.2.3-55-g7522


From 2c935842bdb46f5f557426feb4d2bdfdad1aa5f9 Mon Sep 17 00:00:00 2001
From: Yuanhan Liu
Date: Fri, 30 Nov 2012 13:10:39 -0800
Subject: lib/raid6: Add AVX2 optimized gen_syndrome functions

Add AVX2 optimized gen_syndrom functions, which is simply based on
sse2.c written by hpa.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/pq.h |   3 +
 lib/raid6/Makefile      |   2 +-
 lib/raid6/algos.c       |   9 ++
 lib/raid6/avx2.c        | 251 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/raid6/test/Makefile |  12 ++-
 5 files changed, 275 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/avx2.c

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 3156347452b9..8dfaa2ce2e95 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -98,6 +98,9 @@ extern const struct raid6_calls raid6_altivec1;
 extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;
+extern const struct raid6_calls raid6_avx2x1;
+extern const struct raid6_calls raid6_avx2x2;
+extern const struct raid6_calls raid6_avx2x4;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 8c2e22bef661..3430711b9bdf 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o
 
 raid6_pq-y	+= algos.o recov.o recov_ssse3.o recov_avx2.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
-		   altivec8.o mmx.o sse1.o sse2.o
+		   altivec8.o mmx.o sse1.o sse2.o avx2.o
 hostprogs-y	+= mktables
 
 quiet_cmd_unroll = UNROLL  $@
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 8b7f55cadb45..6d7316fe9f30 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_sse1x2,
 	&raid6_sse2x1,
 	&raid6_sse2x2,
+#ifdef CONFIG_AS_AVX2
+	&raid6_avx2x1,
+	&raid6_avx2x2,
+#endif
 #endif
 #if defined(__x86_64__) && !defined(__arch_um__)
 	&raid6_sse2x1,
 	&raid6_sse2x2,
 	&raid6_sse2x4,
+#ifdef CONFIG_AS_AVX2
+	&raid6_avx2x1,
+	&raid6_avx2x2,
+	&raid6_avx2x4,
+#endif
 #endif
 #ifdef CONFIG_ALTIVEC
 	&raid6_altivec1,
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
new file mode 100644
index 000000000000..bc3b1dd436eb
--- /dev/null
+++ b/lib/raid6/avx2.c
@@ -0,0 +1,251 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 2012 Intel Corporation
+ *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
+ *
+ *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * AVX2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_avx2_constants {
+	u64 x1d[4];
+} raid6_avx2_constants __aligned(32) = {
+	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
+};
+
+static int raid6_have_avx2(void)
+{
+	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
+}
+
+/*
+ * Plain AVX2 implementation
+ */
+static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */
+
+	for (d = 0; d < bytes; d += 32) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
+		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
+		for (z = z0-2; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
+		}
+		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+		asm volatile("vpand %ymm0,%ymm5,%ymm5");
+		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+		asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x1 = {
+	raid6_avx21_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x1",
+	1			/* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 AVX2 implementation
+ */
+static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+
+	/* We uniformly assume a single prefetch covers at least 32 bytes */
+	for (d = 0; d < bytes; d += 64) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
+		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
+		asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
+		asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
+		for (z = z0-1; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpand %ymm0,%ymm7,%ymm7");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+		}
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x2 = {
+	raid6_avx22_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x2",
+	1			/* Has cache hints */
+};
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Unrolled-by-4 AVX2 implementation
+ */
+static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
+	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
+	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
+	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
+	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
+	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
+	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
+	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
+	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */
+
+	for (d = 0; d < bytes; d += 128) {
+		for (z = z0; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
+			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
+			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpand %ymm0,%ymm7,%ymm7");
+			asm volatile("vpand %ymm0,%ymm13,%ymm13");
+			asm volatile("vpand %ymm0,%ymm15,%ymm15");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
+			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
+			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
+			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+		}
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
+		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
+		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
+		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
+		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
+		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
+		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
+		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
+		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x4 = {
+	raid6_avx24_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x4",
+	1			/* Has cache hints */
+};
+#endif
+
+#endif /* CONFIG_AS_AVX2 */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index d919c98ce266..754cbac0f9f8 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -11,6 +11,16 @@ AWK	 = awk -f
 AR	 = ar
 RANLIB	 = ranlib
 
+ARCH := $(shell uname -m 2>/dev/null | sed -e /s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        CFLAGS += -DCONFIG_X86_32
+endif
+ifeq ($(ARCH),x86_64)
+        CFLAGS += -DCONFIG_X86_64
+endif
+CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1"| gcc -c -x assembler - &&\
+	     rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+
 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
 
@@ -22,7 +32,7 @@ RANLIB	 = ranlib
 
 all:	raid6.a raid6test
 
-raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
+raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o avx2.o \
 	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o recov_avx2.o algos.o \
 	 tables.o
 	 rm -f $@
-- 
cgit v1.2.3-55-g7522


From 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 28 Nov 2012 12:28:24 -0800
Subject: libceph: remove 'osdtimeout' option

This would reset a connection with any OSD that had an outstanding
request that was taking more than N seconds.  The idea was that if the
OSD was buggy, the client could compensate by resending the request.

In reality, this only served to hide server bugs, and we haven't
actually seen such a bug in quite a while.  Moreover, the userspace
client code never did this.

More importantly, often the request is taking a long time because the
OSD is trying to recover, or overloaded, and killing the connection
and retrying would only make the situation worse by giving the OSD
more work to do.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/super.c              |  2 --
 include/linux/ceph/libceph.h |  2 --
 net/ceph/ceph_common.c       |  3 +--
 net/ceph/osd_client.c        | 47 ++++----------------------------------------
 4 files changed, 5 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2f586b0e5e0f..fcda1c73a1e5 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
 	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
 		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
-		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 		seq_printf(m, ",osdkeepalivetimeout=%d",
 			   opt->osd_keepalive_timeout);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 42624789b06f..317aff8feb0a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -43,7 +43,6 @@ struct ceph_options {
 	struct ceph_entity_addr my_addr;
 	int mount_timeout;
 	int osd_idle_ttl;
-	int osd_timeout;
 	int osd_keepalive_timeout;
 
 	/*
@@ -63,7 +62,6 @@ struct ceph_options {
  * defaults
  */
 #define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
 #define CEPH_OSD_KEEPALIVE_DEFAULT  5
 #define CEPH_OSD_IDLE_TTL_DEFAULT    60
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a8020293f342..ee71ea26777a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name,
 
 	/* start with defaults */
 	opt->flags = CEPH_OPT_DEFAULT;
-	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
@@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name,
 
 			/* misc */
 		case Opt_osdtimeout:
-			opt->osd_timeout = intval;
+			pr_warning("ignoring deprecated osdtimeout option\n");
 			break;
 		case Opt_osdkeepalivetimeout:
 			opt->osd_keepalive_timeout = intval;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ccbdfbba9e53..7ebfe13267e6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 	}
 }
 
-static void kick_osd_requests(struct ceph_osd_client *osdc,
-			      struct ceph_osd *kickosd)
-{
-	mutex_lock(&osdc->request_mutex);
-	__kick_osd_requests(osdc, kickosd);
-	mutex_unlock(&osdc->request_mutex);
-}
-
 /*
  * If the osd connection drops, we need to resubmit all requests.
  */
@@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
 	dout("osd_reset osd%d\n", osd->o_osd);
 	osdc = osd->o_osdc;
 	down_read(&osdc->map_sem);
-	kick_osd_requests(osdc, osd);
+	mutex_lock(&osdc->request_mutex);
+	__kick_osd_requests(osdc, osd);
+	mutex_unlock(&osdc->request_mutex);
 	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
@@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work)
 {
 	struct ceph_osd_client *osdc =
 		container_of(work, struct ceph_osd_client, timeout_work.work);
-	struct ceph_osd_request *req, *last_req = NULL;
+	struct ceph_osd_request *req;
 	struct ceph_osd *osd;
-	unsigned long timeout = osdc->client->options->osd_timeout * HZ;
 	unsigned long keepalive =
 		osdc->client->options->osd_keepalive_timeout * HZ;
-	unsigned long last_stamp = 0;
 	struct list_head slow_osds;
 	dout("timeout\n");
 	down_read(&osdc->map_sem);
@@ -1105,37 +1097,6 @@ static void handle_timeout(struct work_struct *work)
 
 	mutex_lock(&osdc->request_mutex);
 
-	/*
-	 * reset osds that appear to be _really_ unresponsive.  this
-	 * is a failsafe measure.. we really shouldn't be getting to
-	 * this point if the system is working properly.  the monitors
-	 * should mark the osd as failed and we should find out about
-	 * it from an updated osd map.
-	 */
-	while (timeout && !list_empty(&osdc->req_lru)) {
-		req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
-				 r_req_lru_item);
-
-		/* hasn't been long enough since we sent it? */
-		if (time_before(jiffies, req->r_stamp + timeout))
-			break;
-
-		/* hasn't been long enough since it was acked? */
-		if (req->r_request->ack_stamp == 0 ||
-		    time_before(jiffies, req->r_request->ack_stamp + timeout))
-			break;
-
-		BUG_ON(req == last_req && req->r_stamp == last_stamp);
-		last_req = req;
-		last_stamp = req->r_stamp;
-
-		osd = req->r_osd;
-		BUG_ON(!osd);
-		pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
-			   req->r_tid, osd->o_osd);
-		__kick_osd_requests(osdc, osd);
-	}
-
 	/*
 	 * ping osds that are a bit slow.  this ensures that if there
 	 * is a break in the TCP connection we will notice, and reopen
-- 
cgit v1.2.3-55-g7522


From d2cc4dde9206aa2c7fb237aa689d3277cc070547 Mon Sep 17 00:00:00 2001
From: Joe Perches
Date: Thu, 29 Nov 2012 08:37:03 -0600
Subject: bdi_register: add __printf verification, fix arg mismatch

__printf is useful to verify format and arguments.

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/super.c             | 2 +-
 include/linux/backing-dev.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index fcda1c73a1e5..1a144001b2e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -842,7 +842,7 @@ static int ceph_register_bdi(struct super_block *sb,
 		fsc->backing_dev_info.ra_pages =
 			default_backing_dev_info.ra_pages;
 
-	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
+	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
 			   atomic_long_inc_return(&bdi_seq));
 	if (!err)
 		sb->s_bdi = &fsc->backing_dev_info;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9abc9126..12731a19ef06 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -114,6 +114,7 @@ struct backing_dev_info {
 int bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
+__printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
-- 
cgit v1.2.3-55-g7522


From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 16 Oct 2012 07:31:07 +1030
Subject: module: add syscall to load module from fd

As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.

Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.

If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on.  In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.

This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
---
 arch/x86/syscalls/syscall_32.tbl |   1 +
 arch/x86/syscalls/syscall_64.tbl |   1 +
 include/linux/syscalls.h         |   1 +
 kernel/module.c                  | 367 +++++++++++++++++++++++----------------
 kernel/sys_ni.c                  |   1 +
 5 files changed, 223 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index a47103fbc692..83b3838417ed 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -356,3 +356,4 @@
 347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
+350	i386	finit_module		sys_finit_module
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a582bfed95bb..7c58c84b7bc8 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -319,6 +319,7 @@
 310	64	process_vm_readv	sys_process_vm_readv
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
+313	common	finit_module		sys_finit_module
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 727f0cd73921..32bc035bcd68 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs);
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index 6e48c3a43599..6d2c4e4ca1f5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
@@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod,
 #endif
 
 #ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info,
-			    const void *mod, unsigned long *_len)
+static int module_sig_check(struct load_info *info)
 {
 	int err = -ENOKEY;
-	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
-	unsigned long len = *_len;
+	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const void *mod = info->hdr;
 
-	if (len > markerlen &&
-	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+	if (info->len > markerlen &&
+	    memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
 		/* We truncate the module to discard the signature */
-		*_len -= markerlen;
-		err = mod_verify_sig(mod, _len);
+		info->len -= markerlen;
+		err = mod_verify_sig(mod, &info->len);
 	}
 
 	if (!err) {
@@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info,
 	return err;
 }
 #else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info,
-			    void *mod, unsigned long *len)
+static int module_sig_check(struct load_info *info)
 {
 	return 0;
 }
 #endif /* !CONFIG_MODULE_SIG */
 
-/* Sets info->hdr, info->len and info->sig_ok. */
-static int copy_and_check(struct load_info *info,
-			  const void __user *umod, unsigned long len,
-			  const char __user *uargs)
+/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
+static int elf_header_check(struct load_info *info)
 {
-	int err;
-	Elf_Ehdr *hdr;
+	if (info->len < sizeof(*(info->hdr)))
+		return -ENOEXEC;
+
+	if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
+	    || info->hdr->e_type != ET_REL
+	    || !elf_check_arch(info->hdr)
+	    || info->hdr->e_shentsize != sizeof(Elf_Shdr))
+		return -ENOEXEC;
+
+	if (info->hdr->e_shoff >= info->len
+	    || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
+		info->len - info->hdr->e_shoff))
+		return -ENOEXEC;
 
-	if (len < sizeof(*hdr))
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_user(const void __user *umod, unsigned long len,
+				  struct load_info *info)
+{
+	info->len = len;
+	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
 	/* Suck in entire file: we'll want most of it. */
-	if ((hdr = vmalloc(len)) == NULL)
+	info->hdr = vmalloc(info->len);
+	if (!info->hdr)
 		return -ENOMEM;
 
-	if (copy_from_user(hdr, umod, len) != 0) {
-		err = -EFAULT;
-		goto free_hdr;
+	if (copy_from_user(info->hdr, umod, info->len) != 0) {
+		vfree(info->hdr);
+		return -EFAULT;
 	}
 
-	err = module_sig_check(info, hdr, &len);
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_fd(int fd, struct load_info *info)
+{
+	struct file *file;
+	int err;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	file = fget(fd);
+	if (!file)
+		return -ENOEXEC;
+
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
 	if (err)
-		goto free_hdr;
+		goto out;
 
-	/* Sanity checks against insmoding binaries or wrong arch,
-	   weird elf version */
-	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
-	    || hdr->e_type != ET_REL
-	    || !elf_check_arch(hdr)
-	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	if (stat.size > INT_MAX) {
+		err = -EFBIG;
+		goto out;
 	}
-
-	if (hdr->e_shoff >= len ||
-	    hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	info->hdr = vmalloc(stat.size);
+	if (!info->hdr) {
+		err = -ENOMEM;
+		goto out;
 	}
 
-	info->hdr = hdr;
-	info->len = len;
-	return 0;
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(info->hdr);
+			err = bytes;
+			goto out;
+		}
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+	info->len = pos;
 
-free_hdr:
-	vfree(hdr);
+out:
+	fput(file);
 	return err;
 }
 
@@ -2945,33 +2983,123 @@ static bool finished_loading(const char *name)
 	return ret;
 }
 
+/* Call module constructors. */
+static void do_mod_ctors(struct module *mod)
+{
+#ifdef CONFIG_CONSTRUCTORS
+	unsigned long i;
+
+	for (i = 0; i < mod->num_ctors; i++)
+		mod->ctors[i]();
+#endif
+}
+
+/* This is where the real work happens */
+static int do_init_module(struct module *mod)
+{
+	int ret = 0;
+
+	blocking_notifier_call_chain(&module_notify_list,
+			MODULE_STATE_COMING, mod);
+
+	/* Set RO and NX regions for core */
+	set_section_ro_nx(mod->module_core,
+				mod->core_text_size,
+				mod->core_ro_size,
+				mod->core_size);
+
+	/* Set RO and NX regions for init */
+	set_section_ro_nx(mod->module_init,
+				mod->init_text_size,
+				mod->init_ro_size,
+				mod->init_size);
+
+	do_mod_ctors(mod);
+	/* Start the module */
+	if (mod->init != NULL)
+		ret = do_one_initcall(mod->init);
+	if (ret < 0) {
+		/* Init routine failed: abort.  Try to protect us from
+                   buggy refcounters. */
+		mod->state = MODULE_STATE_GOING;
+		synchronize_sched();
+		module_put(mod);
+		blocking_notifier_call_chain(&module_notify_list,
+					     MODULE_STATE_GOING, mod);
+		free_module(mod);
+		wake_up_all(&module_wq);
+		return ret;
+	}
+	if (ret > 0) {
+		printk(KERN_WARNING
+"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
+"%s: loading module anyway...\n",
+		       __func__, mod->name, ret,
+		       __func__);
+		dump_stack();
+	}
+
+	/* Now it's a first class citizen! */
+	mod->state = MODULE_STATE_LIVE;
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_LIVE, mod);
+
+	/* We need to finish all async code before the module init sequence is done */
+	async_synchronize_full();
+
+	mutex_lock(&module_mutex);
+	/* Drop initial reference. */
+	module_put(mod);
+	trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+	mod->num_symtab = mod->core_num_syms;
+	mod->symtab = mod->core_symtab;
+	mod->strtab = mod->core_strtab;
+#endif
+	unset_module_init_ro_nx(mod);
+	module_free(mod, mod->module_init);
+	mod->module_init = NULL;
+	mod->init_size = 0;
+	mod->init_ro_size = 0;
+	mod->init_text_size = 0;
+	mutex_unlock(&module_mutex);
+	wake_up_all(&module_wq);
+
+	return 0;
+}
+
+static int may_init_module(void)
+{
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
+		return -EPERM;
+
+	return 0;
+}
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
-static struct module *load_module(void __user *umod,
-				  unsigned long len,
-				  const char __user *uargs)
+static int load_module(struct load_info *info, const char __user *uargs)
 {
-	struct load_info info = { NULL, };
 	struct module *mod, *old;
 	long err;
 
-	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
-	       umod, len, uargs);
+	err = module_sig_check(info);
+	if (err)
+		goto free_copy;
 
-	/* Copy in the blobs from userspace, check they are vaguely sane. */
-	err = copy_and_check(&info, umod, len, uargs);
+	err = elf_header_check(info);
 	if (err)
-		return ERR_PTR(err);
+		goto free_copy;
 
 	/* Figure out module layout, and allocate all the memory. */
-	mod = layout_and_allocate(&info);
+	mod = layout_and_allocate(info);
 	if (IS_ERR(mod)) {
 		err = PTR_ERR(mod);
 		goto free_copy;
 	}
 
 #ifdef CONFIG_MODULE_SIG
-	mod->sig_ok = info.sig_ok;
+	mod->sig_ok = info->sig_ok;
 	if (!mod->sig_ok)
 		add_taint_module(mod, TAINT_FORCED_MODULE);
 #endif
@@ -2983,25 +3111,25 @@ static struct module *load_module(void __user *umod,
 
 	/* Now we've got everything in the final locations, we can
 	 * find optional sections. */
-	find_module_sections(mod, &info);
+	find_module_sections(mod, info);
 
 	err = check_module_license_and_versions(mod);
 	if (err)
 		goto free_unload;
 
 	/* Set up MODINFO_ATTR fields */
-	setup_modinfo(mod, &info);
+	setup_modinfo(mod, info);
 
 	/* Fix up syms, so that st_value is a pointer to location. */
-	err = simplify_symbols(mod, &info);
+	err = simplify_symbols(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
-	err = apply_relocations(mod, &info);
+	err = apply_relocations(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
-	err = post_relocation(mod, &info);
+	err = post_relocation(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
@@ -3041,14 +3169,14 @@ again:
 	}
 
 	/* This has to be done once we're sure module name is unique. */
-	dynamic_debug_setup(info.debug, info.num_debug);
+	dynamic_debug_setup(info->debug, info->num_debug);
 
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
 	if (err < 0)
 		goto ddebug;
 
-	module_bug_finalize(info.hdr, info.sechdrs, mod);
+	module_bug_finalize(info->hdr, info->sechdrs, mod);
 	list_add_rcu(&mod->list, &modules);
 	mutex_unlock(&module_mutex);
 
@@ -3059,16 +3187,17 @@ again:
 		goto unlink;
 
 	/* Link in to syfs. */
-	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
+	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
 	if (err < 0)
 		goto unlink;
 
 	/* Get rid of temporary copy. */
-	free_copy(&info);
+	free_copy(info);
 
 	/* Done! */
 	trace_module_load(mod);
-	return mod;
+
+	return do_init_module(mod);
 
  unlink:
 	mutex_lock(&module_mutex);
@@ -3077,7 +3206,7 @@ again:
 	module_bug_cleanup(mod);
 	wake_up_all(&module_wq);
  ddebug:
-	dynamic_debug_remove(info.debug);
+	dynamic_debug_remove(info->debug);
  unlock:
 	mutex_unlock(&module_mutex);
 	synchronize_sched();
@@ -3089,106 +3218,48 @@ again:
  free_unload:
 	module_unload_free(mod);
  free_module:
-	module_deallocate(mod, &info);
+	module_deallocate(mod, info);
  free_copy:
-	free_copy(&info);
-	return ERR_PTR(err);
-}
-
-/* Call module constructors. */
-static void do_mod_ctors(struct module *mod)
-{
-#ifdef CONFIG_CONSTRUCTORS
-	unsigned long i;
-
-	for (i = 0; i < mod->num_ctors; i++)
-		mod->ctors[i]();
-#endif
+	free_copy(info);
+	return err;
 }
 
-/* This is where the real work happens */
 SYSCALL_DEFINE3(init_module, void __user *, umod,
 		unsigned long, len, const char __user *, uargs)
 {
-	struct module *mod;
-	int ret = 0;
-
-	/* Must have permission */
-	if (!capable(CAP_SYS_MODULE) || modules_disabled)
-		return -EPERM;
+	int err;
+	struct load_info info = { };
 
-	/* Do all the hard work */
-	mod = load_module(umod, len, uargs);
-	if (IS_ERR(mod))
-		return PTR_ERR(mod);
+	err = may_init_module();
+	if (err)
+		return err;
 
-	blocking_notifier_call_chain(&module_notify_list,
-			MODULE_STATE_COMING, mod);
+	pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
+	       umod, len, uargs);
 
-	/* Set RO and NX regions for core */
-	set_section_ro_nx(mod->module_core,
-				mod->core_text_size,
-				mod->core_ro_size,
-				mod->core_size);
+	err = copy_module_from_user(umod, len, &info);
+	if (err)
+		return err;
 
-	/* Set RO and NX regions for init */
-	set_section_ro_nx(mod->module_init,
-				mod->init_text_size,
-				mod->init_ro_size,
-				mod->init_size);
+	return load_module(&info, uargs);
+}
 
-	do_mod_ctors(mod);
-	/* Start the module */
-	if (mod->init != NULL)
-		ret = do_one_initcall(mod->init);
-	if (ret < 0) {
-		/* Init routine failed: abort.  Try to protect us from
-                   buggy refcounters. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
-		module_put(mod);
-		blocking_notifier_call_chain(&module_notify_list,
-					     MODULE_STATE_GOING, mod);
-		free_module(mod);
-		wake_up_all(&module_wq);
-		return ret;
-	}
-	if (ret > 0) {
-		printk(KERN_WARNING
-"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
-"%s: loading module anyway...\n",
-		       __func__, mod->name, ret,
-		       __func__);
-		dump_stack();
-	}
+SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
+{
+	int err;
+	struct load_info info = { };
 
-	/* Now it's a first class citizen! */
-	mod->state = MODULE_STATE_LIVE;
-	blocking_notifier_call_chain(&module_notify_list,
-				     MODULE_STATE_LIVE, mod);
+	err = may_init_module();
+	if (err)
+		return err;
 
-	/* We need to finish all async code before the module init sequence is done */
-	async_synchronize_full();
+	pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs);
 
-	mutex_lock(&module_mutex);
-	/* Drop initial reference. */
-	module_put(mod);
-	trim_init_extable(mod);
-#ifdef CONFIG_KALLSYMS
-	mod->num_symtab = mod->core_num_syms;
-	mod->symtab = mod->core_symtab;
-	mod->strtab = mod->core_strtab;
-#endif
-	unset_module_init_ro_nx(mod);
-	module_free(mod, mod->module_init);
-	mod->module_init = NULL;
-	mod->init_size = 0;
-	mod->init_ro_size = 0;
-	mod->init_text_size = 0;
-	mutex_unlock(&module_mutex);
-	wake_up_all(&module_wq);
+	err = copy_module_from_fd(fd, &info);
+	if (err)
+		return err;
 
-	return 0;
+	return load_module(&info, uargs);
 }
 
 static inline int within(unsigned long addr, void *start, unsigned long size)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index dbff751e4086..395084d4ce16 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff);
 cond_syscall(sys_kexec_load);
 cond_syscall(compat_sys_kexec_load);
 cond_syscall(sys_init_module);
+cond_syscall(sys_finit_module);
 cond_syscall(sys_delete_module);
 cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
-- 
cgit v1.2.3-55-g7522


From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Mon, 22 Oct 2012 18:09:41 +1030
Subject: module: add flags arg to sys_finit_module()

Thanks to Michael Kerrisk for keeping us honest.  These flags are actually
useful for eliminating the only case where kmod has to mangle a module's
internals: for overriding module versioning.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Acked-by: Kees Cook <keescook@chromium.org>
---
 include/linux/syscalls.h    |  2 +-
 include/uapi/linux/module.h |  8 ++++++++
 kernel/module.c             | 40 ++++++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 15 deletions(-)
 create mode 100644 include/uapi/linux/module.h

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 32bc035bcd68..8cf7b508cb50 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
-asmlinkage long sys_finit_module(int fd, const char __user *uargs);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
 #endif
diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h
new file mode 100644
index 000000000000..38da4258b12f
--- /dev/null
+++ b/include/uapi/linux/module.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MODULE_H
+#define _UAPI_LINUX_MODULE_H
+
+/* Flags for sys_finit_module: */
+#define MODULE_INIT_IGNORE_MODVERSIONS	1
+#define MODULE_INIT_IGNORE_VERMAGIC	2
+
+#endif /* _UAPI_LINUX_MODULE_H */
diff --git a/kernel/module.c b/kernel/module.c
index 6d2c4e4ca1f5..1395ca382fb5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -60,6 +60,7 @@
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
 #include <linux/fips.h>
+#include <uapi/linux/module.h>
 #include "module-internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -2553,7 +2554,7 @@ static void free_copy(struct load_info *info)
 	vfree(info->hdr);
 }
 
-static int rewrite_section_headers(struct load_info *info)
+static int rewrite_section_headers(struct load_info *info, int flags)
 {
 	unsigned int i;
 
@@ -2581,7 +2582,10 @@ static int rewrite_section_headers(struct load_info *info)
 	}
 
 	/* Track but don't keep modinfo and version sections. */
-	info->index.vers = find_sec(info, "__versions");
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
+		info->index.vers = 0; /* Pretend no __versions section! */
+	else
+		info->index.vers = find_sec(info, "__versions");
 	info->index.info = find_sec(info, ".modinfo");
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2596,7 +2600,7 @@ static int rewrite_section_headers(struct load_info *info)
  * Return the temporary module pointer (we'll replace it with the final
  * one when we move the module sections around).
  */
-static struct module *setup_load_info(struct load_info *info)
+static struct module *setup_load_info(struct load_info *info, int flags)
 {
 	unsigned int i;
 	int err;
@@ -2607,7 +2611,7 @@ static struct module *setup_load_info(struct load_info *info)
 	info->secstrings = (void *)info->hdr
 		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;
 
-	err = rewrite_section_headers(info);
+	err = rewrite_section_headers(info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2645,11 +2649,14 @@ static struct module *setup_load_info(struct load_info *info)
 	return mod;
 }
 
-static int check_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 {
 	const char *modmagic = get_modinfo(info, "vermagic");
 	int err;
 
+	if (flags & MODULE_INIT_IGNORE_VERMAGIC)
+		modmagic = NULL;
+
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
 		err = try_to_force_load(mod, "bad vermagic");
@@ -2885,18 +2892,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 	return 0;
 }
 
-static struct module *layout_and_allocate(struct load_info *info)
+static struct module *layout_and_allocate(struct load_info *info, int flags)
 {
 	/* Module within temporary copy. */
 	struct module *mod;
 	Elf_Shdr *pcpusec;
 	int err;
 
-	mod = setup_load_info(info);
+	mod = setup_load_info(info, flags);
 	if (IS_ERR(mod))
 		return mod;
 
-	err = check_modinfo(mod, info);
+	err = check_modinfo(mod, info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -3078,7 +3085,8 @@ static int may_init_module(void)
 
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
-static int load_module(struct load_info *info, const char __user *uargs)
+static int load_module(struct load_info *info, const char __user *uargs,
+		       int flags)
 {
 	struct module *mod, *old;
 	long err;
@@ -3092,7 +3100,7 @@ static int load_module(struct load_info *info, const char __user *uargs)
 		goto free_copy;
 
 	/* Figure out module layout, and allocate all the memory. */
-	mod = layout_and_allocate(info);
+	mod = layout_and_allocate(info, flags);
 	if (IS_ERR(mod)) {
 		err = PTR_ERR(mod);
 		goto free_copy;
@@ -3241,10 +3249,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	if (err)
 		return err;
 
-	return load_module(&info, uargs);
+	return load_module(&info, uargs, 0);
 }
 
-SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
+SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
 {
 	int err;
 	struct load_info info = { };
@@ -3253,13 +3261,17 @@ SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
 	if (err)
 		return err;
 
-	pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs);
+	pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
+
+	if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
+		      |MODULE_INIT_IGNORE_VERMAGIC))
+		return -EINVAL;
 
 	err = copy_module_from_fd(fd, &info);
 	if (err)
 		return err;
 
-	return load_module(&info, uargs);
+	return load_module(&info, uargs, flags);
 }
 
 static inline int within(unsigned long addr, void *start, unsigned long size)
-- 
cgit v1.2.3-55-g7522


From 2e72d51b4ac32989496870cd8171b3682fea1839 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 16 Oct 2012 07:32:07 +1030
Subject: security: introduce kernel_module_from_file hook

Now that kernel module origins can be reasoned about, provide a hook to
the LSMs to make policy decisions about the module file. This will let
Chrome OS enforce that loadable kernel modules can only come from its
read-only hash-verified root filesystem. Other LSMs can, for example,
read extended attributes for signatures, etc.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Acked-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/security.h | 13 +++++++++++++
 kernel/module.c          | 11 +++++++++++
 security/capability.c    |  6 ++++++
 security/security.c      |  5 +++++
 4 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 05e88bdcf7d9..0f6afc657f77 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -694,6 +694,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
+ * @kernel_module_from_file:
+ *	Load a kernel module from userspace.
+ *	@file contains the file structure pointing to the file containing
+ *	the kernel module to load. If the module is being loaded from a blob,
+ *	this argument will be NULL.
+ *	Return 0 if permission is granted.
  * @task_fix_setuid:
  *	Update the module's state after setting one or more of the user
  *	identity attributes of the current process.  The @flags parameter
@@ -1508,6 +1514,7 @@ struct security_operations {
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_module_from_file)(struct file *file);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
 	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
@@ -1765,6 +1772,7 @@ void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
+int security_kernel_module_from_file(struct file *file);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
@@ -2278,6 +2286,11 @@ static inline int security_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static inline int security_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static inline int security_task_fix_setuid(struct cred *new,
 					   const struct cred *old,
 					   int flags)
diff --git a/kernel/module.c b/kernel/module.c
index 1395ca382fb5..a1d2ed8bab93 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -29,6 +29,7 @@
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
 #include <linux/proc_fs.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -2485,10 +2486,16 @@ static int elf_header_check(struct load_info *info)
 static int copy_module_from_user(const void __user *umod, unsigned long len,
 				  struct load_info *info)
 {
+	int err;
+
 	info->len = len;
 	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
+	err = security_kernel_module_from_file(NULL);
+	if (err)
+		return err;
+
 	/* Suck in entire file: we'll want most of it. */
 	info->hdr = vmalloc(info->len);
 	if (!info->hdr)
@@ -2515,6 +2522,10 @@ static int copy_module_from_fd(int fd, struct load_info *info)
 	if (!file)
 		return -ENOEXEC;
 
+	err = security_kernel_module_from_file(file);
+	if (err)
+		goto out;
+
 	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
 	if (err)
 		goto out;
diff --git a/security/capability.c b/security/capability.c
index b14a30c234b8..0fe5a026aef8 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -395,6 +395,11 @@ static int cap_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static int cap_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static int cap_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return 0;
@@ -967,6 +972,7 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, kernel_act_as);
 	set_to_cap_if_null(ops, kernel_create_files_as);
 	set_to_cap_if_null(ops, kernel_module_request);
+	set_to_cap_if_null(ops, kernel_module_from_file);
 	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setpgid);
 	set_to_cap_if_null(ops, task_getpgid);
diff --git a/security/security.c b/security/security.c
index 8dcd4ae10a5f..ce88630de15d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -820,6 +820,11 @@ int security_kernel_module_request(char *kmod_name)
 	return security_ops->kernel_module_request(kmod_name);
 }
 
+int security_kernel_module_from_file(struct file *file)
+{
+	return security_ops->kernel_module_from_file(file);
+}
+
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
-- 
cgit v1.2.3-55-g7522


From fdf90729e57812cb12d7938e2dee7c71e875fb08 Mon Sep 17 00:00:00 2001
From: Mimi Zohar
Date: Tue, 16 Oct 2012 12:40:08 +1030
Subject: ima: support new kernel module syscall

With the addition of the new kernel module syscall, which defines two
arguments - a file descriptor to the kernel module and a pointer to a NULL
terminated string of module arguments - it is now possible to measure and
appraise kernel modules like any other file on the file system.

This patch adds support to measure and appraise kernel modules in an
extensible and consistent manner.

To support filesystems without extended attribute support, additional
patches could pass the signature as the first parameter.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/ABI/testing/ima_policy |  3 ++-
 include/linux/ima.h                  |  6 ++++++
 security/integrity/ima/ima.h         |  2 +-
 security/integrity/ima/ima_api.c     |  4 ++--
 security/integrity/ima/ima_main.c    | 21 +++++++++++++++++++++
 security/integrity/ima/ima_policy.c  |  3 +++
 security/security.c                  |  7 ++++++-
 7 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 986946613542..ec0a38ef3145 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -23,7 +23,7 @@ Description:
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 
-		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK]
+		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK][MODULE_CHECK]
 			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
 			fsmagic:= hex value
 			uid:= decimal value
@@ -53,6 +53,7 @@ Description:
 			measure func=BPRM_CHECK
 			measure func=FILE_MMAP mask=MAY_EXEC
 			measure func=FILE_CHECK mask=MAY_READ uid=0
+			measure func=MODULE_CHECK uid=0
 			appraise fowner=0
 
 		The default policy measures all executables in bprm_check,
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 2c7223d7e73b..86c361e947b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -18,6 +18,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_module_check(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -40,6 +41,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
+static inline int ima_module_check(struct file *file)
+{
+	return 0;
+}
+
 #endif /* CONFIG_IMA_H */
 
 #ifdef CONFIG_IMA_APPRAISE
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 6ee8826662cc..3b2adb794f15 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -127,7 +127,7 @@ struct integrity_iint_cache *integrity_iint_insert(struct inode *inode);
 struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
 
 /* IMA policy related functions */
-enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, POST_SETATTR };
+enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, MODULE_CHECK, POST_SETATTR };
 
 int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
 		     int flags);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index b356884fb3ef..0cea3db21657 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -100,12 +100,12 @@ err_out:
  * ima_get_action - appraise & measure decision based on policy.
  * @inode: pointer to inode to measure
  * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXECUTE)
- * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP)
+ * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP, MODULE_CHECK)
  *
  * The policy is defined in terms of keypairs:
  * 		subj=, obj=, type=, func=, mask=, fsmagic=
  *	subj,obj, and type: are LSM specific.
- * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP
+ * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP | MODULE_CHECK
  * 	mask: contains the permission mask
  *	fsmagic: hex value
  *
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 73c9a268253e..45de18e9a6f2 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -280,6 +280,27 @@ int ima_file_check(struct file *file, int mask)
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
+/**
+ * ima_module_check - based on policy, collect/store/appraise measurement.
+ * @file: pointer to the file to be measured/appraised
+ *
+ * Measure/appraise kernel modules based on policy.
+ *
+ * Always return 0 and audit dentry_open failures.
+ * Return code is based upon measurement appraisal.
+ */
+int ima_module_check(struct file *file)
+{
+	int rc;
+
+	if (!file)
+		rc = INTEGRITY_UNKNOWN;
+	else
+		rc = process_measurement(file, file->f_dentry->d_name.name,
+					 MAY_EXEC, MODULE_CHECK);
+	return (ima_appraise & IMA_APPRAISE_ENFORCE) ? rc : 0;
+}
+
 static int __init init_ima(void)
 {
 	int error;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index c7dacd2eab7a..af7d182d5a46 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -80,6 +80,7 @@ static struct ima_rule_entry default_rules[] = {
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = FILE_CHECK,.mask = MAY_READ,.uid = GLOBAL_ROOT_UID,
 	 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
+	{.action = MEASURE,.func = MODULE_CHECK, .flags = IMA_FUNC},
 };
 
 static struct ima_rule_entry default_appraise_rules[] = {
@@ -401,6 +402,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 			/* PATH_CHECK is for backwards compat */
 			else if (strcmp(args[0].from, "PATH_CHECK") == 0)
 				entry->func = FILE_CHECK;
+			else if (strcmp(args[0].from, "MODULE_CHECK") == 0)
+				entry->func = MODULE_CHECK;
 			else if (strcmp(args[0].from, "FILE_MMAP") == 0)
 				entry->func = FILE_MMAP;
 			else if (strcmp(args[0].from, "BPRM_CHECK") == 0)
diff --git a/security/security.c b/security/security.c
index ce88630de15d..daa97f4ac9d1 100644
--- a/security/security.c
+++ b/security/security.c
@@ -822,7 +822,12 @@ int security_kernel_module_request(char *kmod_name)
 
 int security_kernel_module_from_file(struct file *file)
 {
-	return security_ops->kernel_module_from_file(file);
+	int ret;
+
+	ret = security_ops->kernel_module_from_file(file);
+	if (ret)
+		return ret;
+	return ima_module_check(file);
 }
 
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
-- 
cgit v1.2.3-55-g7522


From 6f33d58794ef4cef4b2c706029810f9688bd3026 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 22 Nov 2012 12:30:25 +1030
Subject: __UNIQUE_ID()

Jan Beulich points out __COUNTER__ (gcc 4.3 and above), so let's use
that to create unique ids.  This is better than __LINE__ which we use
today, so provide a wrapper.

Stanislaw Gruszka <sgruszka@redhat.com> reported that some module parameters
start with a digit, so we need to prepend when we for the unique id.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
 include/linux/compiler-gcc4.h | 2 ++
 include/linux/compiler.h      | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c2b023..56c802cba7f6 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -31,6 +31,8 @@
 
 #define __linktime_error(message) __attribute__((__error__(message)))
 
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
 #if __GNUC_MINOR__ >= 5
 /*
  * Mark a position in code as unreachable.  This can be used to
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f430e4162f41..5f45335e1ac7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -42,6 +42,10 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __rcu
 #endif
 
+/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
 #ifdef __KERNEL__
 
 #ifdef __GNUC__
@@ -164,6 +168,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
     (typeof(ptr)) (__ptr + (off)); })
 #endif
 
+/* Not-quite-unique ID. */
+#ifndef __UNIQUE_ID
+# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-55-g7522


From 34182eea36fc1d70d748b0947c873314980ba806 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 22 Nov 2012 12:30:25 +1030
Subject: moduleparam: use __UNIQUE_ID()

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index d6a58065c09c..137b4198fc03 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -16,17 +16,15 @@
 /* Chosen so that structs with an unsigned long line up. */
 #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
 
-#define ___module_cat(a,b) __mod_ ## a ## b
-#define __module_cat(a,b) ___module_cat(a,b)
 #ifdef MODULE
 #define __MODULE_INFO(tag, name, info)					  \
-static const char __module_cat(name,__LINE__)[]				  \
+static const char __UNIQUE_ID(name)[]					  \
   __used __attribute__((section(".modinfo"), unused, aligned(1)))	  \
   = __stringify(tag) "=" info
 #else  /* !MODULE */
 /* This struct is here for syntactic coherency, it is not used */
 #define __MODULE_INFO(tag, name, info)					  \
-  struct __module_cat(name,__LINE__) {}
+  struct __UNIQUE_ID(name) {}
 #endif
 #define __MODULE_PARM_TYPE(name, _type)					  \
   __MODULE_INFO(parmtype, name##type, #name ":" _type)
-- 
cgit v1.2.3-55-g7522


From facc0a6bd494ce21e31b34fc355ecf702518272b Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 5 Dec 2012 11:55:28 +1030
Subject: ASN.1: Define indefinite length marker constant

Define a constant to hold the marker value seen in an indefinite-length
element.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/asn1.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/asn1.h b/include/linux/asn1.h
index 5c3f4e4b9a23..eed6982860ba 100644
--- a/include/linux/asn1.h
+++ b/include/linux/asn1.h
@@ -64,4 +64,6 @@ enum asn1_tag {
 	ASN1_LONG_TAG	= 31	/* Long form tag */
 };
 
+#define ASN1_INDEFINITE_LENGTH 0x80
+
 #endif /* _LINUX_ASN1_H */
-- 
cgit v1.2.3-55-g7522


From 8349e5aeaadd160b7cce554a62a05be4b2d894aa Mon Sep 17 00:00:00 2001
From: Jeff Garzik
Date: Fri, 14 Dec 2012 09:34:01 -0500
Subject: Revert "libata: check SATA_SETTINGS log with HW Feature Ctrl"

This reverts commit de90cd71f68e947d3bd6c3f2ef5731ead010a768.

Shane Huang writes:

  Please suspend this patch because I just received two new
  DevSlp drives but found word 78 bit 5 is _not_ set.

  I'm checking with the drive vendor whether he gave me
  the wrong information. If bit 5 is not the necessary and
  sufficient condition, I will implement another patch to
  replace ata_device->sata_settings into ->devslp_timing.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 3 ++-
 include/linux/ata.h       | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 47d59616fe3d..9e8b99af400d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2331,8 +2331,9 @@ int ata_dev_configure(struct ata_device *dev)
 
 		/* Obtain SATA Settings page from Identify Device Data Log,
 		 * which contains DevSlp timing variables etc.
+		 * Exclude old devices with ata_id_has_ncq()
 		 */
-		if (ata_id_has_hw_feature_ctrl(dev->id)) {
+		if (ata_id_has_ncq(dev->id)) {
 			err_mask = ata_read_log_page(dev,
 						     ATA_LOG_SATA_ID_DEV_DATA,
 						     ATA_LOG_SATA_SETTINGS,
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 18cbb93fdbca..408da9502177 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -593,7 +593,6 @@ static inline int ata_is_data(u8 prot)
 #define ata_id_cdb_intr(id)	(((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
 #define ata_id_has_da(id)	((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
 #define ata_id_has_devslp(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
-#define ata_id_has_hw_feature_ctrl(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 5))
 
 static inline bool ata_id_has_hipm(const u16 *id)
 {
-- 
cgit v1.2.3-55-g7522


From 8dd2cb7e880d2f77fba53b523c99133ad5054cfd Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Fri, 14 Dec 2012 11:15:36 +0800
Subject: block: discard granularity might not be power of 2

In MD raid case, discard granularity might not be power of 2, for example, a
4-disk raid5 has 3*chunk_size discard granularity. Correct the calculation for
such cases.

Reported-by: Neil Brown <neilb@suse.de>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c        | 23 +++++++++++++----------
 block/blk-settings.c   |  6 +++---
 include/linux/blkdev.h |  7 ++++---
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9373b58dfab1..5677fd33d7d2 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -43,8 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
-	unsigned int max_discard_sectors;
-	unsigned int granularity, alignment, mask;
+	sector_t max_discard_sectors;
+	sector_t granularity, alignment;
 	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
@@ -57,15 +57,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	mask = granularity - 1;
-	alignment = (bdev_discard_alignment(bdev) >> 9) & mask;
+	alignment = bdev_discard_alignment(bdev) >> 9;
+	alignment = sector_div(alignment, granularity);
 
 	/*
 	 * Ensure that max_discard_sectors is of the proper
 	 * granularity, so that requests stay aligned after a split.
 	 */
 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	max_discard_sectors = round_down(max_discard_sectors, granularity);
+	sector_div(max_discard_sectors, granularity);
+	max_discard_sectors *= granularity;
 	if (unlikely(!max_discard_sectors)) {
 		/* Avoid infinite loop below. Being cautious never hurts. */
 		return -EOPNOTSUPP;
@@ -83,7 +84,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	while (nr_sects) {
 		unsigned int req_sects;
-		sector_t end_sect;
+		sector_t end_sect, tmp;
 
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
@@ -98,10 +99,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
 		end_sect = sector + req_sects;
-		if (req_sects < nr_sects && (end_sect & mask) != alignment) {
-			end_sect =
-				round_down(end_sect - alignment, granularity)
-				+ alignment;
+		tmp = end_sect;
+		if (req_sects < nr_sects &&
+		    sector_div(tmp, granularity) != alignment) {
+			end_sect = end_sect - alignment;
+			sector_div(end_sect, granularity);
+			end_sect = end_sect * granularity + alignment;
 			req_sects = end_sect - sector;
 		}
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 779bb7646bcd..c50ecf0ea3b1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			bottom = b->discard_granularity + alignment;
 
 			/* Verify that top and bottom intervals line up */
-			if (max(top, bottom) & (min(top, bottom) - 1))
+			if ((max(top, bottom) % min(top, bottom)) != 0)
 				t->discard_misaligned = 1;
 		}
 
@@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
-		t->discard_alignment = lcm(t->discard_alignment, alignment) &
-			(t->discard_granularity - 1);
+		t->discard_alignment = lcm(t->discard_alignment, alignment) %
+			t->discard_granularity;
 	}
 
 	return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c9d233e727f2..acb4f7bbbd32 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1188,13 +1188,14 @@ static inline int queue_discard_alignment(struct request_queue *q)
 
 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+	sector_t alignment = sector << 9;
+	alignment = sector_div(alignment, lim->discard_granularity);
 
 	if (!lim->max_discard_sectors)
 		return 0;
 
-	return (lim->discard_granularity + lim->discard_alignment - alignment)
-		& (lim->discard_granularity - 1);
+	alignment = lim->discard_granularity + lim->discard_alignment - alignment;
+	return sector_div(alignment, lim->discard_granularity);
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
-- 
cgit v1.2.3-55-g7522


From 63b68901dfd590cc13d4fe5c08dec2ca75b3c4aa Mon Sep 17 00:00:00 2001
From: Roger Quadros
Date: Fri, 14 Dec 2012 09:09:11 -0800
Subject: mfd: omap-usb-host: get rid of cpu_is_omap..() macros

Instead of using cpu_is_omap..() macros in the device driver we
rely on information provided in the platform data.

The only information we need is whether the USB Host module has
a single ULPI bypass control bit for all ports or individual bypass
control bits for each port. OMAP3 REV2.1 and earlier have the former.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
[tony@atomide.com: updated to remove plat/cpu.h]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/usb-host.c         | 4 ++++
 drivers/mfd/omap-usb-host.c            | 3 +--
 include/linux/platform_data/usb-omap.h | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c
index d1dbe125b34f..2e44e8a22884 100644
--- a/arch/arm/mach-omap2/usb-host.c
+++ b/arch/arm/mach-omap2/usb-host.c
@@ -508,6 +508,10 @@ void __init usbhs_init(const struct usbhs_omap_board_data *pdata)
 	if (cpu_is_omap34xx()) {
 		setup_ehci_io_mux(pdata->port_mode);
 		setup_ohci_io_mux(pdata->port_mode);
+
+		if (omap_rev() <= OMAP3430_REV_ES2_1)
+			usbhs_data.single_ulpi_bypass = true;
+
 	} else if (cpu_is_omap44xx()) {
 		setup_4430ehci_io_mux(pdata->port_mode);
 		setup_4430ohci_io_mux(pdata->port_mode);
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 770a0d01e0b9..05164d7f054b 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 #include <linux/gpio.h>
-#include <plat/cpu.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/usb-omap.h>
 #include <linux/pm_runtime.h>
@@ -384,7 +383,7 @@ static void omap_usbhs_init(struct device *dev)
 			reg &= ~OMAP_UHH_HOSTCONFIG_P3_CONNECT_STATUS;
 
 		/* Bypass the TLL module for PHY mode operation */
-		if (cpu_is_omap3430() && (omap_rev() <= OMAP3430_REV_ES2_1)) {
+		if (pdata->single_ulpi_bypass) {
 			dev_dbg(dev, "OMAP3 ES version <= ES2.1\n");
 			if (is_ehci_phy_mode(pdata->port_mode[0]) ||
 				is_ehci_phy_mode(pdata->port_mode[1]) ||
diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
index 8570bcfe6311..ef65b67c56c3 100644
--- a/include/linux/platform_data/usb-omap.h
+++ b/include/linux/platform_data/usb-omap.h
@@ -59,6 +59,9 @@ struct usbhs_omap_platform_data {
 
 	struct ehci_hcd_omap_platform_data	*ehci_data;
 	struct ohci_hcd_omap_platform_data	*ohci_data;
+
+	/* OMAP3 <= ES2.1 have a single ulpi bypass control bit */
+	unsigned				single_ulpi_bypass:1;
 };
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3-55-g7522


From d9ba573718666df2e7e30d671f81bba39d07f91c Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Fri, 14 Dec 2012 09:09:11 -0800
Subject: ARM: OMAP: Move plat/omap-serial.h to
 include/linux/platform_data/serial-omap.h

We need to move this file to allow ARM multiplatform configurations
to build for omap2+. This can now be done as this file now only
contains platform_data.

cc: Russell King <linux@arm.linux.org.uk>
cc: Alan Cox <alan@linux.intel.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Govindraj.R <govindraj.raja@ti.com>
cc: Kevin Hilman <khilman@ti.com>
cc: linux-serial@vger.kernel.org
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/serial.c                  |  3 +-
 arch/arm/plat-omap/include/plat/omap-serial.h | 51 ---------------------------
 drivers/tty/serial/omap-serial.c              |  3 +-
 include/linux/platform_data/serial-omap.h     | 51 +++++++++++++++++++++++++++
 4 files changed, 53 insertions(+), 55 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/omap-serial.h
 create mode 100644 include/linux/platform_data/serial-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 93d102535c85..04fdbc4c499b 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -27,8 +27,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/console.h>
 #include <linux/omap-dma.h>
-
-#include <plat/omap-serial.h>
+#include <linux/platform_data/serial-omap.h>
 
 #include "common.h"
 #include "omap_hwmod.h"
diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
deleted file mode 100644
index ff9b0aab5281..000000000000
--- a/arch/arm/plat-omap/include/plat/omap-serial.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Driver for OMAP-UART controller.
- * Based on drivers/serial/8250.c
- *
- * Copyright (C) 2010 Texas Instruments.
- *
- * Authors:
- *	Govindraj R	<govindraj.raja@ti.com>
- *	Thara Gopinath	<thara@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __OMAP_SERIAL_H__
-#define __OMAP_SERIAL_H__
-
-#include <linux/serial_core.h>
-#include <linux/device.h>
-#include <linux/pm_qos.h>
-
-#define DRIVER_NAME	"omap_uart"
-
-/*
- * Use tty device name as ttyO, [O -> OMAP]
- * in bootargs we specify as console=ttyO0 if uart1
- * is used as console uart.
- */
-#define OMAP_SERIAL_NAME	"ttyO"
-
-struct omap_uart_port_info {
-	bool			dma_enabled;	/* To specify DMA Mode */
-	unsigned int		uartclk;	/* UART clock rate */
-	upf_t			flags;		/* UPF_* flags */
-	unsigned int		dma_rx_buf_size;
-	unsigned int		dma_rx_timeout;
-	unsigned int		autosuspend_timeout;
-	unsigned int		dma_rx_poll_rate;
-	int			DTR_gpio;
-	int			DTR_inverted;
-	int			DTR_present;
-
-	int (*get_context_loss_count)(struct device *);
-	void (*set_forceidle)(struct device *);
-	void (*set_noidle)(struct device *);
-	void (*enable_wakeup)(struct device *, bool);
-};
-
-#endif /* __OMAP_SERIAL_H__ */
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 23f797eb7a28..57d6b29c039c 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -41,8 +41,7 @@
 #include <linux/of.h>
 #include <linux/gpio.h>
 #include <linux/pinctrl/consumer.h>
-
-#include <plat/omap-serial.h>
+#include <linux/platform_data/serial-omap.h>
 
 #define OMAP_MAX_HSUART_PORTS	6
 
diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h
new file mode 100644
index 000000000000..ff9b0aab5281
--- /dev/null
+++ b/include/linux/platform_data/serial-omap.h
@@ -0,0 +1,51 @@
+/*
+ * Driver for OMAP-UART controller.
+ * Based on drivers/serial/8250.c
+ *
+ * Copyright (C) 2010 Texas Instruments.
+ *
+ * Authors:
+ *	Govindraj R	<govindraj.raja@ti.com>
+ *	Thara Gopinath	<thara@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __OMAP_SERIAL_H__
+#define __OMAP_SERIAL_H__
+
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/pm_qos.h>
+
+#define DRIVER_NAME	"omap_uart"
+
+/*
+ * Use tty device name as ttyO, [O -> OMAP]
+ * in bootargs we specify as console=ttyO0 if uart1
+ * is used as console uart.
+ */
+#define OMAP_SERIAL_NAME	"ttyO"
+
+struct omap_uart_port_info {
+	bool			dma_enabled;	/* To specify DMA Mode */
+	unsigned int		uartclk;	/* UART clock rate */
+	upf_t			flags;		/* UPF_* flags */
+	unsigned int		dma_rx_buf_size;
+	unsigned int		dma_rx_timeout;
+	unsigned int		autosuspend_timeout;
+	unsigned int		dma_rx_poll_rate;
+	int			DTR_gpio;
+	int			DTR_inverted;
+	int			DTR_present;
+
+	int (*get_context_loss_count)(struct device *);
+	void (*set_forceidle)(struct device *);
+	void (*set_noidle)(struct device *);
+	void (*enable_wakeup)(struct device *, bool);
+};
+
+#endif /* __OMAP_SERIAL_H__ */
-- 
cgit v1.2.3-55-g7522


From 8e63b6a8adabb0551124c3b78f7f5f36912c3728 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sat, 15 Dec 2012 15:21:52 -0500
Subject: NFSv4.1: Move the RPC timestamp out of the slot.

Shave a few bytes off the slot table size by moving the RPC timestamp
into the sequence results.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 14 +++++++-------
 fs/nfs/nfs4session.c    |  3 +--
 fs/nfs/nfs4session.h    |  1 -
 include/linux/nfs_xdr.h |  1 +
 4 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9003b8f6b77f..afb428e63b52 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -419,7 +419,6 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 {
 	struct nfs4_session *session;
 	struct nfs4_slot *slot;
-	unsigned long timestamp;
 	struct nfs_client *clp;
 	int ret = 1;
 
@@ -444,9 +443,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 	case 0:
 		/* Update the slot's sequence and clientid lease timer */
 		++slot->seq_nr;
-		timestamp = slot->renewal_time;
 		clp = session->clp;
-		do_renew_lease(clp, timestamp);
+		do_renew_lease(clp, res->sr_timestamp);
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
 			nfs4_schedule_lease_recovery(clp);
@@ -473,10 +471,11 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 * Could this slot have been previously retired?
 		 * If so, then the server may be expecting seq_nr = 1!
 		 */
-		if (slot->seq_nr == 1)
-			break;
-		slot->seq_nr = 1;
-		goto retry_nowait;
+		if (slot->seq_nr != 1) {
+			slot->seq_nr = 1;
+			goto retry_nowait;
+		}
+		break;
 	case -NFS4ERR_SEQ_FALSE_RETRY:
 		++slot->seq_nr;
 		goto retry_nowait;
@@ -567,6 +566,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 			slot->slot_nr, slot->seq_nr);
 
 	res->sr_slot = slot;
+	res->sr_timestamp = jiffies;
 	res->sr_status_flags = 0;
 	/*
 	 * sr_status is only set in decode_sequence, and so will remain
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 1e6c87c443a7..0e1cc1f4e51a 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -143,7 +143,6 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 	if (slotid > tbl->highest_used_slotid ||
 			tbl->highest_used_slotid == NFS4_NO_SLOT)
 		tbl->highest_used_slotid = slotid;
-	ret->renewal_time = jiffies;
 	ret->generation = tbl->generation;
 
 out:
@@ -228,9 +227,9 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
 
 	if (nfs4_session_draining(tbl->session) && !args->sa_privileged)
 		return false;
-	slot->renewal_time = jiffies;
 	slot->generation = tbl->generation;
 	args->sa_slot = slot;
+	res->sr_timestamp = jiffies;
 	res->sr_slot = slot;
 	res->sr_status_flags = 0;
 	res->sr_status = 1;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 04f834cab16c..d17b08091d4b 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -19,7 +19,6 @@ struct nfs4_slot {
 	struct nfs4_slot_table	*table;
 	struct nfs4_slot	*next;
 	unsigned long		generation;
-	unsigned long		renewal_time;
 	u32			slot_nr;
 	u32		 	seq_nr;
 };
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a55abd499c21..29adb12c7ecf 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -194,6 +194,7 @@ struct nfs4_sequence_args {
 
 struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
+	unsigned long		sr_timestamp;
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
 	u32			sr_highest_slotid;
-- 
cgit v1.2.3-55-g7522


From 11520e5e7c1855fc3bf202bb3be35a39d9efa034 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 15 Dec 2012 15:15:24 -0800
Subject: Revert "x86-64/efi: Use EFI to deal with platform wall clock (again)"

This reverts commit bd52276fa1d4 ("x86-64/efi: Use EFI to deal with
platform wall clock (again)"), and the two supporting commits:

  da5a108d05b4: "x86/kernel: remove tboot 1:1 page table creation code"

  185034e72d59: "x86, efi: 1:1 pagetable mapping for virtual EFI calls")

as they all depend semantically on commit 53b87cf088e2 ("x86, mm:
Include the entire kernel memory map in trampoline_pgd") that got
reverted earlier due to the problems it caused.

This was pointed out by Yinghai Lu, and verified by me on my Macbook Air
that uses EFI.

Pointed-out-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/efi.h     | 28 ++++-----------
 arch/x86/kernel/tboot.c        | 78 +++++++++++++++++++++++++++++++++++++++---
 arch/x86/mm/pageattr.c         | 10 +++---
 arch/x86/platform/efi/efi.c    | 30 +++++++++++++---
 arch/x86/platform/efi/efi_64.c | 15 --------
 include/linux/efi.h            |  2 ++
 init/main.c                    |  8 ++---
 7 files changed, 116 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index fd13815fe85c..6e8fdf5ad113 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -69,37 +69,23 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern unsigned long efi_call_virt_prelog(void);
-extern void efi_call_virt_epilog(unsigned long);
-
-#define efi_callx(x, func, ...)					\
-	({							\
-		efi_status_t __status;				\
-		unsigned long __pgd;				\
-								\
-		__pgd = efi_call_virt_prelog();			\
-		__status = efi_call##x(func, __VA_ARGS__);	\
-		efi_call_virt_epilog(__pgd);			\
-		__status;					\
-	})
-
 #define efi_call_virt0(f)				\
-	efi_callx(0, (void *)(efi.systab->runtime->f))
+	efi_call0((void *)(efi.systab->runtime->f))
 #define efi_call_virt1(f, a1)					\
-	efi_callx(1, (void *)(efi.systab->runtime->f), (u64)(a1))
+	efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
 #define efi_call_virt2(f, a1, a2)					\
-	efi_callx(2, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+	efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)					\
-	efi_callx(3, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_callx(4, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_callx(5, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_callx(6, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index d4f460f962ee..f84fe00fad48 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -103,13 +103,71 @@ void __init tboot_probe(void)
 	pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
 }
 
+static pgd_t *tboot_pg_dir;
+static struct mm_struct tboot_mm = {
+	.mm_rb          = RB_ROOT,
+	.pgd            = swapper_pg_dir,
+	.mm_users       = ATOMIC_INIT(2),
+	.mm_count       = ATOMIC_INIT(1),
+	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
+};
+
 static inline void switch_to_tboot_pt(void)
 {
-#ifdef CONFIG_X86_32
-	load_cr3(initial_page_table);
-#else
-	write_cr3(real_mode_header->trampoline_pgd);
-#endif
+	write_cr3(virt_to_phys(tboot_pg_dir));
+}
+
+static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+			  pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(&tboot_mm, vaddr);
+	pud = pud_alloc(&tboot_mm, pgd, vaddr);
+	if (!pud)
+		return -1;
+	pmd = pmd_alloc(&tboot_mm, pud, vaddr);
+	if (!pmd)
+		return -1;
+	pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
+	if (!pte)
+		return -1;
+	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
+	pte_unmap(pte);
+	return 0;
+}
+
+static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
+			   unsigned long nr)
+{
+	/* Reuse the original kernel mapping */
+	tboot_pg_dir = pgd_alloc(&tboot_mm);
+	if (!tboot_pg_dir)
+		return -1;
+
+	for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
+		if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
+			return -1;
+	}
+
+	return 0;
+}
+
+static void tboot_create_trampoline(void)
+{
+	u32 map_base, map_size;
+
+	/* Create identity map for tboot shutdown code. */
+	map_base = PFN_DOWN(tboot->tboot_base);
+	map_size = PFN_UP(tboot->tboot_size);
+	if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
+		panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
+		      map_base, map_size);
 }
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -167,6 +225,14 @@ void tboot_shutdown(u32 shutdown_type)
 	if (!tboot_enabled())
 		return;
 
+	/*
+	 * if we're being called before the 1:1 mapping is set up then just
+	 * return and let the normal shutdown happen; this should only be
+	 * due to very early panic()
+	 */
+	if (!tboot_pg_dir)
+		return;
+
 	/* if this is S3 then set regions to MAC */
 	if (shutdown_type == TB_SHUTDOWN_S3)
 		if (tboot_setup_sleep())
@@ -277,6 +343,8 @@ static __init int tboot_late_init(void)
 	if (!tboot_enabled())
 		return 0;
 
+	tboot_create_trampoline();
+
 	atomic_set(&ap_wfs_count, 0);
 	register_hotcpu_notifier(&tboot_cpu_notifier);
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a96160..a718e0d23503 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,13 +919,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-	 * avoid the wbindv. If the CPU does not support it, in the
-	 * error case, and during early boot (for EFI) we fall back
-	 * to cpa_flush_all (which uses wbinvd):
+	 * avoid the wbindv. If the CPU does not support it and in the
+	 * error case we fall back to cpa_flush_all (which uses
+	 * wbindv):
 	 */
-	if (early_boot_irqs_disabled)
-		__cpa_flush_all((void *)(long)cache);
-	else if (!ret && cpu_has_clflush) {
+	if (!ret && cpu_has_clflush) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0a34d9e9c263..ad4439145f85 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -239,7 +239,22 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	return status;
 }
 
-static int efi_set_rtc_mmss(unsigned long nowtime)
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+					     efi_time_cap_t *tc)
+{
+	unsigned long flags;
+	efi_status_t status;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	efi_call_phys_prelog();
+	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+				virt_to_phys(tc));
+	efi_call_phys_epilog();
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
 	efi_status_t 	status;
@@ -268,7 +283,7 @@ static int efi_set_rtc_mmss(unsigned long nowtime)
 	return 0;
 }
 
-static unsigned long efi_get_time(void)
+unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -624,13 +639,18 @@ static int __init efi_runtime_init(void)
 	}
 	/*
 	 * We will only need *early* access to the following
-	 * EFI runtime service before set_virtual_address_map
+	 * two EFI runtime services before set_virtual_address_map
 	 * is invoked.
 	 */
+	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
 		runtime->set_virtual_address_map;
-
+	/*
+	 * Make efi_get_time can be called before entering
+	 * virtual mode.
+	 */
+	efi.get_time = phys_efi_get_time;
 	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	return 0;
@@ -716,10 +736,12 @@ void __init efi_init(void)
 		efi_enabled = 0;
 		return;
 	}
+#ifdef CONFIG_X86_32
 	if (efi_is_native()) {
 		x86_platform.get_wallclock = efi_get_time;
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
+#endif
 
 #if EFI_DEBUG
 	print_efi_memmap();
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 06c8b2e662ab..95fd505dfeb6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -58,21 +58,6 @@ static void __init early_code_mapping_set_exec(int executable)
 	}
 }
 
-unsigned long efi_call_virt_prelog(void)
-{
-	unsigned long saved;
-
-	saved = read_cr3();
-	write_cr3(real_mode_header->trampoline_pgd);
-
-	return saved;
-}
-
-void efi_call_virt_epilog(unsigned long saved)
-{
-	write_cr3(saved);
-}
-
 void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 02a69418be18..8b84916dc671 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -587,6 +587,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
+extern unsigned long efi_get_time(void);
+extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
diff --git a/init/main.c b/init/main.c
index 6af5470b8067..63ae904a99a8 100644
--- a/init/main.c
+++ b/init/main.c
@@ -463,10 +463,6 @@ static void __init mm_init(void)
 	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
-#ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
-#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -607,6 +603,10 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
+#ifdef CONFIG_X86
+	if (efi_enabled)
+		efi_enter_virtual_mode();
+#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);
-- 
cgit v1.2.3-55-g7522


From 9360b53661a2c7754517b2925580055bacc8ec38 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 17 Dec 2012 11:29:09 -0800
Subject: Revert "bdi: add a user-tunable cpu_list for the bdi flusher threads"

This reverts commit 8fa72d234da9b6b473bbb1f74d533663e4996e6b.

People disagree about how this should be done, so let's revert this for
now so that nobody starts using the new tuning interface.  Tejun is
thinking about a more generic interface for thread pool affinity.

Requested-by: Tejun Heo <tj@kernel.org>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h |  4 ---
 mm/backing-dev.c            | 84 ---------------------------------------------
 2 files changed, 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 238521a19849..2a9a9abc9126 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,7 +18,6 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
-#include <linux/mutex.h>
 
 struct page;
 struct device;
@@ -106,9 +105,6 @@ struct backing_dev_info {
 
 	struct timer_list laptop_mode_wb_timer;
 
-	cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */
-	struct mutex flusher_cpumask_lock;
-
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
 	struct dentry *debug_stats;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bd6a6cabef71..d3ca2b3ee176 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
-#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -222,63 +221,12 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
-static ssize_t cpu_list_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct backing_dev_info *bdi = dev_get_drvdata(dev);
-	struct bdi_writeback *wb = &bdi->wb;
-	cpumask_var_t newmask;
-	ssize_t ret;
-	struct task_struct *task;
-
-	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
-		return -ENOMEM;
-
-	ret = cpulist_parse(buf, newmask);
-	if (!ret) {
-		spin_lock_bh(&bdi->wb_lock);
-		task = wb->task;
-		if (task)
-			get_task_struct(task);
-		spin_unlock_bh(&bdi->wb_lock);
-
-		mutex_lock(&bdi->flusher_cpumask_lock);
-		if (task) {
-			ret = set_cpus_allowed_ptr(task, newmask);
-			put_task_struct(task);
-		}
-		if (ret == 0) {
-			cpumask_copy(bdi->flusher_cpumask, newmask);
-			ret = count;
-		}
-		mutex_unlock(&bdi->flusher_cpumask_lock);
-
-	}
-	free_cpumask_var(newmask);
-
-	return ret;
-}
-
-static ssize_t cpu_list_show(struct device *dev,
-		struct device_attribute *attr, char *page)
-{
-	struct backing_dev_info *bdi = dev_get_drvdata(dev);
-	ssize_t ret;
-
-	mutex_lock(&bdi->flusher_cpumask_lock);
-	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
-	mutex_unlock(&bdi->flusher_cpumask_lock);
-
-	return ret;
-}
-
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
-	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
@@ -480,7 +428,6 @@ static int bdi_forker_thread(void *ptr)
 				writeback_inodes_wb(&bdi->wb, 1024,
 						    WB_REASON_FORKER_THREAD);
 			} else {
-				int ret;
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -490,14 +437,6 @@ static int bdi_forker_thread(void *ptr)
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
-				mutex_lock(&bdi->flusher_cpumask_lock);
-				ret = set_cpus_allowed_ptr(task,
-							bdi->flusher_cpumask);
-				mutex_unlock(&bdi->flusher_cpumask_lock);
-				if (ret)
-					printk_once("%s: failed to bind flusher"
-						    " thread %s, error %d\n",
-						    __func__, task->comm, ret);
 				wake_up_process(task);
 			}
 			bdi_clear_pending(bdi);
@@ -570,17 +509,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 						dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
-	} else {
-		int node;
-		/*
-		 * Set up a default cpumask for the flusher threads that
-		 * includes all cpus on the same numa node as the device.
-		 * The mask may be overridden via sysfs.
-		 */
-		node = dev_to_node(bdi->dev);
-		if (node != NUMA_NO_NODE)
-			cpumask_copy(bdi->flusher_cpumask,
-				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
@@ -706,15 +634,6 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
-	if (!bdi_cap_flush_forker(bdi)) {
-		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
-		if (!bdi->flusher_cpumask)
-			return -ENOMEM;
-		cpumask_setall(bdi->flusher_cpumask);
-		mutex_init(&bdi->flusher_cpumask_lock);
-	} else
-		bdi->flusher_cpumask = NULL;
-
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -737,7 +656,6 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
-		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -765,8 +683,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
-	kfree(bdi->flusher_cpumask);
-
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()
-- 
cgit v1.2.3-55-g7522


From 7929d407e47fbf843fe1337fd95ed57785ae5e9d Mon Sep 17 00:00:00 2001
From: Matthew Leach
Date: Mon, 17 Dec 2012 15:59:32 -0800
Subject: include/linux/init.h: use the stringify operator for the
 __define_initcall macro

Currently the __define_initcall() macro takes three arguments, fn, id and
level.  The level argument is exactly the same as the id argument but
wrapped in quotes.  To overcome this need to specify three arguments to
the __define_initcall macro, where one argument is the stringification of
another, we can just use the stringification macro instead.

Signed-off-by: Matthew Leach <matthew@mattleach.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/machdep.h | 36 +++++++++++++++++-----------------
 include/linux/init.h               | 40 +++++++++++++++++++-------------------
 2 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c4231973edd3..3c82daf8be99 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -320,28 +320,28 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal)
 		ppc_md.log_error(buf, err_type, fatal);
 }
 
-#define __define_machine_initcall(mach,level,fn,id) \
+#define __define_machine_initcall(mach, fn, id) \
 	static int __init __machine_initcall_##mach##_##fn(void) { \
 		if (machine_is(mach)) return fn(); \
 		return 0; \
 	} \
-	__define_initcall(level,__machine_initcall_##mach##_##fn,id);
-
-#define machine_core_initcall(mach,fn)		__define_machine_initcall(mach,"1",fn,1)
-#define machine_core_initcall_sync(mach,fn)	__define_machine_initcall(mach,"1s",fn,1s)
-#define machine_postcore_initcall(mach,fn)	__define_machine_initcall(mach,"2",fn,2)
-#define machine_postcore_initcall_sync(mach,fn)	__define_machine_initcall(mach,"2s",fn,2s)
-#define machine_arch_initcall(mach,fn)		__define_machine_initcall(mach,"3",fn,3)
-#define machine_arch_initcall_sync(mach,fn)	__define_machine_initcall(mach,"3s",fn,3s)
-#define machine_subsys_initcall(mach,fn)	__define_machine_initcall(mach,"4",fn,4)
-#define machine_subsys_initcall_sync(mach,fn)	__define_machine_initcall(mach,"4s",fn,4s)
-#define machine_fs_initcall(mach,fn)		__define_machine_initcall(mach,"5",fn,5)
-#define machine_fs_initcall_sync(mach,fn)	__define_machine_initcall(mach,"5s",fn,5s)
-#define machine_rootfs_initcall(mach,fn)	__define_machine_initcall(mach,"rootfs",fn,rootfs)
-#define machine_device_initcall(mach,fn)	__define_machine_initcall(mach,"6",fn,6)
-#define machine_device_initcall_sync(mach,fn)	__define_machine_initcall(mach,"6s",fn,6s)
-#define machine_late_initcall(mach,fn)		__define_machine_initcall(mach,"7",fn,7)
-#define machine_late_initcall_sync(mach,fn)	__define_machine_initcall(mach,"7s",fn,7s)
+	__define_initcall(__machine_initcall_##mach##_##fn, id);
+
+#define machine_core_initcall(mach, fn)		__define_machine_initcall(mach, fn, 1)
+#define machine_core_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 1s)
+#define machine_postcore_initcall(mach, fn)	__define_machine_initcall(mach, fn, 2)
+#define machine_postcore_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 2s)
+#define machine_arch_initcall(mach, fn)		__define_machine_initcall(mach, fn, 3)
+#define machine_arch_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 3s)
+#define machine_subsys_initcall(mach, fn)	__define_machine_initcall(mach, fn, 4)
+#define machine_subsys_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 4s)
+#define machine_fs_initcall(mach, fn)		__define_machine_initcall(mach, fn, 5)
+#define machine_fs_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 5s)
+#define machine_rootfs_initcall(mach, fn)	__define_machine_initcall(mach, fn, rootfs)
+#define machine_device_initcall(mach, fn)	__define_machine_initcall(mach, fn, 6)
+#define machine_device_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 6s)
+#define machine_late_initcall(mach, fn)		__define_machine_initcall(mach, fn, 7)
+#define machine_late_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 7s)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MACHDEP_H */
diff --git a/include/linux/init.h b/include/linux/init.h
index f63692d6902e..a799273714ac 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -182,16 +182,16 @@ extern bool initcall_debug;
  * can point at the same handler without causing duplicate-symbol build errors.
  */
 
-#define __define_initcall(level,fn,id) \
+#define __define_initcall(fn, id) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" level ".init"))) = fn
+	__attribute__((__section__(".initcall" #id ".init"))) = fn
 
 /*
  * Early initcalls run before initializing SMP.
  *
  * Only for built-in code, not modules.
  */
-#define early_initcall(fn)		__define_initcall("early",fn,early)
+#define early_initcall(fn)		__define_initcall(fn, early)
 
 /*
  * A "pure" initcall has no dependencies on anything else, and purely
@@ -200,23 +200,23 @@ extern bool initcall_debug;
  * This only exists for built-in code, not for modules.
  * Keep main.c:initcall_level_names[] in sync.
  */
-#define pure_initcall(fn)		__define_initcall("0",fn,0)
-
-#define core_initcall(fn)		__define_initcall("1",fn,1)
-#define core_initcall_sync(fn)		__define_initcall("1s",fn,1s)
-#define postcore_initcall(fn)		__define_initcall("2",fn,2)
-#define postcore_initcall_sync(fn)	__define_initcall("2s",fn,2s)
-#define arch_initcall(fn)		__define_initcall("3",fn,3)
-#define arch_initcall_sync(fn)		__define_initcall("3s",fn,3s)
-#define subsys_initcall(fn)		__define_initcall("4",fn,4)
-#define subsys_initcall_sync(fn)	__define_initcall("4s",fn,4s)
-#define fs_initcall(fn)			__define_initcall("5",fn,5)
-#define fs_initcall_sync(fn)		__define_initcall("5s",fn,5s)
-#define rootfs_initcall(fn)		__define_initcall("rootfs",fn,rootfs)
-#define device_initcall(fn)		__define_initcall("6",fn,6)
-#define device_initcall_sync(fn)	__define_initcall("6s",fn,6s)
-#define late_initcall(fn)		__define_initcall("7",fn,7)
-#define late_initcall_sync(fn)		__define_initcall("7s",fn,7s)
+#define pure_initcall(fn)		__define_initcall(fn, 0)
+
+#define core_initcall(fn)		__define_initcall(fn, 1)
+#define core_initcall_sync(fn)		__define_initcall(fn, 1s)
+#define postcore_initcall(fn)		__define_initcall(fn, 2)
+#define postcore_initcall_sync(fn)	__define_initcall(fn, 2s)
+#define arch_initcall(fn)		__define_initcall(fn, 3)
+#define arch_initcall_sync(fn)		__define_initcall(fn, 3s)
+#define subsys_initcall(fn)		__define_initcall(fn, 4)
+#define subsys_initcall_sync(fn)	__define_initcall(fn, 4s)
+#define fs_initcall(fn)			__define_initcall(fn, 5)
+#define fs_initcall_sync(fn)		__define_initcall(fn, 5s)
+#define rootfs_initcall(fn)		__define_initcall(fn, rootfs)
+#define device_initcall(fn)		__define_initcall(fn, 6)
+#define device_initcall_sync(fn)	__define_initcall(fn, 6s)
+#define late_initcall(fn)		__define_initcall(fn, 7)
+#define late_initcall_sync(fn)		__define_initcall(fn, 7s)
 
 #define __initcall(fn) device_initcall(fn)
 
-- 
cgit v1.2.3-55-g7522


From 965c8e59cfcf845ecde2265a1d1bfee5f011d302 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Mon, 17 Dec 2012 15:59:39 -0800
Subject: lseek: the "whence" argument is called "whence"

But the kernel decided to call it "origin" instead.  Fix most of the
sites.

Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/bad_inode.c           |  2 +-
 fs/block_dev.c           |  4 ++--
 fs/btrfs/file.c          | 16 ++++++++--------
 fs/ceph/dir.c            |  4 ++--
 fs/ceph/file.c           |  6 +++---
 fs/cifs/cifsfs.c         |  8 ++++----
 fs/configfs/dir.c        |  4 ++--
 fs/ext3/dir.c            |  6 +++---
 fs/ext4/dir.c            |  6 +++---
 fs/ext4/file.c           | 22 +++++++++++-----------
 fs/fuse/file.c           |  8 ++++----
 fs/gfs2/file.c           | 10 +++++-----
 fs/libfs.c               |  4 ++--
 fs/nfs/dir.c             |  6 +++---
 fs/nfs/file.c            | 10 +++++-----
 fs/ocfs2/extent_map.c    | 12 ++++++------
 fs/ocfs2/file.c          |  6 +++---
 fs/pstore/inode.c        |  6 +++---
 fs/read_write.c          | 40 ++++++++++++++++++++--------------------
 fs/seq_file.c            |  4 ++--
 fs/ubifs/dir.c           |  4 ++--
 include/linux/fs.h       | 12 ++++++------
 include/linux/ftrace.h   |  4 ++--
 include/linux/syscalls.h |  4 ++--
 kernel/trace/ftrace.c    |  4 ++--
 mm/shmem.c               | 20 ++++++++++----------
 26 files changed, 116 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index b1342ffb3cf6..922ad460bff9 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -16,7 +16,7 @@
 #include <linux/poll.h>
 
 
-static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	return -EIO;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ab3a456f6650..172f8491a2bd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -321,7 +321,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
  * for a block special file file->f_path.dentry->d_inode->i_size is zero
  * so we compute the size by hand (just as in block_read/write above)
  */
-static loff_t block_llseek(struct file *file, loff_t offset, int origin)
+static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *bd_inode = file->f_mapping->host;
 	loff_t size;
@@ -331,7 +331,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 	size = i_size_read(bd_inode);
 
 	retval = -EINVAL;
-	switch (origin) {
+	switch (whence) {
 		case SEEK_END:
 			offset += size;
 			break;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a8ee75cb96ee..9c6673a9231f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2120,7 +2120,7 @@ out:
 	return ret;
 }
 
-static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_map *em;
@@ -2154,7 +2154,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 	 * before the position we want in case there is outstanding delalloc
 	 * going on here.
 	 */
-	if (origin == SEEK_HOLE && start != 0) {
+	if (whence == SEEK_HOLE && start != 0) {
 		if (start <= root->sectorsize)
 			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
 						     root->sectorsize, 0);
@@ -2188,13 +2188,13 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 				}
 			}
 
-			if (origin == SEEK_HOLE) {
+			if (whence == SEEK_HOLE) {
 				*offset = start;
 				free_extent_map(em);
 				break;
 			}
 		} else {
-			if (origin == SEEK_DATA) {
+			if (whence == SEEK_DATA) {
 				if (em->block_start == EXTENT_MAP_DELALLOC) {
 					if (start >= inode->i_size) {
 						free_extent_map(em);
@@ -2231,16 +2231,16 @@ out:
 	return ret;
 }
 
-static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 	case SEEK_CUR:
-		offset = generic_file_llseek(file, offset, origin);
+		offset = generic_file_llseek(file, offset, whence);
 		goto out;
 	case SEEK_DATA:
 	case SEEK_HOLE:
@@ -2249,7 +2249,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
 			return -ENXIO;
 		}
 
-		ret = find_desired_extent(inode, &offset, origin);
+		ret = find_desired_extent(inode, &offset, whence);
 		if (ret) {
 			mutex_unlock(&inode->i_mutex);
 			return ret;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e5b77319c97b..8c1aabe93b67 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -454,7 +454,7 @@ static void reset_readdir(struct ceph_file_info *fi)
 	fi->flags &= ~CEPH_F_ATEND;
 }
 
-static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
@@ -463,7 +463,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 
 	mutex_lock(&inode->i_mutex);
 	retval = -EINVAL;
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += inode->i_size + 2;   /* FIXME */
 		break;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5840d2aaed15..d4dfdcf76d7f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -797,7 +797,7 @@ out:
 /*
  * llseek.  be sure to verify file size on SEEK_END.
  */
-static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
@@ -805,7 +805,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&inode->i_mutex);
 	__ceph_do_pending_vmtruncate(inode);
 
-	if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
+	if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
 		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
 		if (ret < 0) {
 			offset = ret;
@@ -813,7 +813,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
 		}
 	}
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += inode->i_size;
 		break;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 210f0af83fc4..ce9f3c5421bf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -695,13 +695,13 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	return written;
 }
 
-static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
+static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 {
 	/*
-	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 	 * the cached file length
 	 */
-	if (origin != SEEK_SET && origin != SEEK_CUR) {
+	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		int rc;
 		struct inode *inode = file->f_path.dentry->d_inode;
 
@@ -728,7 +728,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 		if (rc < 0)
 			return (loff_t)rc;
 	}
-	return generic_file_llseek(file, offset, origin);
+	return generic_file_llseek(file, offset, whence);
 }
 
 static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7414ae24a79b..712b10f64c70 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1613,12 +1613,12 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 	return 0;
 }
 
-static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
+static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry * dentry = file->f_path.dentry;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c8fff930790d..dd91264ba94f 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -296,17 +296,17 @@ static inline loff_t ext3_get_htree_eof(struct file *filp)
  * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
  *       will be invalid once the directory was converted into a dx directory
  */
-loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
 	loff_t htree_max = ext3_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 					        htree_max, htree_max);
 	else
-		return generic_file_llseek(file, offset, origin);
+		return generic_file_llseek(file, offset, whence);
 }
 
 /*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b8d877f6c1fa..80a28b297279 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -333,17 +333,17 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
  *
  * For non-htree, ext4_llseek already chooses the proper max offset.
  */
-loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
 	loff_t htree_max = ext4_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 						    htree_max, htree_max);
 	else
-		return ext4_llseek(file, offset, origin);
+		return ext4_llseek(file, offset, whence);
 }
 
 /*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b64a60bf105a..d07c27ca594a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -303,7 +303,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
  * page cache has data or not.
  */
 static int ext4_find_unwritten_pgoff(struct inode *inode,
-				     int origin,
+				     int whence,
 				     struct ext4_map_blocks *map,
 				     loff_t *offset)
 {
@@ -333,10 +333,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  (pgoff_t)num);
 		if (nr_pages == 0) {
-			if (origin == SEEK_DATA)
+			if (whence == SEEK_DATA)
 				break;
 
-			BUG_ON(origin != SEEK_HOLE);
+			BUG_ON(whence != SEEK_HOLE);
 			/*
 			 * If this is the first time to go into the loop and
 			 * offset is not beyond the end offset, it will be a
@@ -352,7 +352,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 		 * offset is smaller than the first page offset, it will be a
 		 * hole at this offset.
 		 */
-		if (lastoff == startoff && origin == SEEK_HOLE &&
+		if (lastoff == startoff && whence == SEEK_HOLE &&
 		    lastoff < page_offset(pvec.pages[0])) {
 			found = 1;
 			break;
@@ -366,7 +366,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 			 * If the current offset is not beyond the end of given
 			 * range, it will be a hole.
 			 */
-			if (lastoff < endoff && origin == SEEK_HOLE &&
+			if (lastoff < endoff && whence == SEEK_HOLE &&
 			    page->index > end) {
 				found = 1;
 				*offset = lastoff;
@@ -391,10 +391,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 				do {
 					if (buffer_uptodate(bh) ||
 					    buffer_unwritten(bh)) {
-						if (origin == SEEK_DATA)
+						if (whence == SEEK_DATA)
 							found = 1;
 					} else {
-						if (origin == SEEK_HOLE)
+						if (whence == SEEK_HOLE)
 							found = 1;
 					}
 					if (found) {
@@ -416,7 +416,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 		 * The no. of pages is less than our desired, that would be a
 		 * hole in there.
 		 */
-		if (nr_pages < num && origin == SEEK_HOLE) {
+		if (nr_pages < num && whence == SEEK_HOLE) {
 			found = 1;
 			*offset = lastoff;
 			break;
@@ -609,7 +609,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
  * by calling generic_file_llseek_size() with the appropriate maxbytes
  * value for each.
  */
-loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	loff_t maxbytes;
@@ -619,11 +619,11 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 	else
 		maxbytes = inode->i_sb->s_maxbytes;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_SET:
 	case SEEK_CUR:
 	case SEEK_END:
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 						maxbytes, i_size_read(inode));
 	case SEEK_DATA:
 		return ext4_seek_data(file, offset, maxbytes);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 78d2837bc940..e21d4d8f87e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1599,19 +1599,19 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
 	return err ? 0 : outarg.block;
 }
 
-static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t retval;
 	struct inode *inode = file->f_path.dentry->d_inode;
 
 	/* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
-	if (origin == SEEK_CUR || origin == SEEK_SET)
-		return generic_file_llseek(file, offset, origin);
+	if (whence == SEEK_CUR || whence == SEEK_SET)
+		return generic_file_llseek(file, offset, whence);
 
 	mutex_lock(&inode->i_mutex);
 	retval = fuse_update_attributes(inode, NULL, file, NULL);
 	if (!retval)
-		retval = generic_file_llseek(file, offset, origin);
+		retval = generic_file_llseek(file, offset, whence);
 	mutex_unlock(&inode->i_mutex);
 
 	return retval;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index dfe2d8cb9b2c..991ab2d484dd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -44,7 +44,7 @@
  * gfs2_llseek - seek to a location in a file
  * @file: the file
  * @offset: the offset
- * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
+ * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
  *
  * SEEK_END requires the glock for the file because it references the
  * file's size.
@@ -52,26 +52,26 @@
  * Returns: The new offset, or errno
  */
 
-static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
+static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_holder i_gh;
 	loff_t error;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END: /* These reference inode->i_size */
 	case SEEK_DATA:
 	case SEEK_HOLE:
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
-			error = generic_file_llseek(file, offset, origin);
+			error = generic_file_llseek(file, offset, whence);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 		break;
 	case SEEK_CUR:
 	case SEEK_SET:
-		error = generic_file_llseek(file, offset, origin);
+		error = generic_file_llseek(file, offset, whence);
 		break;
 	default:
 		error = -EINVAL;
diff --git a/fs/libfs.c b/fs/libfs.c
index 7cc37ca19cd8..35fc6e74cd88 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -81,11 +81,11 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 	return 0;
 }
 
-loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
+loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	mutex_lock(&dentry->d_inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b9e66b7e0c14..1cc71f60b491 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -871,7 +871,7 @@ out:
 	return res;
 }
 
-static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
+static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
@@ -880,10 +880,10 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
-			offset, origin);
+			offset, whence);
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += filp->f_pos;
 		case 0:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 582bb8866131..3c2b893665ba 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -119,18 +119,18 @@ force_reval:
 	return __nfs_revalidate_inode(server, inode);
 }
 
-loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
+loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
 {
 	dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
 			filp->f_path.dentry->d_parent->d_name.name,
 			filp->f_path.dentry->d_name.name,
-			offset, origin);
+			offset, whence);
 
 	/*
-	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 	 * the cached file length
 	 */
-	if (origin != SEEK_SET && origin != SEEK_CUR) {
+	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		struct inode *inode = filp->f_mapping->host;
 
 		int retval = nfs_revalidate_file_size(inode, filp);
@@ -138,7 +138,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 			return (loff_t)retval;
 	}
 
-	return generic_file_llseek(filp, offset, origin);
+	return generic_file_llseek(filp, offset, whence);
 }
 EXPORT_SYMBOL_GPL(nfs_file_llseek);
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 70b5863a2d64..f487aa343442 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -832,7 +832,7 @@ out:
 	return ret;
 }
 
-int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
+int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
@@ -843,7 +843,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_extent_rec rec;
 
-	BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE);
+	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 
 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
 	if (ret) {
@@ -859,7 +859,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 	}
 
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
-		if (origin == SEEK_HOLE)
+		if (whence == SEEK_HOLE)
 			*offset = inode->i_size;
 		goto out_unlock;
 	}
@@ -888,8 +888,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 		}
 
-		if ((!is_data && origin == SEEK_HOLE) ||
-		    (is_data && origin == SEEK_DATA)) {
+		if ((!is_data && whence == SEEK_HOLE) ||
+		    (is_data && whence == SEEK_DATA)) {
 			if (extoff > *offset)
 				*offset = extoff;
 			goto out_unlock;
@@ -899,7 +899,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 			cpos += clen;
 	}
 
-	if (origin == SEEK_HOLE) {
+	if (whence == SEEK_HOLE) {
 		extoff = cpos;
 		extoff <<= cs_bits;
 		extlen = clen;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index dda089804942..fe492e1a3cfc 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2637,14 +2637,14 @@ bail:
 }
 
 /* Refer generic_file_llseek_unlocked() */
-static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret = 0;
 
 	mutex_lock(&inode->i_mutex);
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_SET:
 		break;
 	case SEEK_END:
@@ -2659,7 +2659,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin)
 		break;
 	case SEEK_DATA:
 	case SEEK_HOLE:
-		ret = ocfs2_seek_data_hole_offset(file, &offset, origin);
+		ret = ocfs2_seek_data_hole_offset(file, &offset, whence);
 		if (ret)
 			goto out;
 		break;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ed1d8c7212da..67de74ca85f4 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -151,13 +151,13 @@ static int pstore_file_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static loff_t pstore_file_llseek(struct file *file, loff_t off, int origin)
+static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
 {
 	struct seq_file *sf = file->private_data;
 
 	if (sf->op)
-		return seq_lseek(file, off, origin);
-	return default_llseek(file, off, origin);
+		return seq_lseek(file, off, whence);
+	return default_llseek(file, off, whence);
 }
 
 static const struct file_operations pstore_file_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index d06534857e9e..1edaf099ddd7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -54,7 +54,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
  * generic_file_llseek_size - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  * @size:	max size of this file in file system
  * @eof:	offset used for SEEK_END position
  *
@@ -67,12 +67,12 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
  * read/writes behave like SEEK_SET against seeks.
  */
 loff_t
-generic_file_llseek_size(struct file *file, loff_t offset, int origin,
+generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		loff_t maxsize, loff_t eof)
 {
 	struct inode *inode = file->f_mapping->host;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += eof;
 		break;
@@ -122,17 +122,17 @@ EXPORT_SYMBOL(generic_file_llseek_size);
  * generic_file_llseek - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  *
  * This is a generic implemenation of ->llseek useable for all normal local
  * filesystems.  It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
+ * @offset and @whence under i_mutex.
  */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 
-	return generic_file_llseek_size(file, offset, origin,
+	return generic_file_llseek_size(file, offset, whence,
 					inode->i_sb->s_maxbytes,
 					i_size_read(inode));
 }
@@ -142,32 +142,32 @@ EXPORT_SYMBOL(generic_file_llseek);
  * noop_llseek - No Operation Performed llseek implementation
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  *
  * This is an implementation of ->llseek useable for the rare special case when
  * userspace expects the seek to succeed but the (device) file is actually not
  * able to perform the seek. In this case you use noop_llseek() instead of
  * falling back to the default implementation of ->llseek.
  */
-loff_t noop_llseek(struct file *file, loff_t offset, int origin)
+loff_t noop_llseek(struct file *file, loff_t offset, int whence)
 {
 	return file->f_pos;
 }
 EXPORT_SYMBOL(noop_llseek);
 
-loff_t no_llseek(struct file *file, loff_t offset, int origin)
+loff_t no_llseek(struct file *file, loff_t offset, int whence)
 {
 	return -ESPIPE;
 }
 EXPORT_SYMBOL(no_llseek);
 
-loff_t default_llseek(struct file *file, loff_t offset, int origin)
+loff_t default_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t retval;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case SEEK_END:
 			offset += i_size_read(inode);
 			break;
@@ -216,7 +216,7 @@ out:
 }
 EXPORT_SYMBOL(default_llseek);
 
-loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
+loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t (*fn)(struct file *, loff_t, int);
 
@@ -225,11 +225,11 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 		if (file->f_op && file->f_op->llseek)
 			fn = file->f_op->llseek;
 	}
-	return fn(file, offset, origin);
+	return fn(file, offset, whence);
 }
 EXPORT_SYMBOL(vfs_llseek);
 
-SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
+SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
 {
 	off_t retval;
 	struct fd f = fdget(fd);
@@ -237,8 +237,8 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 		return -EBADF;
 
 	retval = -EINVAL;
-	if (origin <= SEEK_MAX) {
-		loff_t res = vfs_llseek(f.file, offset, origin);
+	if (whence <= SEEK_MAX) {
+		loff_t res = vfs_llseek(f.file, offset, whence);
 		retval = res;
 		if (res != (loff_t)retval)
 			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
@@ -250,7 +250,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 #ifdef __ARCH_WANT_SYS_LLSEEK
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned long, offset_low, loff_t __user *, result,
-		unsigned int, origin)
+		unsigned int, whence)
 {
 	int retval;
 	struct fd f = fdget(fd);
@@ -260,11 +260,11 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		return -EBADF;
 
 	retval = -EINVAL;
-	if (origin > SEEK_MAX)
+	if (whence > SEEK_MAX)
 		goto out_putf;
 
 	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
-			origin);
+			whence);
 
 	retval = (int)offset;
 	if (offset >= 0) {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 99dffab4c4e4..9d863fb501f9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -300,14 +300,14 @@ EXPORT_SYMBOL(seq_read);
  *
  *	Ready-made ->f_op->llseek()
  */
-loff_t seq_lseek(struct file *file, loff_t offset, int origin)
+loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct seq_file *m = file->private_data;
 	loff_t retval = -EINVAL;
 
 	mutex_lock(&m->lock);
 	m->version = file->f_version;
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e271fba1651b..8a574776a493 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -453,11 +453,11 @@ out:
 }
 
 /* If a directory is seeked, we have to free saved readdir() state */
-static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	kfree(file->private_data);
 	file->private_data = NULL;
-	return generic_file_llseek(file, offset, origin);
+	return generic_file_llseek(file, offset, whence);
 }
 
 /* Free saved readdir() state when the directory is closed */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 408fb1e77a0a..029552ff774c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2286,9 +2286,9 @@ extern ino_t find_inode_number(struct dentry *, struct qstr *);
 #include <linux/err.h>
 
 /* needed for stackable file system support */
-extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
 
-extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
 
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
@@ -2396,11 +2396,11 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
-		int origin, loff_t maxsize, loff_t eof);
+		int whence, loff_t maxsize, loff_t eof);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a52f2f4fe030..92691d85c320 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -394,7 +394,7 @@ ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
 			    size_t cnt, loff_t *ppos);
 ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 			     size_t cnt, loff_t *ppos);
-loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence);
 int ftrace_regex_release(struct inode *inode, struct file *file);
 
 void __init
@@ -559,7 +559,7 @@ static inline ssize_t ftrace_filter_write(struct file *file, const char __user *
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
 static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 			     size_t cnt, loff_t *ppos) { return -ENODEV; }
-static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 {
 	return -ENODEV;
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91835e7f364d..36c3b07c5119 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -560,10 +560,10 @@ asmlinkage long sys_utime(char __user *filename,
 asmlinkage long sys_utimes(char __user *filename,
 				struct timeval __user *utimes);
 asmlinkage long sys_lseek(unsigned int fd, off_t offset,
-			  unsigned int origin);
+			  unsigned int whence);
 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
 			unsigned long offset_low, loff_t __user *result,
-			unsigned int origin);
+			unsigned int whence);
 asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
 asmlinkage long sys_readahead(int fd, loff_t offset, size_t count);
 asmlinkage long sys_readv(unsigned long fd,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index afd092de45b7..3ffe4c5ad3f3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2675,12 +2675,12 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 }
 
 loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t ret;
 
 	if (file->f_mode & FMODE_READ)
-		ret = seq_lseek(file, offset, origin);
+		ret = seq_lseek(file, offset, whence);
 	else
 		file->f_pos = ret = 1;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 03f9ba8fb8e5..5c90d84c2b02 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1719,7 +1719,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
  * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
  */
 static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-				    pgoff_t index, pgoff_t end, int origin)
+				    pgoff_t index, pgoff_t end, int whence)
 {
 	struct page *page;
 	struct pagevec pvec;
@@ -1733,13 +1733,13 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 					pvec.nr, pvec.pages, indices);
 		if (!pvec.nr) {
-			if (origin == SEEK_DATA)
+			if (whence == SEEK_DATA)
 				index = end;
 			break;
 		}
 		for (i = 0; i < pvec.nr; i++, index++) {
 			if (index < indices[i]) {
-				if (origin == SEEK_HOLE) {
+				if (whence == SEEK_HOLE) {
 					done = true;
 					break;
 				}
@@ -1751,8 +1751,8 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 					page = NULL;
 			}
 			if (index >= end ||
-			    (page && origin == SEEK_DATA) ||
-			    (!page && origin == SEEK_HOLE)) {
+			    (page && whence == SEEK_DATA) ||
+			    (!page && whence == SEEK_HOLE)) {
 				done = true;
 				break;
 			}
@@ -1765,15 +1765,15 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 	return index;
 }
 
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	pgoff_t start, end;
 	loff_t new_offset;
 
-	if (origin != SEEK_DATA && origin != SEEK_HOLE)
-		return generic_file_llseek_size(file, offset, origin,
+	if (whence != SEEK_DATA && whence != SEEK_HOLE)
+		return generic_file_llseek_size(file, offset, whence,
 					MAX_LFS_FILESIZE, i_size_read(inode));
 	mutex_lock(&inode->i_mutex);
 	/* We're holding i_mutex so we can access i_size directly */
@@ -1785,12 +1785,12 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
 	else {
 		start = offset >> PAGE_CACHE_SHIFT;
 		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		new_offset = shmem_seek_hole_data(mapping, start, end, origin);
+		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
 		new_offset <<= PAGE_CACHE_SHIFT;
 		if (new_offset > offset) {
 			if (new_offset < inode->i_size)
 				offset = new_offset;
-			else if (origin == SEEK_DATA)
+			else if (whence == SEEK_DATA)
 				offset = -ENXIO;
 			else
 				offset = inode->i_size;
-- 
cgit v1.2.3-55-g7522


From 8cc9764c9c7d01a6e2c3ddac8f0ac7716be01868 Mon Sep 17 00:00:00 2001
From: Kim, Milo
Date: Mon, 17 Dec 2012 16:00:43 -0800
Subject: drivers/video/backlight/lp855x_bl.c: use generic PWM functions

The LP855x family devices support the PWM input for the backlight control.
 Period of the PWM is configurable in the platform side.  Platform
specific functions are unnecessary anymore because generic PWM functions
are used inside the driver.

(PWM input mode)
To set the brightness, new lp855x_pwm_ctrl() is used.
If a PWM device is not allocated, devm_pwm_get() is called.
The PWM consumer name is from the chip name such as 'lp8550' and 'lp8556'.
To get the brightness value, no additional handling is required.
Just the value of 'props.brightness' is returned.

If the PWM driver is not ready while initializing the LP855x driver, it's
OK.  The PWM device can be retrieved later, when the brightness value is
changed.

Documentation is updated with an example.

[akpm@linux-foundation.org: coding-style simplification, per Thierry]
Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Cc: Thierry Reding <thierry.reding@avionic-design.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Bryan Wu <bryan.wu@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/backlight/lp855x-driver.txt | 10 ++-------
 drivers/video/backlight/lp855x_bl.c       | 37 ++++++++++++++++++++++---------
 include/linux/platform_data/lp855x.h      |  9 ++------
 3 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index f5e4caafab7d..1529394cfe8b 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -35,11 +35,8 @@ For supporting platform specific data, the lp855x platform data can be used.
 * mode : Brightness control mode. PWM or register based.
 * device_control : Value of DEVICE CONTROL register.
 * initial_brightness : Initial value of backlight brightness.
-* pwm_data : Platform specific pwm generation functions.
+* period_ns : Platform specific PWM period value. unit is nano.
 	     Only valid when brightness is pwm input mode.
-	     Functions should be implemented by PWM driver.
-	     - pwm_set_intensity() : set duty of PWM
-	     - pwm_get_intensity() : get current duty of PWM
 * load_new_rom_data :
 	0 : use default configuration data
 	1 : update values of eeprom or eprom registers on loading driver
@@ -71,8 +68,5 @@ static struct lp855x_platform_data lp8556_pdata = {
 	.mode = PWM_BASED,
 	.device_control = PWM_CONFIG(LP8556),
 	.initial_brightness = INITIAL_BRT,
-	.pwm_data = {
-		     .pwm_set_intensity = platform_pwm_set_intensity,
-		     .pwm_get_intensity = platform_pwm_get_intensity,
-		     },
+	.period_ns = 1000000,
 };
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index fd985e0681e9..b437541555ff 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -15,6 +15,7 @@
 #include <linux/backlight.h>
 #include <linux/err.h>
 #include <linux/platform_data/lp855x.h>
+#include <linux/pwm.h>
 
 /* Registers */
 #define BRIGHTNESS_CTRL		0x00
@@ -36,6 +37,7 @@ struct lp855x {
 	struct device *dev;
 	struct mutex xfer_lock;
 	struct lp855x_platform_data *pdata;
+	struct pwm_device *pwm;
 };
 
 static int lp855x_read_byte(struct lp855x *lp, u8 reg, u8 *data)
@@ -121,6 +123,28 @@ static int lp855x_init_registers(struct lp855x *lp)
 	return ret;
 }
 
+static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+{
+	unsigned int period = lp->pdata->period_ns;
+	unsigned int duty = br * period / max_br;
+	struct pwm_device *pwm;
+
+	/* request pwm device with the consumer name */
+	if (!lp->pwm) {
+		pwm = devm_pwm_get(lp->dev, lp->chipname);
+		if (IS_ERR(pwm))
+			return;
+
+		lp->pwm = pwm;
+	}
+
+	pwm_config(lp->pwm, duty, period);
+	if (duty)
+		pwm_enable(lp->pwm);
+	else
+		pwm_disable(lp->pwm);
+}
+
 static int lp855x_bl_update_status(struct backlight_device *bl)
 {
 	struct lp855x *lp = bl_get_data(bl);
@@ -130,12 +154,10 @@ static int lp855x_bl_update_status(struct backlight_device *bl)
 		bl->props.brightness = 0;
 
 	if (mode == PWM_BASED) {
-		struct lp855x_pwm_data *pd = &lp->pdata->pwm_data;
 		int br = bl->props.brightness;
 		int max_br = bl->props.max_brightness;
 
-		if (pd->pwm_set_intensity)
-			pd->pwm_set_intensity(br, max_br);
+		lp855x_pwm_ctrl(lp, br, max_br);
 
 	} else if (mode == REGISTER_BASED) {
 		u8 val = bl->props.brightness;
@@ -150,14 +172,7 @@ static int lp855x_bl_get_brightness(struct backlight_device *bl)
 	struct lp855x *lp = bl_get_data(bl);
 	enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode;
 
-	if (mode == PWM_BASED) {
-		struct lp855x_pwm_data *pd = &lp->pdata->pwm_data;
-		int max_br = bl->props.max_brightness;
-
-		if (pd->pwm_get_intensity)
-			bl->props.brightness = pd->pwm_get_intensity(max_br);
-
-	} else if (mode == REGISTER_BASED) {
+	if (mode == REGISTER_BASED) {
 		u8 val = 0;
 
 		lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val);
diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h
index 761f31752367..e81f62d24ee2 100644
--- a/include/linux/platform_data/lp855x.h
+++ b/include/linux/platform_data/lp855x.h
@@ -89,11 +89,6 @@ enum lp8556_brightness_source {
 	LP8556_COMBINED2,	/* pwm + i2c after the shaper block */
 };
 
-struct lp855x_pwm_data {
-	void (*pwm_set_intensity) (int brightness, int max_brightness);
-	int (*pwm_get_intensity) (int max_brightness);
-};
-
 struct lp855x_rom_data {
 	u8 addr;
 	u8 val;
@@ -105,7 +100,7 @@ struct lp855x_rom_data {
  * @mode : brightness control by pwm or lp855x register
  * @device_control : value of DEVICE CONTROL register
  * @initial_brightness : initial value of backlight brightness
- * @pwm_data : platform specific pwm generation functions.
+ * @period_ns : platform specific pwm period value. unit is nano.
 		Only valid when mode is PWM_BASED.
  * @load_new_rom_data :
 	0 : use default configuration data
@@ -118,7 +113,7 @@ struct lp855x_platform_data {
 	enum lp855x_brightness_ctrl_mode mode;
 	u8 device_control;
 	int initial_brightness;
-	struct lp855x_pwm_data pwm_data;
+	unsigned int period_ns;
 	u8 load_new_rom_data;
 	int size_program;
 	struct lp855x_rom_data *rom_data;
-- 
cgit v1.2.3-55-g7522


From 762a936fba7bd9225ca9a96e4860f6969b6b5670 Mon Sep 17 00:00:00 2001
From: Thierry Reding
Date: Mon, 17 Dec 2012 16:01:06 -0800
Subject: backlight: add of_find_backlight_by_node()

This function finds the struct backlight_device for a given device tree
node.  A dummy function is provided so that it safely compiles out if OF
support is disabled.

[akpm@linux-foundation.org: Don't use IS_ENABLED(CONFIG_OF)]
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Thierry Reding <thierry.reding@avionic-design.de>
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/backlight.c | 29 +++++++++++++++++++++++++++++
 include/linux/backlight.h           | 10 ++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 297db2fa91f5..345f6660d4b3 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -370,6 +370,35 @@ void backlight_device_unregister(struct backlight_device *bd)
 }
 EXPORT_SYMBOL(backlight_device_unregister);
 
+#ifdef CONFIG_OF
+static int of_parent_match(struct device *dev, void *data)
+{
+	return dev->parent && dev->parent->of_node == data;
+}
+
+/**
+ * of_find_backlight_by_node() - find backlight device by device-tree node
+ * @node: device-tree node of the backlight device
+ *
+ * Returns a pointer to the backlight device corresponding to the given DT
+ * node or NULL if no such backlight device exists or if the device hasn't
+ * been probed yet.
+ *
+ * This function obtains a reference on the backlight device and it is the
+ * caller's responsibility to drop the reference by calling put_device() on
+ * the backlight device's .dev field.
+ */
+struct backlight_device *of_find_backlight_by_node(struct device_node *node)
+{
+	struct device *dev;
+
+	dev = class_find_device(backlight_class, NULL, node, of_parent_match);
+
+	return dev ? to_backlight_device(dev) : NULL;
+}
+EXPORT_SYMBOL(of_find_backlight_by_node);
+#endif
+
 static void __exit backlight_class_exit(void)
 {
 	class_destroy(backlight_class);
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 5ffc6dda4675..da9a0825e007 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -134,4 +134,14 @@ struct generic_bl_info {
 	void (*kick_battery)(void);
 };
 
+#ifdef CONFIG_OF
+struct backlight_device *of_find_backlight_by_node(struct device_node *node);
+#else
+static inline struct backlight_device *
+of_find_backlight_by_node(struct device_node *node)
+{
+	return NULL;
+}
+#endif
+
 #endif
-- 
cgit v1.2.3-55-g7522


From b18888ab256f05626193be955a7a03f01d676f8c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Mon, 17 Dec 2012 16:01:18 -0800
Subject: string: introduce helper to get base file name from given path

There are several places in the kernel that use functionality like
basename(3) with the exception: in case of '/foo/bar/' we expect to get an
empty string.  Let's do it common helper for them.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: YAMANE Toshiaki <yamanetoshi@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 630125818ca8..ac889c5ea11b 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -143,4 +143,15 @@ static inline bool strstarts(const char *str, const char *prefix)
 
 extern size_t memweight(const void *ptr, size_t bytes);
 
+/**
+ * kbasename - return the last part of a pathname.
+ *
+ * @path: path to extract the filename from.
+ */
+static inline const char *kbasename(const char *path)
+{
+	const char *tail = strrchr(path, '/');
+	return tail ? tail + 1 : path;
+}
+
 #endif /* _LINUX_STRING_H_ */
-- 
cgit v1.2.3-55-g7522


From a1fd3e24d8a484b3265a6d485202afe093c058f3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 17 Dec 2012 16:01:32 -0800
Subject: percpu_rw_semaphore: reimplement to not block the readers
 unnecessarily

Currently the writer does msleep() plus synchronize_sched() 3 times to
acquire/release the semaphore, and during this time the readers are
blocked completely.  Even if the "write" section was not actually started
or if it was already finished.

With this patch down_write/up_write does synchronize_sched() twice and
down_read/up_read are still possible during this time, just they use the
slow path.

percpu_down_write() first forces the readers to use rw_semaphore and
increment the "slow" counter to take the lock for reading, then it
takes that rw_semaphore for writing and blocks the readers.

Also.  With this patch the code relies on the documented behaviour of
synchronize_sched(), it doesn't try to pair synchronize_sched() with
barrier.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu-rwsem.h |  83 ++++-------------------
 lib/Makefile                 |   2 +-
 lib/percpu-rwsem.c           | 154 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+), 71 deletions(-)
 create mode 100644 lib/percpu-rwsem.c

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index bd1e86071e57..592f0d610d8e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -2,82 +2,25 @@
 #define _LINUX_PERCPU_RWSEM_H
 
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/delay.h>
+#include <linux/wait.h>
 
 struct percpu_rw_semaphore {
-	unsigned __percpu *counters;
-	bool locked;
-	struct mutex mtx;
+	unsigned int __percpu	*fast_read_ctr;
+	struct mutex		writer_mutex;
+	struct rw_semaphore	rw_sem;
+	atomic_t		slow_read_ctr;
+	wait_queue_head_t	write_waitq;
 };
 
-#define light_mb()	barrier()
-#define heavy_mb()	synchronize_sched_expedited()
+extern void percpu_down_read(struct percpu_rw_semaphore *);
+extern void percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *p)
-{
-	rcu_read_lock_sched();
-	if (unlikely(p->locked)) {
-		rcu_read_unlock_sched();
-		mutex_lock(&p->mtx);
-		this_cpu_inc(*p->counters);
-		mutex_unlock(&p->mtx);
-		return;
-	}
-	this_cpu_inc(*p->counters);
-	rcu_read_unlock_sched();
-	light_mb(); /* A, between read of p->locked and read of data, paired with D */
-}
+extern void percpu_down_write(struct percpu_rw_semaphore *);
+extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-static inline void percpu_up_read(struct percpu_rw_semaphore *p)
-{
-	light_mb(); /* B, between read of the data and write to p->counter, paired with C */
-	this_cpu_dec(*p->counters);
-}
-
-static inline unsigned __percpu_count(unsigned __percpu *counters)
-{
-	unsigned total = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
-
-	return total;
-}
-
-static inline void percpu_down_write(struct percpu_rw_semaphore *p)
-{
-	mutex_lock(&p->mtx);
-	p->locked = true;
-	synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
-	while (__percpu_count(p->counters))
-		msleep(1);
-	heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
-}
-
-static inline void percpu_up_write(struct percpu_rw_semaphore *p)
-{
-	heavy_mb(); /* D, between write to data and write to p->locked, paired with A */
-	p->locked = false;
-	mutex_unlock(&p->mtx);
-}
-
-static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
-{
-	p->counters = alloc_percpu(unsigned);
-	if (unlikely(!p->counters))
-		return -ENOMEM;
-	p->locked = false;
-	mutex_init(&p->mtx);
-	return 0;
-}
-
-static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
-{
-	free_percpu(p->counters);
-	p->counters = NULL; /* catch use after free bugs */
-}
+extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
+extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index e2152fa7ff4d..e959c20efb24 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o percpu-rwsem.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 000000000000..2e03bcfe48f9
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,154 @@
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+{
+	brw->fast_read_ctr = alloc_percpu(int);
+	if (unlikely(!brw->fast_read_ctr))
+		return -ENOMEM;
+
+	mutex_init(&brw->writer_mutex);
+	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->slow_read_ctr, 0);
+	init_waitqueue_head(&brw->write_waitq);
+	return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+	free_percpu(brw->fast_read_ctr);
+	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast-path for down_read/up_read, it only needs to ensure
+ * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
+ * serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we should guarantee acquire/release semantics
+ * in case when
+ *
+ *	R_W: down_write() comes after up_read(), the writer should see all
+ *	     changes done by the reader
+ * or
+ *	W_R: down_read() comes after up_write(), the reader should see all
+ *	     changes done by the writer
+ *
+ * If this helper fails the callers rely on the normal rw_semaphore and
+ * atomic_dec_and_test(), so in this case we have the necessary barriers.
+ *
+ * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+	bool success = false;
+
+	preempt_disable();
+	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+		__this_cpu_add(*brw->fast_read_ctr, val);
+		success = true;
+	}
+	preempt_enable();
+
+	return success;
+}
+
+/*
+ * Like the normal down_read() this is not recursive, the writer can
+ * come after the first percpu_down_read() and create the deadlock.
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, +1)))
+		return;
+
+	down_read(&brw->rw_sem);
+	atomic_inc(&brw->slow_read_ctr);
+	up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, -1)))
+		return;
+
+	/* false-positive is possible but harmless */
+	if (atomic_dec_and_test(&brw->slow_read_ctr))
+		wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sum += per_cpu(*brw->fast_read_ctr, cpu);
+		per_cpu(*brw->fast_read_ctr, cpu) = 0;
+	}
+
+	return sum;
+}
+
+/*
+ * A writer takes ->writer_mutex to exclude other writers and to force the
+ * readers to switch to the slow mode, note the mutex_is_locked() check in
+ * update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
+ * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
+ * counter it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing and blocks the new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
+	mutex_lock(&brw->writer_mutex);
+
+	/*
+	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 *    so that update_fast_ctr() can't succeed.
+	 *
+	 * 2. Ensures we see the result of every previous this_cpu_add() in
+	 *    update_fast_ctr().
+	 *
+	 * 3. Ensures that if any reader has exited its critical section via
+	 *    fast-path, it executes a full memory barrier before we return.
+	 *    See R_W case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+
+	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+	/* block the new readers completely */
+	down_write(&brw->rw_sem);
+
+	/* wait for all readers to complete their percpu_up_read() */
+	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+	/* allow the new readers, but only the slow-path */
+	up_write(&brw->rw_sem);
+
+	/*
+	 * Insert the barrier before the next fast-path in down_read,
+	 * see W_R case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+	mutex_unlock(&brw->writer_mutex);
+}
-- 
cgit v1.2.3-55-g7522


From 9390ef0c85fd065f01045fef708b046c98cda04c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 17 Dec 2012 16:01:36 -0800
Subject: percpu_rw_semaphore: kill ->writer_mutex, add ->write_ctr

percpu_rw_semaphore->writer_mutex was only added to simplify the initial
rewrite, the only thing it protects is clear_fast_ctr() which otherwise
could be called by multiple writers.  ->rw_sem is enough to serialize the
writers.

Kill this mutex and add "atomic_t write_ctr" instead.  The writers
increment/decrement this counter, the readers check it is zero instead of
mutex_is_locked().

Move atomic_add(clear_fast_ctr(), slow_read_ctr) under down_write() to
avoid the race with other writers.  This is a bit sub-optimal, only the
first writer needs this and we do not need to exclude the readers at this
stage.  But this is simple, we do not want another internal lock until we
add more features.

And this speeds up the write-contended case.  Before this patch the racing
writers sleep in synchronize_sched_expedited() sequentially, with this
patch multiple synchronize_sched_expedited's can "overlap" with each
other.  Note: we can do more optimizations, this is only the first step.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu-rwsem.h |  4 ++--
 lib/percpu-rwsem.c           | 34 ++++++++++++++++------------------
 2 files changed, 18 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 592f0d610d8e..d2146a4f833e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_PERCPU_RWSEM_H
 #define _LINUX_PERCPU_RWSEM_H
 
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
 
 struct percpu_rw_semaphore {
 	unsigned int __percpu	*fast_read_ctr;
-	struct mutex		writer_mutex;
+	atomic_t		write_ctr;
 	struct rw_semaphore	rw_sem;
 	atomic_t		slow_read_ctr;
 	wait_queue_head_t	write_waitq;
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
index 2e03bcfe48f9..ce92ab563a08 100644
--- a/lib/percpu-rwsem.c
+++ b/lib/percpu-rwsem.c
@@ -1,4 +1,4 @@
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
@@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
 	if (unlikely(!brw->fast_read_ctr))
 		return -ENOMEM;
 
-	mutex_init(&brw->writer_mutex);
 	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->write_ctr, 0);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;
@@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 
 /*
  * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
  * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
  * serialize with the preempt-disabled section below.
  *
@@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
  *
- * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
  * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
  * reader inside the critical section. See the comments in down_write and
  * up_write below.
@@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 	bool success = false;
 
 	preempt_disable();
-	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+	if (likely(!atomic_read(&brw->write_ctr))) {
 		__this_cpu_add(*brw->fast_read_ctr, val);
 		success = true;
 	}
@@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 }
 
 /*
- * A writer takes ->writer_mutex to exclude other writers and to force the
- * readers to switch to the slow mode, note the mutex_is_locked() check in
- * update_fast_ctr().
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
  *
  * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
  * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
@@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
-	mutex_lock(&brw->writer_mutex);
-
+	/* tell update_fast_ctr() there is a pending writer */
+	atomic_inc(&brw->write_ctr);
 	/*
-	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
 	 *    so that update_fast_ctr() can't succeed.
 	 *
 	 * 2. Ensures we see the result of every previous this_cpu_add() in
@@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	 */
 	synchronize_sched_expedited();
 
+	/* exclude other writers, and block the new readers completely */
+	down_write(&brw->rw_sem);
+
 	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
 	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
 
-	/* block the new readers completely */
-	down_write(&brw->rw_sem);
-
 	/* wait for all readers to complete their percpu_up_read() */
 	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
-	/* allow the new readers, but only the slow-path */
+	/* release the lock, but the readers can't use the fast-path */
 	up_write(&brw->rw_sem);
-
 	/*
 	 * Insert the barrier before the next fast-path in down_read,
 	 * see W_R case in the comment above update_fast_ctr().
 	 */
 	synchronize_sched_expedited();
-	mutex_unlock(&brw->writer_mutex);
+	/* the last writer unblocks update_fast_ctr() */
+	atomic_dec(&brw->write_ctr);
 }
-- 
cgit v1.2.3-55-g7522


From 8ebe34731a0d1a9d89b536430afd98d0fadec99b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 17 Dec 2012 16:01:38 -0800
Subject: percpu_rw_semaphore: add lockdep annotations

Add lockdep annotations.  Not only this can help to find the potential
problems, we do not want the false warnings if, say, the task takes two
different percpu_rw_semaphore's for reading.  IOW, at least ->rw_sem
should not use a single class.

This patch exposes this internal lock to lockdep so that it represents the
whole percpu_rw_semaphore.  This way we do not need to add another "fake"
->lockdep_map and lock_class_key.  More importantly, this also makes the
output from lockdep much more understandable if it finds the problem.

In short, with this patch from lockdep pov percpu_down_read() and
percpu_up_read() acquire/release ->rw_sem for reading, this matches the
actual semantics.  This abuses __up_read() but I hope this is fine and in
fact I'd like to have down_read_no_lockdep() as well,
percpu_down_read_recursive_readers() will need it.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu-rwsem.h | 10 +++++++++-
 lib/percpu-rwsem.c           | 21 +++++++++++++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index d2146a4f833e..3e88c9a7d57f 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -5,6 +5,7 @@
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
+#include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
 	unsigned int __percpu	*fast_read_ctr;
@@ -20,7 +21,14 @@ extern void percpu_up_read(struct percpu_rw_semaphore *);
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
+extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
+				const char *, struct lock_class_key *);
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
+#define percpu_init_rwsem(brw)	\
+({								\
+	static struct lock_class_key rwsem_key;			\
+	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
+})
+
 #endif
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
index ce92ab563a08..652a8ee8efe9 100644
--- a/lib/percpu-rwsem.c
+++ b/lib/percpu-rwsem.c
@@ -2,18 +2,21 @@
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
+#include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 
-int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+			const char *name, struct lock_class_key *rwsem_key)
 {
 	brw->fast_read_ctr = alloc_percpu(int);
 	if (unlikely(!brw->fast_read_ctr))
 		return -ENOMEM;
 
-	init_rwsem(&brw->rw_sem);
+	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
+	__init_rwsem(&brw->rw_sem, name, rwsem_key);
 	atomic_set(&brw->write_ctr, 0);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
@@ -66,19 +69,29 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 /*
  * Like the normal down_read() this is not recursive, the writer can
  * come after the first percpu_down_read() and create the deadlock.
+ *
+ * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
+ * percpu_up_read() does rwsem_release(). This pairs with the usage
+ * of ->rw_sem in percpu_down/up_write().
  */
 void percpu_down_read(struct percpu_rw_semaphore *brw)
 {
-	if (likely(update_fast_ctr(brw, +1)))
+	might_sleep();
+	if (likely(update_fast_ctr(brw, +1))) {
+		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
 		return;
+	}
 
 	down_read(&brw->rw_sem);
 	atomic_inc(&brw->slow_read_ctr);
-	up_read(&brw->rw_sem);
+	/* avoid up_read()->rwsem_release() */
+	__up_read(&brw->rw_sem);
 }
 
 void percpu_up_read(struct percpu_rw_semaphore *brw)
 {
+	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
+
 	if (likely(update_fast_ctr(brw, -1)))
 		return;
 
-- 
cgit v1.2.3-55-g7522


From 0ad50c3896afbb3c103409a18260e601b87a744c Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Mon, 17 Dec 2012 16:01:45 -0800
Subject: compat: generic compat_sys_sched_rr_get_interval() implementation

This function is used by sparc, powerpc tile and arm64 for compat support.
 The patch adds a generic implementation with a wrapper for PowerPC to do
the u32->int sign extension.

The reason for a single patch covering powerpc, tile, sparc and arm64 is
to keep it bisectable, otherwise kernel building may fail with mismatched
function declarations.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>  [for tile]
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/unistd.h   |  1 +
 arch/arm64/kernel/sys_compat.c    | 15 ---------------
 arch/powerpc/include/asm/systbl.h |  2 +-
 arch/powerpc/include/asm/unistd.h |  1 +
 arch/powerpc/kernel/sys_ppc32.c   | 17 ++++-------------
 arch/sparc/include/asm/unistd.h   |  1 +
 arch/sparc/kernel/sys_sparc32.c   | 14 --------------
 arch/tile/include/asm/compat.h    |  2 --
 arch/tile/include/asm/unistd.h    |  1 +
 arch/tile/kernel/compat.c         | 18 ------------------
 include/linux/compat.h            |  3 +++
 kernel/compat.c                   | 17 +++++++++++++++++
 12 files changed, 29 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d69aeea6da1e..76fb7dd3350a 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -20,6 +20,7 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index f7b05edf8ce3..26e9c4eeaba8 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -28,21 +28,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd32.h>
 
-asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid,
-						struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 static inline void
 do_compat_cache_op(unsigned long start, unsigned long end, int flags)
 {
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 840838769853..cec8aae5cbf8 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -164,7 +164,7 @@ COMPAT_SYS_SPU(sched_getscheduler)
 SYSCALL_SPU(sched_yield)
 COMPAT_SYS_SPU(sched_get_priority_max)
 COMPAT_SYS_SPU(sched_get_priority_min)
-COMPAT_SYS_SPU(sched_rr_get_interval)
+SYSX_SPU(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval_wrapper,sys_sched_rr_get_interval)
 COMPAT_SYS_SPU(nanosleep)
 SYSCALL_SPU(mremap)
 SYSCALL_SPU(setresuid)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 76fe846ec40e..bcbbe413c606 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -54,6 +54,7 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 9c2ed90ece8f..8a93778ed9f5 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -175,19 +175,10 @@ asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 a
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
  * and the register representation of a signed int (msr in 64-bit mode) is performed.
  */
-asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-
-	/* The __user pointer cast is valid because of the set_fs() */
-	set_fs (KERNEL_DS);
-	ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t);
-	set_fs (old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
+asmlinkage long compat_sys_sched_rr_get_interval_wrapper(u32 pid,
+							 struct compat_timespec __user *interval)
+{
+	return compat_sys_sched_rr_get_interval((int)pid, interval);
 }
 
 /* Note: it is necessary to treat mode as an unsigned int,
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index c3e5d8b64171..497386a7ed28 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -45,6 +45,7 @@
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_EXECVE
 
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index 03c7e929ec34..4a4cdc633f6b 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -211,20 +211,6 @@ asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2)
 	return sys_sysfs(option, arg1, arg2);
 }
 
-asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-	
-	set_fs (KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
-	set_fs (old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 asmlinkage long compat_sys_rt_sigprocmask(int how,
 					  compat_sigset_t __user *set,
 					  compat_sigset_t __user *oset,
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index ca61fb4296b3..88f3c227afd9 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -296,8 +296,6 @@ long compat_sys_sync_file_range2(int fd, unsigned int flags,
 long compat_sys_fallocate(int fd, int mode,
 			  u32 offset_lo, u32 offset_hi,
 			  u32 len_lo, u32 len_hi);
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-				      struct compat_timespec __user *interval);
 
 /* Assembly trampoline to avoid clobbering r0. */
 long _compat_sys_rt_sigreturn(void);
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index b51c6ee3cd6c..fe841e7d4963 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -14,6 +14,7 @@
 /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
 #ifdef CONFIG_COMPAT
 #define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_SYS_EXECVE
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index 9cd7cb6041c0..7f72401b4f45 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -76,24 +76,6 @@ long compat_sys_fallocate(int fd, int mode,
 			     ((loff_t)len_hi << 32) | len_lo);
 }
 
-
-
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-				      struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid,
-					(struct timespec __force __user *)&t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
 #define __SYSCALL(nr, call) [nr] = (call),
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 784ebfe63c48..e4920bd58a47 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -588,6 +588,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
 
+asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+						 struct compat_timespec __user *interval);
+
 #else
 
 #define is_compat_task() (0)
diff --git a/kernel/compat.c b/kernel/compat.c
index c28a306ae05c..f6150e92dfc9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1215,6 +1215,23 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
 	return 0;
 }
 
+#ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
+asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+						 struct compat_timespec __user *interval)
+{
+	struct timespec t;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
+	set_fs(old_fs);
+	if (put_compat_timespec(&t, interval))
+		return -EFAULT;
+	return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */
+
 /*
  * Allocate user-space memory for the duration of a single system call,
  * in order to marshall parameters inside a compat thunk.
-- 
cgit v1.2.3-55-g7522


From 4c925d6031f719fad6ea8b1c94a636f4c0fea39b Mon Sep 17 00:00:00 2001
From: Eldad Zack
Date: Mon, 17 Dec 2012 16:03:04 -0800
Subject: kstrto*: add documentation

As Bruce Fields pointed out, kstrto* is currently lacking kerneldoc
comments.  This patch adds kerneldoc comments to common variants of
kstrto*: kstrto(u)l, kstrto(u)ll and kstrto(u)int.

Signed-off-by: Eldad Zack <eldad@fogrefinery.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Joe Perches <joe@perches.com>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Rob Landley <rob@landley.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DocBook/kernel-api.tmpl |  3 ++
 include/linux/kernel.h                | 33 ++++++++++++++++++
 lib/kstrtox.c                         | 64 +++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 00687ee9d363..f75ab4c1b281 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -58,6 +58,9 @@
 
      <sect1><title>String Conversions</title>
 !Elib/vsprintf.c
+!Finclude/linux/kernel.h kstrtol
+!Finclude/linux/kernel.h kstrtoul
+!Elib/kstrtox.c
      </sect1>
      <sect1><title>String Manipulation</title>
 <!-- All functions are exported at now
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d97ed5897447..d140e8fb075f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -220,6 +220,23 @@ int __must_check _kstrtol(const char *s, unsigned int base, long *res);
 
 int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
 int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+*/
 static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
 {
 	/*
@@ -233,6 +250,22 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign
 		return _kstrtoul(s, base, res);
 }
 
+/**
+ * kstrtol - convert a string to a long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
 {
 	/*
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index c3615eab0cc3..f78ae0c0c4e2 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -104,6 +104,22 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 	return 0;
 }
 
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
 	if (s[0] == '+')
@@ -112,6 +128,22 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 }
 EXPORT_SYMBOL(kstrtoull);
 
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoll(const char *s, unsigned int base, long long *res)
 {
 	unsigned long long tmp;
@@ -168,6 +200,22 @@ int _kstrtol(const char *s, unsigned int base, long *res)
 }
 EXPORT_SYMBOL(_kstrtol);
 
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtouint(const char *s, unsigned int base, unsigned int *res)
 {
 	unsigned long long tmp;
@@ -183,6 +231,22 @@ int kstrtouint(const char *s, unsigned int base, unsigned int *res)
 }
 EXPORT_SYMBOL(kstrtouint);
 
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoint(const char *s, unsigned int base, int *res)
 {
 	long long tmp;
-- 
cgit v1.2.3-55-g7522


From 992fb6e170639b0849bace8e49bf31bd37c4123c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 17 Dec 2012 16:03:07 -0800
Subject: ptrace: introduce PTRACE_O_EXITKILL

Ptrace jailers want to be sure that the tracee can never escape
from the control. However if the tracer dies unexpectedly the
tracee continues to run in potentially unsafe mode.

Add the new ptrace option PTRACE_O_EXITKILL. If the tracer exits
it sends SIGKILL to every tracee which has this bit set.

Note that the new option is not equal to the last-option << 1.  Because
currently all options have an event, and the new one starts the eventless
group.  It uses the random 20 bit, so we have the room for 12 more events,
but we can also add the new eventless options below this one.

Suggested by Amnon Shiloh.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Tested-by: Amnon Shiloh <u3557@miso.sublimeip.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Chris Evans <scarybeasts@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h      | 2 ++
 include/uapi/linux/ptrace.h | 5 ++++-
 kernel/ptrace.c             | 3 +++
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a89ff04bddd9..addfbe7c180e 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -32,6 +32,8 @@
 #define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
 #define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)
 
+#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+
 /* single stepping state bits (used on ARM and PA-RISC) */
 #define PT_SINGLESTEP_BIT	31
 #define PT_SINGLESTEP		(1<<PT_SINGLESTEP_BIT)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 1ef6c056a9e4..022ab186a812 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -73,7 +73,10 @@
 #define PTRACE_O_TRACEEXIT	(1 << PTRACE_EVENT_EXIT)
 #define PTRACE_O_TRACESECCOMP	(1 << PTRACE_EVENT_SECCOMP)
 
-#define PTRACE_O_MASK		0x000000ff
+/* eventless options */
+#define PTRACE_O_EXITKILL	(1 << 20)
+
+#define PTRACE_O_MASK		(0x000000ff | PTRACE_O_EXITKILL)
 
 #include <asm/ptrace.h>
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55dda955..ec8118ab2a47 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -457,6 +457,9 @@ void exit_ptrace(struct task_struct *tracer)
 		return;
 
 	list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
+		if (unlikely(p->ptrace & PT_EXITKILL))
+			send_sig_info(SIGKILL, SEND_SIG_FORCED, p);
+
 		if (__ptrace_detach(tracer, p))
 			list_add(&p->ptrace_entry, &ptrace_dead);
 	}
-- 
cgit v1.2.3-55-g7522


From d740269867021faf4ce38a449353d2b986c34a67 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Mon, 17 Dec 2012 16:03:20 -0800
Subject: exec: use -ELOOP for max recursion depth

To avoid an explosion of request_module calls on a chain of abusive
scripts, fail maximum recursion with -ELOOP instead of -ENOEXEC. As soon
as maximum recursion depth is hit, the error will fail all the way back
up the chain, aborting immediately.

This also has the side-effect of stopping the user's shell from attempting
to reexecute the top-level file as a shell script. As seen in the
dash source:

        if (cmd != path_bshell && errno == ENOEXEC) {
                *argv-- = cmd;
                *argv = cmd = path_bshell;
                goto repeat;
        }

The above logic was designed for running scripts automatically that lacked
the "#!" header, not to re-try failed recursion. On a legitimate -ENOEXEC,
things continue to behave as the shell expects.

Additionally, when tracking recursion, the binfmt handlers should not be
involved. The recursion being tracked is the depth of calls through
search_binary_handler(), so that function should be exclusively responsible
for tracking the depth.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: halfdog <me@halfdog.net>
Cc: P J P <ppandit@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_em86.c        |  1 -
 fs/binfmt_misc.c        |  6 ------
 fs/binfmt_script.c      |  4 +---
 fs/exec.c               | 10 +++++-----
 include/linux/binfmts.h |  2 --
 5 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 4e6cce57d113..037a3e2b045b 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -42,7 +42,6 @@ static int load_em86(struct linux_binprm *bprm)
 			return -ENOEXEC;
 	}
 
-	bprm->recursion_depth++; /* Well, the bang-shell is implicit... */
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index b0b70fbea06c..9be335fb8a7c 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -117,10 +117,6 @@ static int load_misc_binary(struct linux_binprm *bprm)
 	if (!enabled)
 		goto _ret;
 
-	retval = -ENOEXEC;
-	if (bprm->recursion_depth > BINPRM_MAX_RECURSION)
-		goto _ret;
-
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
 	fmt = check_file(bprm);
@@ -197,8 +193,6 @@ static int load_misc_binary(struct linux_binprm *bprm)
 	if (retval < 0)
 		goto _error;
 
-	bprm->recursion_depth++;
-
 	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto _error;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 8c954997e7f7..1610a91637e5 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -22,15 +22,13 @@ static int load_script(struct linux_binprm *bprm)
 	char interp[BINPRM_BUF_SIZE];
 	int retval;
 
-	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') ||
-	    (bprm->recursion_depth > BINPRM_MAX_RECURSION))
+	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
 		return -ENOEXEC;
 	/*
 	 * This section does the #! interpretation.
 	 * Sorta complicated, but hopefully it will work.  -TYT
 	 */
 
-	bprm->recursion_depth++;
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/fs/exec.c b/fs/exec.c
index 721a29929511..d5eb9e605ffd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1356,6 +1356,10 @@ int search_binary_handler(struct linux_binprm *bprm)
 	struct linux_binfmt *fmt;
 	pid_t old_pid, old_vpid;
 
+	/* This allows 4 levels of binfmt rewrites before failing hard. */
+	if (depth > 5)
+		return -ELOOP;
+
 	retval = security_bprm_check(bprm);
 	if (retval)
 		return retval;
@@ -1380,12 +1384,8 @@ int search_binary_handler(struct linux_binprm *bprm)
 			if (!try_module_get(fmt->module))
 				continue;
 			read_unlock(&binfmt_lock);
+			bprm->recursion_depth = depth + 1;
 			retval = fn(bprm);
-			/*
-			 * Restore the depth counter to its starting value
-			 * in this call, so we don't have to rely on every
-			 * load_binary function to restore it on return.
-			 */
 			bprm->recursion_depth = depth;
 			if (retval >= 0) {
 				if (depth == 0) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2630c9b41a86..a4c2b565c835 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -54,8 +54,6 @@ struct linux_binprm {
 #define BINPRM_FLAGS_EXECFD_BIT 1
 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
 
-#define BINPRM_MAX_RECURSION 4
-
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
 	siginfo_t *siginfo;
-- 
cgit v1.2.3-55-g7522


From a5ba911ec3792168530d35e16a8ec3b6fc60bcb5 Mon Sep 17 00:00:00 2001
From: Gao feng
Date: Mon, 17 Dec 2012 16:03:22 -0800
Subject: pidns: remove unused is_container_init()

Since commit 1cdcbec1a337 ("CRED: Neuter sys_capset()")
is_container_init() has no callers.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  6 ------
 kernel/pid.c          | 15 ---------------
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b089c92c609b..9914c662ed7b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1778,12 +1778,6 @@ static inline int is_global_init(struct task_struct *tsk)
 	return tsk->pid == 1;
 }
 
-/*
- * is_container_init:
- * check whether in the task is init in its own pid namespace.
- */
-extern int is_container_init(struct task_struct *tsk);
-
 extern struct pid *cad_pid;
 
 extern void free_task(struct task_struct *tsk);
diff --git a/kernel/pid.c b/kernel/pid.c
index fd996c1ed9f8..a54a1123c7cf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -81,21 +81,6 @@ struct pid_namespace init_pid_ns = {
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
-int is_container_init(struct task_struct *tsk)
-{
-	int ret = 0;
-	struct pid *pid;
-
-	rcu_read_lock();
-	pid = task_pid(tsk);
-	if (pid != NULL && pid->numbers[pid->level].nr == 1)
-		ret = 1;
-	rcu_read_unlock();
-
-	return ret;
-}
-EXPORT_SYMBOL(is_container_init);
-
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
  * interrupt might come in and do read_lock(&tasklist_lock).
-- 
cgit v1.2.3-55-g7522


From 8529091e8e2ae25e0f4003086f619765ff255e4b Mon Sep 17 00:00:00 2001
From: Josh Triplett
Date: Mon, 17 Dec 2012 16:03:24 -0800
Subject: linux/compiler.h: add __must_hold macro for functions called with a
 lock held

linux/compiler.h has macros to denote functions that acquire or release
locks, but not to denote functions called with a lock held that return
with the lock still held.  Add a __must_hold macro to cover that case.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Reported-by: Ed Cashin <ecashin@coraid.com>
Tested-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f430e4162f41..b121554f1fe2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -10,6 +10,7 @@
 # define __force	__attribute__((force))
 # define __nocast	__attribute__((nocast))
 # define __iomem	__attribute__((noderef, address_space(2)))
+# define __must_hold(x)	__attribute__((context(x,1,1)))
 # define __acquires(x)	__attribute__((context(x,0,1)))
 # define __releases(x)	__attribute__((context(x,1,0)))
 # define __acquire(x)	__context__(x,1)
@@ -33,6 +34,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __chk_user_ptr(x) (void)0
 # define __chk_io_ptr(x) (void)0
 # define __builtin_warning(x, y...) (1)
+# define __must_hold(x)
 # define __acquires(x)
 # define __releases(x)
 # define __acquire(x) (void)0
-- 
cgit v1.2.3-55-g7522


From 496f2f93b1cc286f5a4f4f9acdc1e5314978683f Mon Sep 17 00:00:00 2001
From: Akinobu Mita
Date: Mon, 17 Dec 2012 16:04:23 -0800
Subject: random32: rename random32 to prandom

This renames all random32 functions to have 'prandom_' prefix as follows:

  void prandom_seed(u32 seed);	/* rename from srandom32() */
  u32 prandom_u32(void);		/* rename from random32() */
  void prandom_seed_state(struct rnd_state *state, u64 seed);
  				/* rename from prandom32_seed() */
  u32 prandom_u32_state(struct rnd_state *state);
  				/* rename from prandom32() */

The purpose of this renaming is to prevent some kernel developers from
assuming that prandom32() and random32() might imply that only
prandom32() was the one using a pseudo-random number generator by
prandom32's "p", and the result may be a very embarassing security
exposure.  This concern was expressed by Theodore Ts'o.

And furthermore, I'm going to introduce new functions for getting the
requested number of pseudo-random bytes.  If I continue to use both
prandom32 and random32 prefixes for these functions, the confusion
is getting worse.

As a result of this renaming, "prandom_" is the common prefix for
pseudo-random number library.

Currently, srandom32() and random32() are preserved because it is
difficult to rename too many users at once.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Robert Love <robert.w.love@intel.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Cc: David Laight <david.laight@aculab.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/fcoe/fcoe_ctlr.c |  4 ++--
 include/linux/random.h        | 17 ++++++++++-----
 lib/interval_tree_test_main.c |  7 ++++---
 lib/random32.c                | 48 +++++++++++++++++++++----------------------
 lib/rbtree_test.c             |  6 +++---
 5 files changed, 45 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 2ebe03a4b51d..4a909d7cfde1 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2144,7 +2144,7 @@ static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip)
 	 */
 	port_id = fip->port_id;
 	if (fip->probe_tries)
-		port_id = prandom32(&fip->rnd_state) & 0xffff;
+		port_id = prandom_u32_state(&fip->rnd_state) & 0xffff;
 	else if (!port_id)
 		port_id = fip->lp->wwpn & 0xffff;
 	if (!port_id || port_id == 0xffff)
@@ -2169,7 +2169,7 @@ static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip)
 static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip)
 {
 	fip->probe_tries = 0;
-	prandom32_seed(&fip->rnd_state, fip->lp->wwpn);
+	prandom_seed_state(&fip->rnd_state, fip->lp->wwpn);
 	fcoe_ctlr_vn_restart(fip);
 }
 
diff --git a/include/linux/random.h b/include/linux/random.h
index 6330ed47b38b..db6debc6649e 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -25,10 +25,17 @@ extern const struct file_operations random_fops, urandom_fops;
 unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
-u32 random32(void);
-void srandom32(u32 seed);
+u32 prandom_u32(void);
+void prandom_seed(u32 seed);
 
-u32 prandom32(struct rnd_state *);
+/*
+ * These macros are preserved for backward compatibility and should be
+ * removed as soon as a transition is finished.
+ */
+#define random32() prandom_u32()
+#define srandom32(seed) prandom_seed(seed)
+
+u32 prandom_u32_state(struct rnd_state *);
 
 /*
  * Handle minimum values for seeds
@@ -39,11 +46,11 @@ static inline u32 __seed(u32 x, u32 m)
 }
 
 /**
- * prandom32_seed - set seed for prandom32().
+ * prandom_seed_state - set seed for prandom_u32_state().
  * @state: pointer to state structure to receive the seed.
  * @seed: arbitrary 64-bit value to use as a seed.
  */
-static inline void prandom32_seed(struct rnd_state *state, u64 seed)
+static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
index b25903987f7a..245900b98c8e 100644
--- a/lib/interval_tree_test_main.c
+++ b/lib/interval_tree_test_main.c
@@ -30,7 +30,8 @@ static void init(void)
 {
 	int i;
 	for (i = 0; i < NODES; i++) {
-		u32 a = prandom32(&rnd), b = prandom32(&rnd);
+		u32 a = prandom_u32_state(&rnd);
+		u32 b = prandom_u32_state(&rnd);
 		if (a <= b) {
 			nodes[i].start = a;
 			nodes[i].last = b;
@@ -40,7 +41,7 @@ static void init(void)
 		}
 	}
 	for (i = 0; i < SEARCHES; i++)
-		queries[i] = prandom32(&rnd);
+		queries[i] = prandom_u32_state(&rnd);
 }
 
 static int interval_tree_test_init(void)
@@ -51,7 +52,7 @@ static int interval_tree_test_init(void)
 
 	printk(KERN_ALERT "interval tree insert/remove");
 
-	prandom32_seed(&rnd, 3141592653589793238ULL);
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
 	init();
 
 	time1 = get_cycles();
diff --git a/lib/random32.c b/lib/random32.c
index 938bde5876ac..d1830fade915 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -42,13 +42,13 @@
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
 /**
- *	prandom32 - seeded pseudo-random number generator.
+ *	prandom_u32_state - seeded pseudo-random number generator.
  *	@state: pointer to state structure holding seeded state.
  *
  *	This is used for pseudo-randomness with no outside seeding.
- *	For more random results, use random32().
+ *	For more random results, use prandom_u32().
  */
-u32 prandom32(struct rnd_state *state)
+u32 prandom_u32_state(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
@@ -58,32 +58,32 @@ u32 prandom32(struct rnd_state *state)
 
 	return (state->s1 ^ state->s2 ^ state->s3);
 }
-EXPORT_SYMBOL(prandom32);
+EXPORT_SYMBOL(prandom_u32_state);
 
 /**
- *	random32 - pseudo random number generator
+ *	prandom_u32 - pseudo random number generator
  *
  *	A 32 bit pseudo-random number is generated using a fast
  *	algorithm suitable for simulation. This algorithm is NOT
  *	considered safe for cryptographic use.
  */
-u32 random32(void)
+u32 prandom_u32(void)
 {
 	unsigned long r;
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	r = prandom32(state);
+	r = prandom_u32_state(state);
 	put_cpu_var(state);
 	return r;
 }
-EXPORT_SYMBOL(random32);
+EXPORT_SYMBOL(prandom_u32);
 
 /**
- *	srandom32 - add entropy to pseudo random number generator
+ *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
  *
- *	Add some additional seeding to the random32() pool.
+ *	Add some additional seeding to the prandom pool.
  */
-void srandom32(u32 entropy)
+void prandom_seed(u32 entropy)
 {
 	int i;
 	/*
@@ -95,13 +95,13 @@ void srandom32(u32 entropy)
 		state->s1 = __seed(state->s1 ^ entropy, 1);
 	}
 }
-EXPORT_SYMBOL(srandom32);
+EXPORT_SYMBOL(prandom_seed);
 
 /*
  *	Generate some initially weak seeding values to allow
- *	to start the random32() engine.
+ *	to start the prandom_u32() engine.
  */
-static int __init random32_init(void)
+static int __init prandom_init(void)
 {
 	int i;
 
@@ -114,22 +114,22 @@ static int __init random32_init(void)
 		state->s3 = __seed(LCG(state->s2), 15);
 
 		/* "warm it up" */
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
 	}
 	return 0;
 }
-core_initcall(random32_init);
+core_initcall(prandom_init);
 
 /*
  *	Generate better values after random number generator
  *	is fully initialized.
  */
-static int __init random32_reseed(void)
+static int __init prandom_reseed(void)
 {
 	int i;
 
@@ -143,8 +143,8 @@ static int __init random32_reseed(void)
 		state->s3 = __seed(seeds[2], 15);
 
 		/* mix it in */
-		prandom32(state);
+		prandom_u32_state(state);
 	}
 	return 0;
 }
-late_initcall(random32_reseed);
+late_initcall(prandom_reseed);
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index d7f491a54579..af38aedbd874 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -96,8 +96,8 @@ static void init(void)
 {
 	int i;
 	for (i = 0; i < NODES; i++) {
-		nodes[i].key = prandom32(&rnd);
-		nodes[i].val = prandom32(&rnd);
+		nodes[i].key = prandom_u32_state(&rnd);
+		nodes[i].val = prandom_u32_state(&rnd);
 	}
 }
 
@@ -155,7 +155,7 @@ static int rbtree_test_init(void)
 
 	printk(KERN_ALERT "rbtree testing");
 
-	prandom32_seed(&rnd, 3141592653589793238ULL);
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
 	init();
 
 	time1 = get_cycles();
-- 
cgit v1.2.3-55-g7522


From 6582c665d6b882dad8329e05749fbcf119f1ab88 Mon Sep 17 00:00:00 2001
From: Akinobu Mita
Date: Mon, 17 Dec 2012 16:04:25 -0800
Subject: prandom: introduce prandom_bytes() and prandom_bytes_state()

Add functions to get the requested number of pseudo-random bytes.

The difference from get_random_bytes() is that it generates pseudo-random
numbers by prandom_u32().  It doesn't consume the entropy pool, and the
sequence is reproducible if the same rnd_state is used.  So it is suitable
for generating random bytes for testing.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: David Laight <david.laight@aculab.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Robert Love <robert.w.love@intel.com>
Cc: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/random.h |  2 ++
 lib/random32.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index db6debc6649e..d9846088c2c5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -26,6 +26,7 @@ unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);
+void prandom_bytes(void *buf, int nbytes);
 void prandom_seed(u32 seed);
 
 /*
@@ -36,6 +37,7 @@ void prandom_seed(u32 seed);
 #define srandom32(seed) prandom_seed(seed)
 
 u32 prandom_u32_state(struct rnd_state *);
+void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
 /*
  * Handle minimum values for seeds
diff --git a/lib/random32.c b/lib/random32.c
index d1830fade915..52280d5526be 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -77,6 +77,55 @@ u32 prandom_u32(void)
 }
 EXPORT_SYMBOL(prandom_u32);
 
+/*
+ *	prandom_bytes_state - get the requested number of pseudo-random bytes
+ *
+ *	@state: pointer to state structure holding seeded state.
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ *
+ *	This is used for pseudo-randomness with no outside seeding.
+ *	For more random results, use prandom_bytes().
+ */
+void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+{
+	unsigned char *p = buf;
+	int i;
+
+	for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
+		u32 random = prandom_u32_state(state);
+		int j;
+
+		for (j = 0; j < sizeof(u32); j++) {
+			p[i + j] = random;
+			random >>= BITS_PER_BYTE;
+		}
+	}
+	if (i < bytes) {
+		u32 random = prandom_u32_state(state);
+
+		for (; i < bytes; i++) {
+			p[i] = random;
+			random >>= BITS_PER_BYTE;
+		}
+	}
+}
+EXPORT_SYMBOL(prandom_bytes_state);
+
+/**
+ *	prandom_bytes - get the requested number of pseudo-random bytes
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, int bytes)
+{
+	struct rnd_state *state = &get_cpu_var(net_rand_state);
+
+	prandom_bytes_state(state, buf, bytes);
+	put_cpu_var(state);
+}
+EXPORT_SYMBOL(prandom_bytes);
+
 /**
  *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
-- 
cgit v1.2.3-55-g7522


From 55985dd72ab27b47530dcc8bdddd28b69f4abe8b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Mon, 17 Dec 2012 16:04:55 -0800
Subject: procfs: add ability to plug in auxiliary fdinfo providers

This patch brings ability to print out auxiliary data associated with
file in procfs interface /proc/pid/fdinfo/fd.

In particular further patches make eventfd, evenpoll, signalfd and
fsnotify to print additional information complete enough to restore
these objects after checkpoint.

To simplify the code we add show_fdinfo callback inside struct
file_operations (as Al and Pavel are proposing).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/fd.c       | 2 ++
 include/linux/fs.h | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index f28a875f8779..d7a4a28ef630 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -50,6 +50,8 @@ static int seq_show(struct seq_file *m, void *v)
 	if (!ret) {
                 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
 			   (long long)file->f_pos, f_flags);
+		if (file->f_op->show_fdinfo)
+			ret = file->f_op->show_fdinfo(m, file);
 		fput(file);
 	}
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 029552ff774c..5abf703d06ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -44,6 +44,7 @@ struct vm_area_struct;
 struct vfsmount;
 struct cred;
 struct swap_info_struct;
+struct seq_file;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1543,6 +1544,7 @@ struct file_operations {
 	int (*setlease)(struct file *, long, struct file_lock **);
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
+	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
 
 struct inode_operations {
@@ -1578,8 +1580,6 @@ struct inode_operations {
 			   umode_t create_mode, int *opened);
 } ____cacheline_aligned;
 
-struct seq_file;
-
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
cgit v1.2.3-55-g7522


From 138d22b58696c506799f8de759804083ff9effae Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Mon, 17 Dec 2012 16:05:02 -0800
Subject: fs, epoll: add procfs fdinfo helper

This allows us to print out eventpoll target file descriptor, events and
data, the /proc/pid/fdinfo/fd consists of

 | pos:	0
 | flags:	02
 | tfd:        5 events:       1d data: ffffffffffffffff enabled: 1

[avagin@: fix for unitialized ret variable]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c          | 28 ++++++++++++++++++++++++++++
 fs/proc/array.c         |  2 +-
 fs/signalfd.c           | 18 ++++++++++++++++++
 include/linux/proc_fs.h |  3 +++
 4 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cd96649bfe62..be56b21435f8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -38,6 +38,8 @@
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 /*
  * LOCKING:
@@ -783,8 +785,34 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 	return pollflags != -1 ? pollflags : 0;
 }
 
+#ifdef CONFIG_PROC_FS
+static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct eventpoll *ep = f->private_data;
+	struct rb_node *rbp;
+	int ret = 0;
+
+	mutex_lock(&ep->mtx);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+
+		ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+				 epi->ffd.fd, epi->event.events,
+				 (long long)epi->event.data);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&ep->mtx);
+
+	return ret;
+}
+#endif
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= ep_show_fdinfo,
+#endif
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 439544fec388..060a56a91278 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -220,7 +220,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	seq_putc(m, '\n');
 }
 
-static void render_sigset_t(struct seq_file *m, const char *header,
+void render_sigset_t(struct seq_file *m, const char *header,
 				sigset_t *set)
 {
 	int i;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8bee4e570911..b53486961735 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -29,6 +29,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 void signalfd_cleanup(struct sighand_struct *sighand)
 {
@@ -227,7 +228,24 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 	return total ? total: ret;
 }
 
+#ifdef CONFIG_PROC_FS
+static int signalfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct signalfd_ctx *ctx = f->private_data;
+	sigset_t sigmask;
+
+	sigmask = ctx->sigmask;
+	signotset(&sigmask);
+	render_sigset_t(m, "sigmask:\t", &sigmask);
+
+	return 0;
+}
+#endif
+
 static const struct file_operations signalfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= signalfd_show_fdinfo,
+#endif
 	.release	= signalfd_release,
 	.poll		= signalfd_poll,
 	.read		= signalfd_read,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e871ff1b..b4f70f0a9a48 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -290,4 +290,7 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde)
 	return pde->parent->data;
 }
 
+#include <linux/signal.h>
+
+void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set);
 #endif /* _LINUX_PROC_FS_H */
-- 
cgit v1.2.3-55-g7522


From 711c7bf9914060d7aaf3c1a15f38094a5d5e748f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Mon, 17 Dec 2012 16:05:08 -0800
Subject: fs, exportfs: add exportfs_encode_inode_fh() helper

We will need this helper in the next patch to provide a file handle for
inotify marks in /proc/pid/fdinfo output.

The patch is rather providing the way to use inodes directly when dentry
is not available (like in case of inotify system).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exportfs/expfs.c      | 19 ++++++++++++++-----
 include/linux/exportfs.h |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 10f137381ac7..606bb074c501 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -341,10 +341,21 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
 	return type;
 }
 
+int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+			     int *max_len, struct inode *parent)
+{
+	const struct export_operations *nop = inode->i_sb->s_export_op;
+
+	if (nop && nop->encode_fh)
+		return nop->encode_fh(inode, fid->raw, max_len, parent);
+
+	return export_encode_fh(inode, fid, max_len, parent);
+}
+EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
+
 int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 		int connectable)
 {
-	const struct export_operations *nop = dentry->d_sb->s_export_op;
 	int error;
 	struct dentry *p = NULL;
 	struct inode *inode = dentry->d_inode, *parent = NULL;
@@ -357,10 +368,8 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 		 */
 		parent = p->d_inode;
 	}
-	if (nop && nop->encode_fh)
-		error = nop->encode_fh(inode, fid->raw, max_len, parent);
-	else
-		error = export_encode_fh(inode, fid, max_len, parent);
+
+	error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
 	dput(p);
 
 	return error;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7ee275..c7e6b6392ab8 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -177,6 +177,8 @@ struct export_operations {
 	int (*commit_metadata)(struct inode *inode);
 };
 
+extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+				    int *max_len, struct inode *parent);
 extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
 	int *max_len, int connectable);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
-- 
cgit v1.2.3-55-g7522


From ebb351cf78ee6bf3262e6b4b6906f456c05e6d5e Mon Sep 17 00:00:00 2001
From: Roger Pau Monne
Date: Tue, 4 Dec 2012 15:21:53 +0100
Subject: llist/xen-blkfront: implement safe version of llist_for_each_entry

Implement a safe version of llist_for_each_entry, and use it in
blkif_free. Previously grants where freed while iterating the list,
which lead to dereferences when trying to fetch the next item.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by:  Andrew Morton <akpm@linux-foundation.org>
[v2: Move the llist_for_each_entry_safe in llist.h]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c |  3 ++-
 include/linux/llist.h        | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 96e9b00db081..cfdb033c7107 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct llist_node *all_gnts;
 	struct grant *persistent_gnt;
+	struct llist_node *n;
 
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
@@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* Remove all persistent grants */
 	if (info->persistent_gnts_c) {
 		all_gnts = llist_del_all(&info->persistent_gnts);
-		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+		llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
 			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
 			__free_page(pfn_to_page(persistent_gnt->pfn));
 			kfree(persistent_gnt);
diff --git a/include/linux/llist.h b/include/linux/llist.h
index a5199f6d0e82..d0ab98f73d38 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -124,6 +124,31 @@ static inline void init_llist_head(struct llist_head *list)
 	     &(pos)->member != NULL;					\
 	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
 
+/**
+ * llist_for_each_entry_safe - iterate safely against remove over some entries
+ * of lock-less list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as a temporary storage.
+ * @node:	the fist entry of deleted list entries.
+ * @member:	the name of the llist_node with the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head. This variant allows removal of entries
+ * as we iterate.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry_safe(pos, n, node, member)		\
+	for ((pos) = llist_entry((node), typeof(*(pos)), member),	\
+	     (n) = (pos)->member.next;					\
+	     &(pos)->member != NULL;					\
+	     (pos) = llist_entry(n, typeof(*(pos)), member),		\
+	     (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
+
 /**
  * llist_empty - tests whether a lock-less list is empty
  * @head:	the list to test
-- 
cgit v1.2.3-55-g7522


From afc59400d6c65bad66d4ad0b2daf879cbff8e23e Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Mon, 10 Dec 2012 18:01:37 -0500
Subject: nfsd4: cleanup: replace rq_resused count by rq_next_page pointer

It may be a matter of personal taste, but I find this makes the code
clearer.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs2acl.c                       |  2 +-
 fs/nfsd/nfs3acl.c                       |  2 +-
 fs/nfsd/nfs3proc.c                      |  6 +++---
 fs/nfsd/nfs3xdr.c                       | 33 ++++++++++++++++-----------------
 fs/nfsd/nfs4xdr.c                       | 24 ++++++++++++------------
 fs/nfsd/nfsxdr.c                        | 11 ++++++-----
 fs/nfsd/vfs.c                           | 18 ++++++++----------
 include/linux/sunrpc/svc.h              |  6 +++---
 net/sunrpc/svc.c                        |  2 +-
 net/sunrpc/svcsock.c                    |  2 ++
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 +++++-----
 net/sunrpc/xprtrdma/svc_rdma_sendto.c   |  4 +++-
 12 files changed, 61 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index b314888825d5..9170861c804a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -253,7 +253,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 	while (w > 0) {
-		if (!rqstp->rq_respages[rqstp->rq_resused++])
+		if (!*(rqstp->rq_next_page++))
 			return 0;
 		w -= PAGE_SIZE;
 	}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index a596e9d987e4..9cbc1a841f87 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -184,7 +184,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		while (w > 0) {
-			if (!rqstp->rq_respages[rqstp->rq_resused++])
+			if (!*(rqstp->rq_next_page++))
 				return 0;
 			w -= PAGE_SIZE;
 		}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 97d90d1c8608..1fc02dfdc5c4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -460,7 +460,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
-	int	i;
+	struct page **p;
 	caddr_t	page_addr = NULL;
 
 	dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
@@ -484,8 +484,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 				     &resp->common,
 				     nfs3svc_encode_entry_plus);
 	memcpy(resp->verf, argp->verf, 8);
-	for (i=1; i<rqstp->rq_resused ; i++) {
-		page_addr = page_address(rqstp->rq_respages[i]);
+	for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+		page_addr = page_address(*p);
 
 		if (((caddr_t)resp->buffer >= page_addr) &&
 		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2b8618de6c27..324c0baf7cda 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -325,7 +325,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -340,8 +340,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	/* set up the kvec */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -463,8 +464,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 	len = ntohl(*p++);
 	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
 		return 0;
-	args->tname = new =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->tname = new = page_address(*(rqstp->rq_next_page++));
 	args->tlen = len;
 	/* first copy and check from the first page */
 	old = (char*)p;
@@ -535,8 +535,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -567,8 +566,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -577,7 +575,7 @@ int
 nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
-	int len, pn;
+	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -592,9 +590,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 	args->count = len;
 
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
+		struct page *p = *(rqstp->rq_next_page++);
 		if (!args->buffer)
-			args->buffer = page_address(rqstp->rq_respages[pn]);
+			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
 
@@ -880,7 +878,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 		       					common);
 	__be32		*p = cd->buffer;
 	caddr_t		curr_page_addr = NULL;
-	int		pn;		/* current page number */
+	struct page **	page;
 	int		slen;		/* string (name) length */
 	int		elen;		/* estimated entry length in words */
 	int		num_entry_words = 0;	/* actual number of words */
@@ -917,8 +915,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 	}
 
 	/* determine which page in rq_respages[] we are currently filling */
-	for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
-		curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+	for (page = cd->rqstp->rq_respages + 1;
+				page < cd->rqstp->rq_next_page; page++) {
+		curr_page_addr = page_address(*page);
 
 		if (((caddr_t)cd->buffer >= curr_page_addr) &&
 		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
@@ -933,14 +932,14 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 		if (plus)
 			p = encode_entryplus_baggage(cd, p, name, namlen);
 		num_entry_words = p - cd->buffer;
-	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+	} else if (*(page+1) != NULL) {
 		/* temporarily encode entry into next page, then move back to
 		 * current and next page in rq_respages[] */
 		__be32 *p1, *tmp;
 		int len1, len2;
 
 		/* grab next page for temporary storage of entry */
-		p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+		p1 = tmp = page_address(*(page+1));
 
 		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d7a3be5ab777..0dc11586682f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2906,7 +2906,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_read *read)
 {
 	u32 eof;
-	int v, pn;
+	int v;
+	struct page *page;
 	unsigned long maxcount; 
 	long len;
 	__be32 *p;
@@ -2925,16 +2926,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	len = maxcount;
 	v = 0;
 	while (len > 0) {
-		pn = resp->rqstp->rq_resused;
-		if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */
+		page = *(resp->rqstp->rq_next_page);
+		if (!page) { /* ran out of pages */
 			maxcount -= len;
 			break;
 		}
-		resp->rqstp->rq_vec[v].iov_base =
-			page_address(resp->rqstp->rq_respages[pn]);
+		resp->rqstp->rq_vec[v].iov_base = page_address(page);
 		resp->rqstp->rq_vec[v].iov_len =
 			len < PAGE_SIZE ? len : PAGE_SIZE;
-		resp->rqstp->rq_resused++;
+		resp->rqstp->rq_next_page++;
 		v++;
 		len -= PAGE_SIZE;
 	}
@@ -2980,10 +2980,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
-	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+	if (!*resp->rqstp->rq_next_page)
 		return nfserr_resource;
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 
 	maxcount = PAGE_SIZE;
 	RESERVE_SPACE(4);
@@ -3031,7 +3031,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
-	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+	if (!*resp->rqstp->rq_next_page)
 		return nfserr_resource;
 
 	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
@@ -3059,7 +3059,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		goto err_no_verf;
 	}
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 	readdir->common.err = 0;
 	readdir->buflen = maxcount;
 	readdir->buffer = page;
@@ -3082,8 +3082,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	p = readdir->buffer;
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xbuf->page_len = ((char*)p) - (char*)page_address(
-		resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->xbuf->page_len = ((char*)p) -
+		(char*)page_address(*(resp->rqstp->rq_next_page-1));
 
 	/* Use rest of head for padding and remaining ops: */
 	resp->xbuf->tail[0].iov_base = tailbase;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 65ec595e2226..979b42106979 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -246,7 +246,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
 
@@ -262,8 +262,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	 */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -355,7 +356,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -396,7 +397,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b31e46eeb026..f0a6d88d7fff 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -886,7 +886,7 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		  struct splice_desc *sd)
 {
 	struct svc_rqst *rqstp = sd->u.data;
-	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+	struct page **pp = rqstp->rq_next_page;
 	struct page *page = buf->page;
 	size_t size;
 
@@ -894,17 +894,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 
 	if (rqstp->rq_res.page_len == 0) {
 		get_page(page);
-		put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_base = buf->offset;
 		rqstp->rq_res.page_len = size;
 	} else if (page != pp[-1]) {
 		get_page(page);
-		if (*pp)
-			put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		if (*rqstp->rq_next_page)
+			put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_len += size;
 	} else
 		rqstp->rq_res.page_len += size;
@@ -936,8 +934,8 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 			.u.data		= rqstp,
 		};
 
-		WARN_ON_ONCE(rqstp->rq_resused != 1);
-		rqstp->rq_resused = 1;
+		WARN_ON_ONCE(rqstp->rq_next_page != rqstp->rq_respages + 1);
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {
 		oldfs = get_fs();
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d83db800fe02..676ddf53b3ee 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -243,6 +243,7 @@ struct svc_rqst {
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	int			rq_resused;	/* number of pages used for result */
+	struct page *		*rq_next_page; /* next reply page to use */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
@@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
 
 static inline void svc_free_res_pages(struct svc_rqst *rqstp)
 {
-	while (rqstp->rq_resused) {
-		struct page **pp = (rqstp->rq_respages +
-				    --rqstp->rq_resused);
+	while (rqstp->rq_next_page != rqstp->rq_respages) {
+		struct page **pp = --rqstp->rq_next_page;
 		if (*pp) {
 			put_page(*pp);
 			*pp = NULL;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 529400d59755..c6abf1a6ba95 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1297,7 +1297,7 @@ svc_process(struct svc_rqst *rqstp)
 	 * Setup response xdr_buf.
 	 * Initially it has just one page
 	 */
-	rqstp->rq_resused = 1;
+	rqstp->rq_next_page = &rqstp->rq_respages[1];
 	resv->iov_base = page_address(rqstp->rq_respages[0]);
 	resv->iov_len = 0;
 	rqstp->rq_res.pages = rqstp->rq_respages + 1;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d8e5adfeac30..dcd5669c5154 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -601,6 +601,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		rqstp->rq_respages = rqstp->rq_pages + 1 +
 			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 	}
+	rqstp->rq_next_page = rqstp->rq_respages+1;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
@@ -1066,6 +1067,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 						svsk->sk_datalen + want);
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 41cb63b623df..0ce75524ed21 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -521,11 +521,11 @@ next_sge:
 		rqstp->rq_pages[ch_no] = NULL;
 
 	/*
-	 * Detach res pages. svc_release must see a resused count of
-	 * zero or it will attempt to put them.
+	 * Detach res pages. If svc_release sees any it will attempt to
+	 * put them.
 	 */
-	while (rqstp->rq_resused)
-		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
+	while (rqstp->rq_next_page != rqstp->rq_respages)
+		*(--rqstp->rq_next_page) = NULL;
 
 	return err;
 }
@@ -550,7 +550,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_resused = 0;
+	rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no];
 
 	/* Rebuild rq_arg head and tail. */
 	rqstp->rq_arg.head[0] = head->arg.head[0];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 42eb7ba0b903..c1d124dc772b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -548,6 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	int sge_no;
 	int sge_bytes;
 	int page_no;
+	int pages;
 	int ret;
 
 	/* Post a recv buffer to handle another request. */
@@ -611,7 +612,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	 * respages array. They are our pages until the I/O
 	 * completes.
 	 */
-	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
+	pages = rqstp->rq_next_page - rqstp->rq_respages;
+	for (page_no = 0; page_no < pages; page_no++) {
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
-- 
cgit v1.2.3-55-g7522


From 06ca287dbac9cc19d04ac2901b8c4882c03795ff Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Tue, 16 Oct 2012 23:56:14 +1030
Subject: virtio: move queue_index and num_free fields into core struct
 virtqueue.

They're generic concepts, so hoist them.  This also avoids accessor
functions (though kept around for merge with DaveM's net tree).

This goes even further than Jason Wang's 17bb6d4088 patch
("virtio-ring: move queue_index to vring_virtqueue") which moved the
queue_index from the specific transport.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_mmio.c |  4 ++--
 drivers/virtio/virtio_pci.c  |  6 ++----
 drivers/virtio/virtio_ring.c | 34 +++++++++++-----------------------
 include/linux/virtio.h       | 14 +++++++++++++-
 4 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 6b1b7e184939..5a0e1d32ce13 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -225,7 +225,7 @@ static void vm_notify(struct virtqueue *vq)
 
 	/* We write the queue's selector into the notification register to
 	 * signal the other end */
-	writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+	writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
 }
 
 /* Notify all virtqueues on an interrupt. */
@@ -266,7 +266,7 @@ static void vm_del_vq(struct virtqueue *vq)
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
 	struct virtio_mmio_vq_info *info = vq->priv;
 	unsigned long flags, size;
-	unsigned int index = virtqueue_get_queue_index(vq);
+	unsigned int index = vq->index;
 
 	spin_lock_irqsave(&vm_dev->lock, flags);
 	list_del(&info->node);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index b59237c1d540..e3ecc94591ad 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -203,8 +203,7 @@ static void vp_notify(struct virtqueue *vq)
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(virtqueue_get_queue_index(vq),
-		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -479,8 +478,7 @@ static void vp_del_vq(struct virtqueue *vq)
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(virtqueue_get_queue_index(vq),
-		vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 286c30cb393d..33a4ce009bcc 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -93,8 +93,6 @@ struct vring_virtqueue
 	/* Host publishes avail event idx */
 	bool event;
 
-	/* Number of free buffers */
-	unsigned int num_free;
 	/* Head of free buffer list. */
 	unsigned int free_head;
 	/* Number we've added since last sync. */
@@ -106,9 +104,6 @@ struct vring_virtqueue
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
-	/* Index of the queue */
-	int queue_index;
-
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -167,7 +162,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	desc[i-1].next = 0;
 
 	/* We're about to use a buffer */
-	vq->num_free--;
+	vq->vq.num_free--;
 
 	/* Use a single buffer which doesn't continue */
 	head = vq->free_head;
@@ -181,13 +176,6 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
-int virtqueue_get_queue_index(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	return vq->queue_index;
-}
-EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
-
 /**
  * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
@@ -235,7 +223,7 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+	if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
 		head = vring_add_indirect(vq, sg, out, in, gfp);
 		if (likely(head >= 0))
 			goto add_head;
@@ -244,9 +232,9 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	if (vq->num_free < out + in) {
+	if (vq->vq.num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 out + in, vq->num_free);
+			 out + in, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
@@ -257,7 +245,7 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->num_free -= out + in;
+	vq->vq.num_free -= out + in;
 
 	head = vq->free_head;
 	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
@@ -303,7 +291,7 @@ add_head:
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
 
-	return vq->num_free;
+	return vq->vq.num_free;
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
@@ -400,13 +388,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
-		vq->num_free++;
+		vq->vq.num_free++;
 	}
 
 	vq->vring.desc[i].next = vq->free_head;
 	vq->free_head = head;
 	/* Plus final descriptor */
-	vq->num_free++;
+	vq->vq.num_free++;
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
@@ -606,7 +594,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 		return buf;
 	}
 	/* That should have freed everything. */
-	BUG_ON(vq->num_free != vq->vring.num);
+	BUG_ON(vq->vq.num_free != vq->vring.num);
 
 	END_USE(vq);
 	return NULL;
@@ -660,12 +648,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
+	vq->vq.num_free = num;
+	vq->vq.index = index;
 	vq->notify = notify;
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
-	vq->queue_index = index;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
@@ -680,7 +669,6 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 
 	/* Put everything in free lists. */
-	vq->num_free = num;
 	vq->free_head = 0;
 	for (i = 0; i < num-1; i++) {
 		vq->vring.desc[i].next = i+1;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 533b1157f22e..4b8f17d90458 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -16,12 +16,20 @@
  * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @priv: a pointer for the virtqueue implementation to use.
+ * @index: the zero-based ordinal number for this queue.
+ * @num_free: number of elements we expect to be able to fit.
+ *
+ * A note on @num_free: with indirect buffers, each buffer needs one
+ * element in the queue, otherwise a buffer will need one element per
+ * sg element.
  */
 struct virtqueue {
 	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
 	const char *name;
 	struct virtio_device *vdev;
+	unsigned int index;
+	unsigned int num_free;
 	void *priv;
 };
 
@@ -50,7 +58,11 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
-int virtqueue_get_queue_index(struct virtqueue *vq);
+/* FIXME: Obsolete accessor, but required for virtio_net merge. */
+static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq)
+{
+	return vq->index;
+}
 
 /**
  * virtio_device - representation of a device using virtio
-- 
cgit v1.2.3-55-g7522


From 9bffdca8c64a72ac54c47a552734ab457bc720d4 Mon Sep 17 00:00:00 2001
From: Wanlong Gao
Date: Tue, 11 Dec 2012 11:04:50 +1030
Subject: virtio: use dev_to_virtio wrapper in virtio

Use dev_to_virtio wrapper in virtio to make code clearly.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c | 19 +++++++++----------
 include/linux/virtio.h  |  6 +++++-
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 1e8659ca27ef..1346ae8e14f3 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -10,33 +10,32 @@ static DEFINE_IDA(virtio_index_ida);
 static ssize_t device_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%04x\n", dev->id.device);
 }
 static ssize_t vendor_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%04x\n", dev->id.vendor);
 }
 static ssize_t status_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
 }
 static ssize_t modalias_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "virtio:d%08Xv%08X\n",
 		       dev->id.device, dev->id.vendor);
 }
 static ssize_t features_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	unsigned int i;
 	ssize_t len = 0;
 
@@ -71,7 +70,7 @@ static inline int virtio_id_match(const struct virtio_device *dev,
 static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 {
 	unsigned int i;
-	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_dv);
 	const struct virtio_device_id *ids;
 
 	ids = container_of(_dr, struct virtio_driver, driver)->id_table;
@@ -83,7 +82,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 
 static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
 {
-	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_dv);
 
 	return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
 			      dev->id.device, dev->id.vendor);
@@ -111,7 +110,7 @@ EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
 	u32 device_features;
@@ -152,7 +151,7 @@ static int virtio_dev_probe(struct device *_d)
 
 static int virtio_dev_remove(struct device *_d)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4b8f17d90458..5f1f80c76468 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -85,7 +85,11 @@ struct virtio_device {
 	void *priv;
 };
 
-#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
+static inline struct virtio_device *dev_to_virtio(struct device *_dev)
+{
+	return container_of(_dev, struct virtio_device, dev);
+}
+
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
-- 
cgit v1.2.3-55-g7522


From 9a2bdcc85d28506d4e5d4a9618fb133a3f40945d Mon Sep 17 00:00:00 2001
From: Wanlong Gao
Date: Mon, 10 Dec 2012 16:38:33 +0800
Subject: virtio: add drv_to_virtio to make code clearly

Add drv_to_virtio wrapper to get virtio_driver from device_driver.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c | 11 ++++-------
 include/linux/virtio.h  |  5 +++++
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 1346ae8e14f3..1c01ac3fad08 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -73,7 +73,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 	struct virtio_device *dev = dev_to_virtio(_dv);
 	const struct virtio_device_id *ids;
 
-	ids = container_of(_dr, struct virtio_driver, driver)->id_table;
+	ids = drv_to_virtio(_dr)->id_table;
 	for (i = 0; ids[i].device; i++)
 		if (virtio_id_match(dev, &ids[i]))
 			return 1;
@@ -97,8 +97,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
 					 unsigned int fbit)
 {
 	unsigned int i;
-	struct virtio_driver *drv = container_of(vdev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 
 	for (i = 0; i < drv->feature_table_size; i++)
 		if (drv->feature_table[i] == fbit)
@@ -111,8 +110,7 @@ static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
 	struct virtio_device *dev = dev_to_virtio(_d);
-	struct virtio_driver *drv = container_of(dev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 	u32 device_features;
 
 	/* We have a driver! */
@@ -152,8 +150,7 @@ static int virtio_dev_probe(struct device *_d)
 static int virtio_dev_remove(struct device *_d)
 {
 	struct virtio_device *dev = dev_to_virtio(_d);
-	struct virtio_driver *drv = container_of(dev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 
 	drv->remove(dev);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 5f1f80c76468..cf8adb1f5b2c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -119,6 +119,11 @@ struct virtio_driver {
 #endif
 };
 
+static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
+{
+	return container_of(drv, struct virtio_driver, driver);
+}
+
 int register_virtio_driver(struct virtio_driver *drv);
 void unregister_virtio_driver(struct virtio_driver *drv);
 #endif /* _LINUX_VIRTIO_H */
-- 
cgit v1.2.3-55-g7522


From 7a64bf05b2a6fe3703062d13d389e3eb904741c6 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:21:51 -0800
Subject: mm: add a __GFP_KMEMCG flag

This flag is used to indicate to the callees that this allocation is a
kernel allocation in process context, and should be accounted to current's
memcg.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h             | 2 ++
 include/trace/events/gfpflags.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f74856e17e48..643c9a6f7f34 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,7 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -89,6 +90,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..1eddbf1557f2 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,6 +34,7 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
 	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
-- 
cgit v1.2.3-55-g7522


From 7ae1e1d0f8ac2927ed7e3ca6d15e42d485903459 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:21:56 -0800
Subject: memcg: kmem controller infrastructure

Introduce infrastructure for tracking kernel memory pages to a given
memcg.  This will happen whenever the caller includes the flag
__GFP_KMEMCG flag, and the task belong to a memcg other than the root.

In memcontrol.h those functions are wrapped in inline acessors.  The idea
is to later on, patch those with static branches, so we don't incur any
overhead when no mem cgroups with limited kmem are being used.

Users of this functionality shall interact with the memcg core code
through the following functions:

memcg_kmem_newpage_charge: will return true if the group can handle the
                           allocation. At this point, struct page is not
                           yet allocated.

memcg_kmem_commit_charge: will either revert the charge, if struct page
                          allocation failed, or embed memcg information
                          into page_cgroup.

memcg_kmem_uncharge_page: called at free time, will revert the charge.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 110 +++++++++++++++++++++++++++++
 mm/memcontrol.c            | 170 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 280 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c0c9c0..afa2ad40457e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -414,5 +415,114 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_kmem_enabled(void)
+{
+	return true;
+}
+
+/*
+ * In general, we'll do everything in our power to not incur in any overhead
+ * for non-memcg users for the kmem functions. Not even a function call, if we
+ * can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly.  If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+					int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				       struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * returns true if the memcg where the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and have this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the test is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * there is no need to specify memcg here, since it is embedded in page_cgroup
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. if @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+#else
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bba1cb4bbb82..b9afa060b8d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -2661,6 +2665,172 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. Those are
+	 * the same conditions tested by the core page allocator
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR)  {
+		/*
+		 * __mem_cgroup_try_charge() chosed to bypass to root due to
+		 * OOM kill or fatal signal.  Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected,
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggers should have been already
+		 * directed to the root cgroup in memcontrol.h
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->kmem, size);
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in this mean time, so at
+ * commit time, we can't rely on task conversion any longer.  We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * very rare case described in mem_cgroup_from_task. Unfortunately there
+	 * isn't much we can do without complicating this too much, and it would
+	 * be gfp-dependent anyway. Just let it go
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	mem_cgroup_get(memcg);
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+	else
+		mem_cgroup_put(memcg);
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		mem_cgroup_put(memcg);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. Theoretically might have changed, have to
+	 * check again after locking.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that only if there is a memcg associated with the page, it
+	 * is a valid allocation
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+	mem_cgroup_put(memcg);
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
-- 
cgit v1.2.3-55-g7522


From 6a1a0d3b625a4091e7a0eb249aefc6a644385149 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:00 -0800
Subject: mm: allocate kernel pages to the right memcg

When a process tries to allocate a page with the __GFP_KMEMCG flag, the
page allocator will call the corresponding memcg functions to validate
the allocation.  Tasks in the root memcg can always proceed.

To avoid adding markers to the page - and a kmem flag that would
necessarily follow, as much as doing page_cgroup lookups for no reason,
whoever is marking its allocations with __GFP_KMEMCG flag is responsible
for telling the page allocator that this is such an allocation at
free_pages() time.  This is done by the invocation of
__free_accounted_pages() and free_accounted_pages().

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  3 +++
 mm/page_alloc.c     | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 643c9a6f7f34..0f615eb23d05 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -367,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
+extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
+extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62496edbd8dd..2ad2ad168efe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2612,6 +2612,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2630,6 +2631,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
+	/*
+	 * Will only have any effect when __GFP_KMEMCG is set.  This is
+	 * verified in the (always inline) callee
+	 */
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+
 retry_cpuset:
 	cpuset_mems_cookie = get_mems_allowed();
 
@@ -2665,6 +2673,8 @@ out:
 	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
 		goto retry_cpuset;
 
+	memcg_kmem_commit_charge(page, memcg, order);
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2717,6 +2727,31 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+/*
+ * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
+ * pages allocated with __GFP_KMEMCG.
+ *
+ * Those pages are accounted to a particular memcg, embedded in the
+ * corresponding page_cgroup. To avoid adding a hit in the allocator to search
+ * for that information only to find out that it is NULL for users who have no
+ * interest in that whatsoever, we provide these functions.
+ *
+ * The caller knows better which flags it relies on.
+ */
+void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+{
+	memcg_kmem_uncharge_pages(page, order);
+	__free_pages(page, order);
+}
+
+void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+{
+	if (addr != 0) {
+		VM_BUG_ON(!virt_addr_valid((void *)addr));
+		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+	}
+}
+
 static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
 {
 	if (addr) {
-- 
cgit v1.2.3-55-g7522


From 50bdd430c20566b13d8bc59946184b08f5875de6 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:04 -0800
Subject: res_counter: return amount of charges after res_counter_uncharge()

It is useful to know how many charges are still left after a call to
res_counter_uncharge.  While it is possible to issue a res_counter_read
after uncharge, this can be racy.

If we need, for instance, to take some action when the counters drop down
to 0, only one of the callers should see it.  This is the same semantics
as the atomic variables in the kernel.

Since the current return value is void, we don't need to worry about
anything breaking due to this change: nobody relied on that, and only
users appearing from now on will be checking this value.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/resource_counter.txt |  7 ++++---
 include/linux/res_counter.h                | 12 +++++++-----
 kernel/res_counter.c                       | 20 +++++++++++++-------
 3 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index 0c4a344e78fa..c4d99ed0b418 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -83,16 +83,17 @@ to work with it.
 	res_counter->lock internally (it must be called with res_counter->lock
 	held). The force parameter indicates whether we can bypass the limit.
 
- e. void res_counter_uncharge[_locked]
+ e. u64 res_counter_uncharge[_locked]
 			(struct res_counter *rc, unsigned long val)
 
 	When a resource is released (freed) it should be de-accounted
 	from the resource counter it was accounted to.  This is called
-	"uncharging".
+	"uncharging". The return value of this function indicate the amount
+	of charges still present in the counter.
 
 	The _locked routines imply that the res_counter->lock is taken.
 
- f. void res_counter_uncharge_until
+ f. u64 res_counter_uncharge_until
 		(struct res_counter *rc, struct res_counter *top,
 		 unsinged long val)
 
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6f54e40fa218..5ae8456d9670 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
  *
  * these calls check for usage underflow and show a warning on the console
  * _locked call expects the counter->lock to be taken
+ *
+ * returns the total charges still present in @counter.
  */
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val);
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 3920d593e63c..ff55247e7049 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
 	return __res_counter_charge(counter, val, limit_fail_at, true);
 }
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
 		val = counter->usage;
 
 	counter->usage -= val;
+	return counter->usage;
 }
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val)
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
+	u64 ret = 0;
 
 	local_irq_save(flags);
 	for (c = counter; c != top; c = c->parent) {
+		u64 r;
 		spin_lock(&c->lock);
-		res_counter_uncharge_locked(c, val);
+		r = res_counter_uncharge_locked(c, val);
+		if (c == counter)
+			ret = r;
 		spin_unlock(&c->lock);
 	}
 	local_irq_restore(flags);
+	return ret;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
-	res_counter_uncharge_until(counter, NULL, val);
+	return res_counter_uncharge_until(counter, NULL, val);
 }
 
 static inline unsigned long long *
-- 
cgit v1.2.3-55-g7522


From a8964b9b84f99c0b1b5d7c09520f89f0700e742e Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:09 -0800
Subject: memcg: use static branches when code not in use

We can use static branches to patch the code in or out when not used.

Because the _ACTIVE bit on kmem_accounted is only set after the increment
is done, we guarantee that the root memcg will always be selected for kmem
charges until all call sites are patched (see memcg_kmem_enabled).  This
guarantees that no mischarges are applied.

Static branch decrement happens when the last reference count from the
kmem accounting in memcg dies.  This will only happen when the charges
drop down to 0.

When that happens, we need to disable the static branch only on those
memcgs that enabled it.  To achieve this, we would be forced to complicate
the code by keeping track of which memcgs were the ones that actually
enabled limits, and which ones got it from its parents.

It is a lot simpler just to do static_key_slow_inc() on every child
that is accounted.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++-
 mm/memcontrol.c            | 79 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index afa2ad40457e..87d61e840ddd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -22,6 +22,7 @@
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
 #include <linux/hardirq.h>
+#include <linux/jump_label.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -417,9 +418,10 @@ static inline void sock_release_memcg(struct sock *sk)
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_MEMCG_KMEM
+extern struct static_key memcg_kmem_enabled_key;
 static inline bool memcg_kmem_enabled(void)
 {
-	return true;
+	return static_key_false(&memcg_kmem_enabled_key);
 }
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a62ac3ea881..bc70254558fa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -346,10 +346,13 @@ struct mem_cgroup {
 /* internal only representation about the status of kmem accounting. */
 enum {
 	KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
+	KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */
 	KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
 };
 
-#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE)
+/* We account when limit is on, but only after call sites are patched */
+#define KMEM_ACCOUNTED_MASK \
+		((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))
 
 #ifdef CONFIG_MEMCG_KMEM
 static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
@@ -362,6 +365,11 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 	return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
 }
 
+static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
+{
+	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
 	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
@@ -532,6 +540,26 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+struct static_key memcg_kmem_enabled_key;
+
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+	if (memcg_kmem_is_active(memcg))
+		static_key_slow_dec(&memcg_kmem_enabled_key);
+}
+#else
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+static void disarm_static_keys(struct mem_cgroup *memcg)
+{
+	disarm_sock_keys(memcg);
+	disarm_kmem_keys(memcg);
+}
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
@@ -4204,6 +4232,8 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
+	bool must_inc_static_branch = false;
+
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 	/*
 	 * For simplicity, we won't allow this to be disabled.  It also can't
@@ -4234,7 +4264,15 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 		ret = res_counter_set_limit(&memcg->kmem, val);
 		VM_BUG_ON(ret);
 
-		memcg_kmem_set_active(memcg);
+		/*
+		 * After this point, kmem_accounted (that we test atomically in
+		 * the beginning of this conditional), is no longer 0. This
+		 * guarantees only one process will set the following boolean
+		 * to true. We don't need test_and_set because we're protected
+		 * by the set_limit_mutex anyway.
+		 */
+		memcg_kmem_set_activated(memcg);
+		must_inc_static_branch = true;
 		/*
 		 * kmem charges can outlive the cgroup. In the case of slab
 		 * pages, for instance, a page contain objects from various
@@ -4247,6 +4285,27 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 out:
 	mutex_unlock(&set_limit_mutex);
 	cgroup_unlock();
+
+	/*
+	 * We are by now familiar with the fact that we can't inc the static
+	 * branch inside cgroup_lock. See disarm functions for details. A
+	 * worker here is overkill, but also wrong: After the limit is set, we
+	 * must start accounting right away. Since this operation can't fail,
+	 * we can safely defer it to here - no rollback will be needed.
+	 *
+	 * The boolean used to control this is also safe, because
+	 * KMEM_ACCOUNTED_ACTIVATED guarantees that only one process will be
+	 * able to set it to true;
+	 */
+	if (must_inc_static_branch) {
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+		/*
+		 * setting the active bit after the inc will guarantee no one
+		 * starts accounting before all call sites are patched
+		 */
+		memcg_kmem_set_active(memcg);
+	}
+
 #endif
 	return ret;
 }
@@ -4258,8 +4317,20 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
 		return;
 	memcg->kmem_account_flags = parent->kmem_account_flags;
 #ifdef CONFIG_MEMCG_KMEM
-	if (memcg_kmem_is_active(memcg))
+	/*
+	 * When that happen, we need to disable the static branch only on those
+	 * memcgs that enabled it. To achieve this, we would be forced to
+	 * complicate the code by keeping track of which memcgs were the ones
+	 * that actually enabled limits, and which ones got it from its
+	 * parents.
+	 *
+	 * It is a lot simpler just to do static_key_slow_inc() on every child
+	 * that is accounted.
+	 */
+	if (memcg_kmem_is_active(memcg)) {
 		mem_cgroup_get(memcg);
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+	}
 #endif
 }
 
@@ -5184,7 +5255,7 @@ static void free_work(struct work_struct *work)
 	 * to move this code around, and make sure it is outside
 	 * the cgroup_lock.
 	 */
-	disarm_sock_keys(memcg);
+	disarm_static_keys(memcg);
 	if (size < PAGE_SIZE)
 		kfree(memcg);
 	else
-- 
cgit v1.2.3-55-g7522


From 2ad306b17c0ac5a1b1f250d5f772aeb87fdf1eba Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:18 -0800
Subject: fork: protect architectures where THREAD_SIZE >= PAGE_SIZE against
 fork bombs

Because those architectures will draw their stacks directly from the page
allocator, rather than the slab cache, we can directly pass __GFP_KMEMCG
flag, and issue the corresponding free_pages.

This code path is taken when the architecture doesn't define
CONFIG_ARCH_THREAD_INFO_ALLOCATOR (only ia64 seems to), and has
THREAD_SIZE >= PAGE_SIZE.  Luckily, most - if not all - of the remaining
architectures fall in this category.

This will guarantee that every stack page is accounted to the memcg the
process currently lives on, and will have the allocations to fail if they
go over limit.

For the time being, I am defining a new variant of THREADINFO_GFP, not to
mess with the other path.  Once the slab is also tracked by memcg, we can
get rid of that flag.

Tested to successfully protect against :(){ :|:& };:

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Frederic Weisbecker <fweisbec@redhat.com>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h | 2 ++
 kernel/fork.c               | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ccc1899bd62e..e7e04736802f 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
+#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
+
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
diff --git a/kernel/fork.c b/kernel/fork.c
index c36c4e301efe..85f6d536608d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
-- 
cgit v1.2.3-55-g7522


From ba6c496ed834a37a26fc6fc87fc9aecb0fa0014d Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:27 -0800
Subject: slab/slub: struct memcg_params

For the kmem slab controller, we need to record some extra information in
the kmem_cache structure.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h     | 24 ++++++++++++++++++++++++
 include/linux/slab_def.h |  3 +++
 include/linux/slub_def.h |  3 +++
 mm/slab.h                | 13 +++++++++++++
 4 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 743a10415122..00efba149222 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -176,6 +176,30 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
 
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * struct kmem_cache will hold a pointer to it, so the memory cost while
+ * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
+ * would otherwise be if that would be bundled in kmem_cache: we'll need an
+ * extra pointer chase. But the trade off clearly lays in favor of not
+ * penalizing non-users.
+ *
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system.
+ *
+ * Child caches will hold extra metadata needed for its operation. Fields are:
+ *
+ * @memcg: pointer to the memcg this cache belongs to
+ */
+struct memcg_cache_params {
+	bool is_root_cache;
+	union {
+		struct kmem_cache *memcg_caches[0];
+		struct mem_cgroup *memcg;
+	};
+};
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 45c0356fdc8c..8bb6e0eaf3c6 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -81,6 +81,9 @@ struct kmem_cache {
 	 */
 	int obj_offset;
 #endif /* CONFIG_DEBUG_SLAB */
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 /* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index df448adb7283..961e72eab907 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,6 +101,9 @@ struct kmem_cache {
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 #ifdef CONFIG_NUMA
 	/*
diff --git a/mm/slab.h b/mm/slab.h
index 1cb9c9ee0e6f..49e7a8b1d27e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -100,4 +100,17 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
 ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 		       size_t count, loff_t *ppos);
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+	return !s->memcg_params || s->memcg_params->is_root_cache;
+}
+#else
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+	return true;
+}
+
+#endif
 #endif
-- 
cgit v1.2.3-55-g7522


From 2633d7a028239a738b793be5ca8fa6ac312f5793 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:34 -0800
Subject: slab/slub: consider a memcg parameter in kmem_create_cache

Allow a memcg parameter to be passed during cache creation.  When the slub
allocator is being used, it will only merge caches that belong to the same
memcg.  We'll do this by scanning the global list, and then translating
the cache to a memcg-specific cache

Default function is created as a wrapper, passing NULL to the memcg
version.  We only merge caches that belong to the same memcg.

A helper is provided, memcg_css_id: because slub needs a unique cache name
for sysfs.  Since this is visible, but not the canonical location for slab
data, the cache name is not used, the css_id should suffice.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 26 +++++++++++++++++++++++
 include/linux/slab.h       | 14 ++++++++++++-
 mm/memcontrol.c            | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.h                  | 23 +++++++++++++++++----
 mm/slab_common.c           | 42 ++++++++++++++++++++++++++++++--------
 mm/slub.c                  | 19 +++++++++++++----
 6 files changed, 157 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 87d61e840ddd..0b69a0470007 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -28,6 +28,7 @@ struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
+struct kmem_cache;
 
 /* Stats that can be updated by kernel. */
 enum mem_cgroup_page_stat_item {
@@ -441,6 +442,11 @@ void __memcg_kmem_commit_charge(struct page *page,
 				       struct mem_cgroup *memcg, int order);
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
+int memcg_cache_id(struct mem_cgroup *memcg);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
+void memcg_release_cache(struct kmem_cache *cachep);
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -525,6 +531,26 @@ static inline void
 memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 {
 }
+
+static inline int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return -1;
+}
+
+static inline int memcg_register_cache(struct mem_cgroup *memcg,
+				       struct kmem_cache *s)
+{
+	return 0;
+}
+
+static inline void memcg_release_cache(struct kmem_cache *cachep)
+{
+}
+
+static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
+					struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 00efba149222..c0fcf28c15b2 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,6 +116,7 @@ struct kmem_cache {
 };
 #endif
 
+struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
@@ -125,6 +126,9 @@ int slab_is_available(void);
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
+			unsigned long, void (*)(void *));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
@@ -191,15 +195,23 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * Child caches will hold extra metadata needed for its operation. Fields are:
  *
  * @memcg: pointer to the memcg this cache belongs to
+ * @list: list_head for the list of all caches in this memcg
+ * @root_cache: pointer to the global, root cache, this cache was derived from
  */
 struct memcg_cache_params {
 	bool is_root_cache;
 	union {
 		struct kmem_cache *memcg_caches[0];
-		struct mem_cgroup *memcg;
+		struct {
+			struct mem_cgroup *memcg;
+			struct list_head list;
+			struct kmem_cache *root_cache;
+		};
 	};
 };
 
+int memcg_update_all_caches(int num_memcgs);
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e16694d5e118..3eafe6cf6ca4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -341,6 +341,14 @@ struct mem_cgroup {
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
 	struct tcp_memcontrol tcp_mem;
 #endif
+#if defined(CONFIG_MEMCG_KMEM)
+	/* analogous to slab_common's slab_caches list. per-memcg */
+	struct list_head memcg_slab_caches;
+	/* Not a spinlock, we can take a lot of time walking the list */
+	struct mutex slab_caches_mutex;
+        /* Index in the kmem_cache->memcg_params->memcg_caches array */
+	int kmemcg_id;
+#endif
 };
 
 /* internal only representation about the status of kmem accounting. */
@@ -2785,6 +2793,47 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 		mem_cgroup_put(memcg);
 }
 
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep)
+{
+	if (!memcg)
+		return;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
+/*
+ * helper for acessing a memcg's index. It will be used as an index in the
+ * child cache array in kmem_cache, and also to derive its name. This function
+ * will return -1 when this is not a kmem-limited memcg.
+ */
+int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return memcg ? memcg->kmemcg_id : -1;
+}
+
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	size_t size = sizeof(struct memcg_cache_params);
+
+	if (!memcg_kmem_enabled())
+		return 0;
+
+	s->memcg_params = kzalloc(size, GFP_KERNEL);
+	if (!s->memcg_params)
+		return -ENOMEM;
+
+	if (memcg)
+		s->memcg_params->memcg = memcg;
+	return 0;
+}
+
+void memcg_release_cache(struct kmem_cache *s)
+{
+	kfree(s->memcg_params);
+}
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
@@ -5026,7 +5075,9 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	memcg->kmemcg_id = -1;
 	memcg_propagate_kmem(memcg);
+
 	return mem_cgroup_sockets_init(memcg, ss);
 };
 
diff --git a/mm/slab.h b/mm/slab.h
index 49e7a8b1d27e..abe582d20c79 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -43,12 +43,15 @@ extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
 extern void create_boot_cache(struct kmem_cache *, const char *name,
 			size_t size, unsigned long flags);
 
+struct mem_cgroup;
 #ifdef CONFIG_SLUB
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-	size_t align, unsigned long flags, void (*ctor)(void *));
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *));
 #else
-static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-	size_t align, unsigned long flags, void (*ctor)(void *))
+static inline struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *))
 { return NULL; }
 #endif
 
@@ -106,11 +109,23 @@ static inline bool is_root_cache(struct kmem_cache *s)
 {
 	return !s->memcg_params || s->memcg_params->is_root_cache;
 }
+
+static inline bool cache_match_memcg(struct kmem_cache *cachep,
+				     struct mem_cgroup *memcg)
+{
+	return (is_root_cache(cachep) && !memcg) ||
+				(cachep->memcg_params->memcg == memcg);
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
 	return true;
 }
 
+static inline bool cache_match_memcg(struct kmem_cache *cachep,
+				     struct mem_cgroup *memcg)
+{
+	return true;
+}
 #endif
 #endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a8e76d79ee65..3031badcc577 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -18,6 +18,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
+#include <linux/memcontrol.h>
 
 #include "slab.h"
 
@@ -27,7 +28,8 @@ DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
 #ifdef CONFIG_DEBUG_VM
-static int kmem_cache_sanity_check(const char *name, size_t size)
+static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
+				   size_t size)
 {
 	struct kmem_cache *s = NULL;
 
@@ -53,7 +55,13 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
 			continue;
 		}
 
-		if (!strcmp(s->name, name)) {
+		/*
+		 * For simplicity, we won't check this in the list of memcg
+		 * caches. We have control over memcg naming, and if there
+		 * aren't duplicates in the global list, there won't be any
+		 * duplicates in the memcg lists as well.
+		 */
+		if (!memcg && !strcmp(s->name, name)) {
 			pr_err("%s (%s): Cache name already exists.\n",
 			       __func__, name);
 			dump_stack();
@@ -66,7 +74,8 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
 	return 0;
 }
 #else
-static inline int kmem_cache_sanity_check(const char *name, size_t size)
+static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
+					  const char *name, size_t size)
 {
 	return 0;
 }
@@ -125,8 +134,9 @@ unsigned long calculate_alignment(unsigned long flags,
  * as davem.
  */
 
-struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
-		unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
+			size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s = NULL;
 	int err = 0;
@@ -134,7 +144,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 
-	if (!kmem_cache_sanity_check(name, size) == 0)
+	if (!kmem_cache_sanity_check(memcg, name, size) == 0)
 		goto out_locked;
 
 	/*
@@ -145,7 +155,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 	 */
 	flags &= CACHE_CREATE_MASK;
 
-	s = __kmem_cache_alias(name, size, align, flags, ctor);
+	s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
 	if (s)
 		goto out_locked;
 
@@ -154,6 +164,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 		s->object_size = s->size = size;
 		s->align = calculate_alignment(flags, align, size);
 		s->ctor = ctor;
+
+		if (memcg_register_cache(memcg, s)) {
+			kmem_cache_free(kmem_cache, s);
+			err = -ENOMEM;
+			goto out_locked;
+		}
+
 		s->name = kstrdup(name, GFP_KERNEL);
 		if (!s->name) {
 			kmem_cache_free(kmem_cache, s);
@@ -163,10 +180,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 
 		err = __kmem_cache_create(s, flags);
 		if (!err) {
-
 			s->refcount = 1;
 			list_add(&s->list, &slab_caches);
-
+			memcg_cache_list_add(memcg, s);
 		} else {
 			kfree(s->name);
 			kmem_cache_free(kmem_cache, s);
@@ -194,6 +210,13 @@ out_locked:
 
 	return s;
 }
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+		  unsigned long flags, void (*ctor)(void *))
+{
+	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor);
+}
 EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *s)
@@ -209,6 +232,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 			if (s->flags & SLAB_DESTROY_BY_RCU)
 				rcu_barrier();
 
+			memcg_release_cache(s);
 			kfree(s->name);
 			kmem_cache_free(kmem_cache, s);
 		} else {
diff --git a/mm/slub.c b/mm/slub.c
index 87f9f32bf0cd..985332b38852 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -31,6 +31,7 @@
 #include <linux/fault-inject.h>
 #include <linux/stacktrace.h>
 #include <linux/prefetch.h>
+#include <linux/memcontrol.h>
 
 #include <trace/events/kmem.h>
 
@@ -3786,7 +3787,7 @@ static int slab_unmergeable(struct kmem_cache *s)
 	return 0;
 }
 
-static struct kmem_cache *find_mergeable(size_t size,
+static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
 		size_t align, unsigned long flags, const char *name,
 		void (*ctor)(void *))
 {
@@ -3822,17 +3823,21 @@ static struct kmem_cache *find_mergeable(size_t size,
 		if (s->size - size >= sizeof(void *))
 			continue;
 
+		if (!cache_match_memcg(s, memcg))
+			continue;
+
 		return s;
 	}
 	return NULL;
 }
 
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-		size_t align, unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
-	s = find_mergeable(size, align, flags, name, ctor);
+	s = find_mergeable(memcg, size, align, flags, name, ctor);
 	if (s) {
 		s->refcount++;
 		/*
@@ -5156,6 +5161,12 @@ static char *create_unique_id(struct kmem_cache *s)
 	if (p != name + 1)
 		*p++ = '-';
 	p += sprintf(p, "%07d", s->size);
+
+#ifdef CONFIG_MEMCG_KMEM
+	if (!is_root_cache(s))
+		p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
+#endif
+
 	BUG_ON(p > name + ID_STR_LENGTH - 1);
 	return name;
 }
-- 
cgit v1.2.3-55-g7522


From 55007d849759252ddd573aeb36143b947202d509 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:38 -0800
Subject: memcg: allocate memory for memcg caches whenever a new memcg appears

Every cache that is considered a root cache (basically the "original"
caches, tied to the root memcg/no-memcg) will have an array that should be
large enough to store a cache pointer per each memcg in the system.

Theoreticaly, this is as high as 1 << sizeof(css_id), which is currently
in the 64k pointers range.  Most of the time, we won't be using that much.

What goes in this patch, is a simple scheme to dynamically allocate such
an array, in order to minimize memory usage for memcg caches.  Because we
would also like to avoid allocations all the time, at least for now, the
array will only grow.  It will tend to be big enough to hold the maximum
number of kmem-limited memcgs ever achieved.

We'll allocate it to be a minimum of 64 kmem-limited memcgs.  When we have
more than that, we'll start doubling the size of this array every time the
limit is reached.

Because we are only considering kmem limited memcgs, a natural point for
this to happen is when we write to the limit.  At that point, we already
have set_limit_mutex held, so that will become our natural synchronization
mechanism.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |   2 +
 mm/memcontrol.c            | 207 +++++++++++++++++++++++++++++++++++++++++----
 mm/slab_common.c           |  28 ++++++
 3 files changed, 221 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0b69a0470007..45085e14e023 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -447,6 +447,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
+void memcg_update_array_size(int num_groups);
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3eafe6cf6ca4..db38b60e5f87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -378,6 +378,11 @@ static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
 	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
 }
 
+static void memcg_kmem_clear_activated(struct mem_cgroup *memcg)
+{
+	clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
 	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
@@ -549,12 +554,48 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
+ * There are two main reasons for not using the css_id for this:
+ *  1) this works better in sparse environments, where we have a lot of memcgs,
+ *     but only a few kmem-limited. Or also, if we have, for instance, 200
+ *     memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *     200 entry array for that.
+ *
+ *  2) In order not to violate the cgroup API, we would like to do all memory
+ *     allocation in ->create(). At that point, we haven't yet allocated the
+ *     css_id. Having a separate index prevents us from messing with the cgroup
+ *     core for this
+ *
+ * The current size of the caches array is stored in
+ * memcg_limited_groups_array_size.  It will double each time we have to
+ * increase it.
+ */
+static DEFINE_IDA(kmem_limited_groups);
+static int memcg_limited_groups_array_size;
+/*
+ * MIN_SIZE is different than 1, because we would like to avoid going through
+ * the alloc/free process all the time. In a small machine, 4 kmem-limited
+ * cgroups is a reasonable guess. In the future, it could be a parameter or
+ * tunable, but that is strictly not necessary.
+ *
+ * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * this constant directly from cgroup, but it is understandable that this is
+ * better kept as an internal representation in cgroup.c. In any case, the
+ * css_id space is not getting any smaller, and we don't have to necessarily
+ * increase ours as well if it increases.
+ */
+#define MEMCG_CACHES_MIN_SIZE 4
+#define MEMCG_CACHES_MAX_SIZE 65535
+
 struct static_key memcg_kmem_enabled_key;
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
-	if (memcg_kmem_is_active(memcg))
+	if (memcg_kmem_is_active(memcg)) {
 		static_key_slow_dec(&memcg_kmem_enabled_key);
+		ida_simple_remove(&kmem_limited_groups, memcg->kmemcg_id);
+	}
 	/*
 	 * This check can't live in kmem destruction function,
 	 * since the charges will outlive the cgroup
@@ -2813,6 +2854,120 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
+/*
+ * This ends up being protected by the set_limit mutex, during normal
+ * operation, because that is its main call site.
+ *
+ * But when we create a new cache, we can call this as well if its parent
+ * is kmem-limited. That will have to hold set_limit_mutex as well.
+ */
+int memcg_update_cache_sizes(struct mem_cgroup *memcg)
+{
+	int num, ret;
+
+	num = ida_simple_get(&kmem_limited_groups,
+				0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (num < 0)
+		return num;
+	/*
+	 * After this point, kmem_accounted (that we test atomically in
+	 * the beginning of this conditional), is no longer 0. This
+	 * guarantees only one process will set the following boolean
+	 * to true. We don't need test_and_set because we're protected
+	 * by the set_limit_mutex anyway.
+	 */
+	memcg_kmem_set_activated(memcg);
+
+	ret = memcg_update_all_caches(num+1);
+	if (ret) {
+		ida_simple_remove(&kmem_limited_groups, num);
+		memcg_kmem_clear_activated(memcg);
+		return ret;
+	}
+
+	memcg->kmemcg_id = num;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+	return 0;
+}
+
+static size_t memcg_caches_array_size(int num_groups)
+{
+	ssize_t size;
+	if (num_groups <= 0)
+		return 0;
+
+	size = 2 * num_groups;
+	if (size < MEMCG_CACHES_MIN_SIZE)
+		size = MEMCG_CACHES_MIN_SIZE;
+	else if (size > MEMCG_CACHES_MAX_SIZE)
+		size = MEMCG_CACHES_MAX_SIZE;
+
+	return size;
+}
+
+/*
+ * We should update the current array size iff all caches updates succeed. This
+ * can only be done from the slab side. The slab mutex needs to be held when
+ * calling this.
+ */
+void memcg_update_array_size(int num)
+{
+	if (num > memcg_limited_groups_array_size)
+		memcg_limited_groups_array_size = memcg_caches_array_size(num);
+}
+
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
+{
+	struct memcg_cache_params *cur_params = s->memcg_params;
+
+	VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+
+	if (num_groups > memcg_limited_groups_array_size) {
+		int i;
+		ssize_t size = memcg_caches_array_size(num_groups);
+
+		size *= sizeof(void *);
+		size += sizeof(struct memcg_cache_params);
+
+		s->memcg_params = kzalloc(size, GFP_KERNEL);
+		if (!s->memcg_params) {
+			s->memcg_params = cur_params;
+			return -ENOMEM;
+		}
+
+		s->memcg_params->is_root_cache = true;
+
+		/*
+		 * There is the chance it will be bigger than
+		 * memcg_limited_groups_array_size, if we failed an allocation
+		 * in a cache, in which case all caches updated before it, will
+		 * have a bigger array.
+		 *
+		 * But if that is the case, the data after
+		 * memcg_limited_groups_array_size is certainly unused
+		 */
+		for (i = 0; i < memcg_limited_groups_array_size; i++) {
+			if (!cur_params->memcg_caches[i])
+				continue;
+			s->memcg_params->memcg_caches[i] =
+						cur_params->memcg_caches[i];
+		}
+
+		/*
+		 * Ideally, we would wait until all caches succeed, and only
+		 * then free the old one. But this is not worth the extra
+		 * pointer per-cache we'd have to have for this.
+		 *
+		 * It is not a big deal if some caches are left with a size
+		 * bigger than the others. And all updates will reset this
+		 * anyway.
+		 */
+		kfree(cur_params);
+	}
+	return 0;
+}
+
 int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	size_t size = sizeof(struct memcg_cache_params);
@@ -2820,6 +2975,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 	if (!memcg_kmem_enabled())
 		return 0;
 
+	if (!memcg)
+		size += memcg_limited_groups_array_size * sizeof(void *);
+
 	s->memcg_params = kzalloc(size, GFP_KERNEL);
 	if (!s->memcg_params)
 		return -ENOMEM;
@@ -4326,14 +4484,11 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 		ret = res_counter_set_limit(&memcg->kmem, val);
 		VM_BUG_ON(ret);
 
-		/*
-		 * After this point, kmem_accounted (that we test atomically in
-		 * the beginning of this conditional), is no longer 0. This
-		 * guarantees only one process will set the following boolean
-		 * to true. We don't need test_and_set because we're protected
-		 * by the set_limit_mutex anyway.
-		 */
-		memcg_kmem_set_activated(memcg);
+		ret = memcg_update_cache_sizes(memcg);
+		if (ret) {
+			res_counter_set_limit(&memcg->kmem, RESOURCE_MAX);
+			goto out;
+		}
 		must_inc_static_branch = true;
 		/*
 		 * kmem charges can outlive the cgroup. In the case of slab
@@ -4372,11 +4527,13 @@ out:
 	return ret;
 }
 
-static void memcg_propagate_kmem(struct mem_cgroup *memcg)
+static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
+	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
 	if (!parent)
-		return;
+		goto out;
+
 	memcg->kmem_account_flags = parent->kmem_account_flags;
 #ifdef CONFIG_MEMCG_KMEM
 	/*
@@ -4389,11 +4546,24 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
 	 * It is a lot simpler just to do static_key_slow_inc() on every child
 	 * that is accounted.
 	 */
-	if (memcg_kmem_is_active(memcg)) {
-		mem_cgroup_get(memcg);
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-	}
+	if (!memcg_kmem_is_active(memcg))
+		goto out;
+
+	/*
+	 * destroy(), called if we fail, will issue static_key_slow_inc() and
+	 * mem_cgroup_put() if kmem is enabled. We have to either call them
+	 * unconditionally, or clear the KMEM_ACTIVE flag. I personally find
+	 * this more consistent, since it always leads to the same destroy path
+	 */
+	mem_cgroup_get(memcg);
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+
+	mutex_lock(&set_limit_mutex);
+	ret = memcg_update_cache_sizes(memcg);
+	mutex_unlock(&set_limit_mutex);
 #endif
+out:
+	return ret;
 }
 
 /*
@@ -5075,8 +5245,12 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	int ret;
+
 	memcg->kmemcg_id = -1;
-	memcg_propagate_kmem(memcg);
+	ret = memcg_propagate_kmem(memcg);
+	if (ret)
+		return ret;
 
 	return mem_cgroup_sockets_init(memcg, ss);
 };
@@ -5479,6 +5653,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 		res_counter_init(&memcg->res, &parent->res);
 		res_counter_init(&memcg->memsw, &parent->memsw);
 		res_counter_init(&memcg->kmem, &parent->kmem);
+
 		/*
 		 * We increment refcnt of the parent to ensure that we can
 		 * safely access it on res_counter_charge/uncharge.
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3031badcc577..1c424b6511bf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -81,6 +81,34 @@ static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+int memcg_update_all_caches(int num_memcgs)
+{
+	struct kmem_cache *s;
+	int ret = 0;
+	mutex_lock(&slab_mutex);
+
+	list_for_each_entry(s, &slab_caches, list) {
+		if (!is_root_cache(s))
+			continue;
+
+		ret = memcg_update_cache_size(s, num_memcgs);
+		/*
+		 * See comment in memcontrol.c, memcg_update_cache_size:
+		 * Instead of freeing the memory, we'll just leave the caches
+		 * up to this point in an updated state.
+		 */
+		if (ret)
+			goto out;
+	}
+
+	memcg_update_array_size(num_memcgs);
+out:
+	mutex_unlock(&slab_mutex);
+	return ret;
+}
+#endif
+
 /*
  * Figure out what the alignment of the objects will be given a set of
  * flags, a user specified alignment and the size of the objects.
-- 
cgit v1.2.3-55-g7522


From d7f25f8a2f81252d1ac134470ba1d0a287cf8fcd Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:40 -0800
Subject: memcg: infrastructure to match an allocation to the right cache

The page allocator is able to bind a page to a memcg when it is
allocated.  But for the caches, we'd like to have as many objects as
possible in a page belonging to the same cache.

This is done in this patch by calling memcg_kmem_get_cache in the
beginning of every allocation function.  This function is patched out by
static branches when kernel memory controller is not being used.

It assumes that the task allocating, which determines the memcg in the
page allocator, belongs to the same cgroup throughout the whole process.
Misaccounting can happen if the task calls memcg_kmem_get_cache() while
belonging to a cgroup, and later on changes.  This is considered
acceptable, and should only happen upon task migration.

Before the cache is created by the memcg core, there is also a possible
imbalance: the task belongs to a memcg, but the cache being allocated from
is the global cache, since the child cache is not yet guaranteed to be
ready.  This case is also fine, since in this case the GFP_KMEMCG will not
be passed and the page allocator will not attempt any cgroup accounting.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  41 +++++++++
 init/Kconfig               |   1 -
 mm/memcontrol.c            | 217 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 258 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 45085e14e023..bd9b5d73bc2b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -449,6 +449,10 @@ void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
+
+struct kmem_cache *
+__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -518,6 +522,37 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 		__memcg_kmem_commit_charge(page, memcg, order);
 }
 
+/**
+ * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
+ * @cachep: the original global kmem cache
+ * @gfp: allocation flags.
+ *
+ * This function assumes that the task allocating, which determines the memcg
+ * in the page allocator, belongs to the same cgroup throughout the whole
+ * process.  Misacounting can happen if the task calls memcg_kmem_get_cache()
+ * while belonging to a cgroup, and later on changes. This is considered
+ * acceptable, and should only happen upon task migration.
+ *
+ * Before the cache is created by the memcg core, there is also a possible
+ * imbalance: the task belongs to a memcg, but the cache being allocated from
+ * is the global cache, since the child cache is not yet guaranteed to be
+ * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
+ * passed and the page allocator will not attempt any cgroup accounting.
+ */
+static __always_inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	if (!memcg_kmem_enabled())
+		return cachep;
+	if (gfp & __GFP_NOFAIL)
+		return cachep;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return cachep;
+	if (unlikely(fatal_signal_pending(current)))
+		return cachep;
+
+	return __memcg_kmem_get_cache(cachep, gfp);
+}
 #else
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
@@ -553,6 +588,12 @@ static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
 					struct kmem_cache *s)
 {
 }
+
+static inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	return cachep;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/init/Kconfig b/init/Kconfig
index 19ccb33c99d9..7d30240e5bfe 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -883,7 +883,6 @@ config MEMCG_KMEM
 	bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
 	depends on MEMCG && EXPERIMENTAL
 	depends on SLUB || SLAB
-	default n
 	help
 	  The Kernel Memory extension for Memory Resource Controller can limit
 	  the amount of memory used by kernel objects in the system. Those are
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db38b60e5f87..efd26620a60b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -588,7 +588,14 @@ static int memcg_limited_groups_array_size;
 #define MEMCG_CACHES_MIN_SIZE 4
 #define MEMCG_CACHES_MAX_SIZE 65535
 
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional to this static branch, we'll have to allow modules that does
+ * kmem_cache_alloc and the such to see this symbol as well
+ */
 struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
@@ -2989,9 +2996,219 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 
 void memcg_release_cache(struct kmem_cache *s)
 {
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	/*
+	 * This happens, for instance, when a root cache goes away before we
+	 * add any memcg.
+	 */
+	if (!s->memcg_params)
+		return;
+
+	if (s->memcg_params->is_root_cache)
+		goto out;
+
+	memcg = s->memcg_params->memcg;
+	id  = memcg_cache_id(memcg);
+
+	root = s->memcg_params->root_cache;
+	root->memcg_params->memcg_caches[id] = NULL;
+	mem_cgroup_put(memcg);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_del(&s->memcg_params->list);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
 	kfree(s->memcg_params);
 }
 
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	char *name;
+	struct dentry *dentry;
+
+	rcu_read_lock();
+	dentry = rcu_dereference(memcg->css.cgroup->dentry);
+	rcu_read_unlock();
+
+	BUG_ON(dentry == NULL);
+
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), dentry->d_name.name);
+
+	return name;
+}
+
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+					 struct kmem_cache *s)
+{
+	char *name;
+	struct kmem_cache *new;
+
+	name = memcg_cache_name(memcg, s);
+	if (!name)
+		return NULL;
+
+	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+				      (s->flags & ~SLAB_PANIC), s->ctor);
+
+	kfree(name);
+	return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *cachep)
+{
+	struct kmem_cache *new_cachep;
+	int idx;
+
+	BUG_ON(!memcg_can_account_kmem(memcg));
+
+	idx = memcg_cache_id(memcg);
+
+	mutex_lock(&memcg_cache_mutex);
+	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	if (new_cachep)
+		goto out;
+
+	new_cachep = kmem_cache_dup(memcg, cachep);
+
+	if (new_cachep == NULL) {
+		new_cachep = cachep;
+		goto out;
+	}
+
+	mem_cgroup_get(memcg);
+	new_cachep->memcg_params->root_cache = cachep;
+
+	cachep->memcg_params->memcg_caches[idx] = new_cachep;
+	/*
+	 * the readers won't lock, make sure everybody sees the updated value,
+	 * so they won't put stuff in the queue again for no reason
+	 */
+	wmb();
+out:
+	mutex_unlock(&memcg_cache_mutex);
+	return new_cachep;
+}
+
+struct create_work {
+	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
+	struct work_struct work;
+};
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+	struct create_work *cw;
+
+	cw = container_of(w, struct create_work, work);
+	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	/* Drop the reference gotten when we enqueued. */
+	css_put(&cw->memcg->css);
+	kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock.
+ */
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	struct create_work *cw;
+
+	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	if (cw == NULL)
+		return;
+
+	/* The corresponding put will be done in the workqueue. */
+	if (!css_tryget(&memcg->css)) {
+		kfree(cw);
+		return;
+	}
+
+	cw->memcg = memcg;
+	cw->cachep = cachep;
+
+	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	schedule_work(&cw->work);
+}
+
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet, if we are the first user of it,
+ * we either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+					  gfp_t gfp)
+{
+	struct mem_cgroup *memcg;
+	int idx;
+
+	VM_BUG_ON(!cachep->memcg_params);
+	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+	rcu_read_unlock();
+
+	if (!memcg_can_account_kmem(memcg))
+		return cachep;
+
+	idx = memcg_cache_id(memcg);
+
+	/*
+	 * barrier to mare sure we're always seeing the up to date value.  The
+	 * code updating memcg_caches will issue a write barrier to match this.
+	 */
+	read_barrier_depends();
+	if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+		/*
+		 * If we are in a safe context (can wait, and not in interrupt
+		 * context), we could be be predictable and return right away.
+		 * This would guarantee that the allocation being performed
+		 * already belongs in the new cache.
+		 *
+		 * However, there are some clashes that can arrive from locking.
+		 * For instance, because we acquire the slab_mutex while doing
+		 * kmem_cache_dup, this means no further allocation could happen
+		 * with the slab_mutex held.
+		 *
+		 * Also, because cache creation issue get_online_cpus(), this
+		 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+		 * that ends up reversed during cpu hotplug. (cpuset allocates
+		 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
+		 * better to defer everything.
+		 */
+		memcg_create_cache_enqueue(memcg, cachep);
+		return cachep;
+	}
+
+	return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
-- 
cgit v1.2.3-55-g7522


From 0e9d92f2d02d8c8320f0502307c688d07bdac2b3 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:42 -0800
Subject: memcg: skip memcg kmem allocations in specified code regions

Create a mechanism that skip memcg allocations during certain pieces of
our core code.  It basically works in the same way as
preempt_disable()/preempt_enable(): By marking a region under which all
allocations will be accounted to the root memcg.

We need this to prevent races in early cache creation, when we
allocate data using caches that are not necessarily created already.

Signed-off-by: Glauber Costa <glommer@parallels.com>
yCc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 +
 mm/memcontrol.c       | 57 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9914c662ed7b..f712465b05c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1597,6 +1597,7 @@ struct task_struct {
 		unsigned long nr_pages;	/* uncharged usage */
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
+	unsigned int memcg_kmem_skip_account;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index efd26620a60b..65302a083d2f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3025,6 +3025,37 @@ out:
 	kfree(s->memcg_params);
 }
 
+/*
+ * During the creation a new cache, we need to disable our accounting mechanism
+ * altogether. This is true even if we are not creating, but rather just
+ * enqueing new caches to be created.
+ *
+ * This is because that process will trigger allocations; some visible, like
+ * explicit kmallocs to auxiliary data structures, name strings and internal
+ * cache structures; some well concealed, like INIT_WORK() that can allocate
+ * objects during debug.
+ *
+ * If any allocation happens during memcg_kmem_get_cache, we will recurse back
+ * to it. This may not be a bounded recursion: since the first cache creation
+ * failed to complete (waiting on the allocation), we'll just try to create the
+ * cache again, failing at the same point.
+ *
+ * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
+ * memcg_kmem_skip_account. So we enclose anything that might allocate memory
+ * inside the following two functions.
+ */
+static inline void memcg_stop_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account++;
+}
+
+static inline void memcg_resume_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account--;
+}
+
 static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	char *name;
@@ -3084,7 +3115,6 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 		goto out;
 
 	new_cachep = kmem_cache_dup(memcg, cachep);
-
 	if (new_cachep == NULL) {
 		new_cachep = cachep;
 		goto out;
@@ -3125,8 +3155,8 @@ static void memcg_create_cache_work_func(struct work_struct *w)
  * Enqueue the creation of a per-memcg kmem_cache.
  * Called with rcu_read_lock.
  */
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-				       struct kmem_cache *cachep)
+static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+					 struct kmem_cache *cachep)
 {
 	struct create_work *cw;
 
@@ -3147,6 +3177,24 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	schedule_work(&cw->work);
 }
 
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	/*
+	 * We need to stop accounting when we kmalloc, because if the
+	 * corresponding kmalloc cache is not yet created, the first allocation
+	 * in __memcg_create_cache_enqueue will recurse.
+	 *
+	 * However, it is better to enclose the whole function. Depending on
+	 * the debugging options enabled, INIT_WORK(), for instance, can
+	 * trigger an allocation. This too, will make us recurse. Because at
+	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
+	 * the safest choice is to do it like this, wrapping the whole function.
+	 */
+	memcg_stop_kmem_account();
+	__memcg_create_cache_enqueue(memcg, cachep);
+	memcg_resume_kmem_account();
+}
 /*
  * Return the kmem_cache we're supposed to use for a slab allocation.
  * We try to use the current memcg's version of the cache.
@@ -3169,6 +3217,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	VM_BUG_ON(!cachep->memcg_params);
 	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
 
+	if (!current->mm || current->memcg_kmem_skip_account)
+		return cachep;
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
 	rcu_read_unlock();
-- 
cgit v1.2.3-55-g7522


From b9ce5ef49f00daf2254c6953c8d31f79aabccd34 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:46 -0800
Subject: sl[au]b: always get the cache from its page in kmem_cache_free()

struct page already has this information.  If we start chaining caches,
this information will always be more trustworthy than whatever is passed
into the function.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/slab.c                  |  6 +++++-
 mm/slab.h                  | 39 +++++++++++++++++++++++++++++++++++++++
 mm/slob.c                  |  2 +-
 mm/slub.c                  | 15 +++------------
 5 files changed, 53 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bd9b5d73bc2b..2298122e71ad 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -554,6 +554,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return __memcg_kmem_get_cache(cachep, gfp);
 }
 #else
+static inline bool memcg_kmem_enabled(void)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
diff --git a/mm/slab.c b/mm/slab.c
index c26ab9fbe1f5..bab6fec765a7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -87,7 +87,6 @@
  */
 
 #include	<linux/slab.h>
-#include	"slab.h"
 #include	<linux/mm.h>
 #include	<linux/poison.h>
 #include	<linux/swap.h>
@@ -128,6 +127,8 @@
 
 #include	"internal.h"
 
+#include	"slab.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -3883,6 +3884,9 @@ EXPORT_SYMBOL(__kmalloc);
 void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
 	unsigned long flags;
+	cachep = cache_from_obj(cachep, objp);
+	if (!cachep)
+		return;
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, cachep->object_size);
diff --git a/mm/slab.h b/mm/slab.h
index abe582d20c79..c95e922b166d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -116,6 +116,13 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 	return (is_root_cache(cachep) && !memcg) ||
 				(cachep->memcg_params->memcg == memcg);
 }
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+					struct kmem_cache *p)
+{
+	return (p == s) ||
+		(s->memcg_params && (p == s->memcg_params->root_cache));
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -127,5 +134,37 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 {
 	return true;
 }
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+				      struct kmem_cache *p)
+{
+	return true;
+}
 #endif
+
+static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
+{
+	struct kmem_cache *cachep;
+	struct page *page;
+
+	/*
+	 * When kmemcg is not being used, both assignments should return the
+	 * same value. but we don't want to pay the assignment price in that
+	 * case. If it is not compiled in, the compiler should be smart enough
+	 * to not do even the assignment. In that case, slab_equal_or_root
+	 * will also be a constant.
+	 */
+	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
+		return s;
+
+	page = virt_to_head_page(x);
+	cachep = page->slab_cache;
+	if (slab_equal_or_root(cachep, s))
+		return cachep;
+
+	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
+		__FUNCTION__, cachep->name, s->name);
+	WARN_ON_ONCE(1);
+	return s;
+}
 #endif
diff --git a/mm/slob.c b/mm/slob.c
index 795bab7d391d..a99fdf7a0907 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -58,7 +58,6 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include "slab.h"
 
 #include <linux/mm.h>
 #include <linux/swap.h> /* struct reclaim_state */
@@ -73,6 +72,7 @@
 
 #include <linux/atomic.h>
 
+#include "slab.h"
 /*
  * slob_block has a field 'units', which indicates size of block if +ve,
  * or offset of next block if -ve (in SLOB_UNITs).
diff --git a/mm/slub.c b/mm/slub.c
index 985332b38852..6d5f2305d7a4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2611,19 +2611,10 @@ redo:
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-	struct page *page;
-
-	page = virt_to_head_page(x);
-
-	if (kmem_cache_debug(s) && page->slab_cache != s) {
-		pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-			" is from  %s\n", page->slab_cache->name, s->name);
-		WARN_ON_ONCE(1);
+	s = cache_from_obj(s, x);
+	if (!s)
 		return;
-	}
-
-	slab_free(s, page, x, _RET_IP_);
-
+	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
 	trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
-- 
cgit v1.2.3-55-g7522


From d79923fad95b0cdf7770e024677180c734cb7148 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:48 -0800
Subject: sl[au]b: allocate objects from memcg cache

We are able to match a cache allocation to a particular memcg.  If the
task doesn't change groups during the allocation itself - a rare event,
this will give us a good picture about who is the first group to touch a
cache page.

This patch uses the now available infrastructure by calling
memcg_kmem_get_cache() before all the cache allocations.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 5 ++++-
 mm/memcontrol.c          | 3 +++
 mm/slab.c                | 6 +++++-
 mm/slub.c                | 7 ++++---
 4 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 961e72eab907..364ba6c9fe21 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -225,7 +225,10 @@ void *__kmalloc(size_t size, gfp_t flags);
 static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	void *ret;
+
+	flags |= (__GFP_COMP | __GFP_KMEMCG);
+	ret = (void *) __get_free_pages(flags, order);
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 65302a083d2f..cc13797d0fbc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3086,6 +3086,9 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
 				      (s->flags & ~SLAB_PANIC), s->ctor);
 
+	if (new)
+		new->allocflags |= __GFP_KMEMCG;
+
 	kfree(name);
 	return new;
 }
diff --git a/mm/slab.c b/mm/slab.c
index bab6fec765a7..e265865e8700 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1933,7 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	}
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	free_pages((unsigned long)addr, cachep->gfporder);
+	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -3486,6 +3486,8 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
+	cachep = memcg_kmem_get_cache(cachep, flags);
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
@@ -3571,6 +3573,8 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
+	cachep = memcg_kmem_get_cache(cachep, flags);
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 	objp = __do_cache_alloc(cachep, flags);
diff --git a/mm/slub.c b/mm/slub.c
index 6d5f2305d7a4..ef39e872b8eb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1405,7 +1405,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_pages(page, order);
+	__free_memcg_kmem_pages(page, order);
 }
 
 #define need_reserve_slab_rcu						\
@@ -2323,6 +2323,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	if (slab_pre_alloc_hook(s, gfpflags))
 		return NULL;
 
+	s = memcg_kmem_get_cache(s, gfpflags);
 redo:
 
 	/*
@@ -3284,7 +3285,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK;
+	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
 	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
@@ -3390,7 +3391,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kmemleak_free(x);
-		__free_pages(page, compound_order(page));
+		__free_memcg_kmem_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, _RET_IP_);
-- 
cgit v1.2.3-55-g7522


From 1f458cbf122288b23620ee822e19bcbb76c8d6ec Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:50 -0800
Subject: memcg: destroy memcg caches

Implement destruction of memcg caches.  Right now, only caches where our
reference counter is the last remaining are deleted.  If there are any
other reference counters around, we just leave the caches lying around
until they go away.

When that happens, a destruction function is called from the cache code.
Caches are only destroyed in process context, so we queue them up for
later processing in the general case.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  2 ++
 include/linux/slab.h       | 10 +++++++-
 mm/memcontrol.c            | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.c                  |  3 +++
 mm/slab.h                  | 23 +++++++++++++++++
 mm/slub.c                  |  7 +++++-
 6 files changed, 106 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2298122e71ad..79fcf0cd7186 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -453,6 +453,8 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/include/linux/slab.h b/include/linux/slab.h
index c0fcf28c15b2..869efb8d2377 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -11,6 +11,8 @@
 
 #include <linux/gfp.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
+
 
 /*
  * Flags to pass to kmem_cache_create().
@@ -179,7 +181,6 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #ifndef ARCH_SLAB_MINALIGN
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
-
 /*
  * This is the main placeholder for memcg-related information in kmem caches.
  * struct kmem_cache will hold a pointer to it, so the memory cost while
@@ -197,6 +198,10 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @dead: set to true after the memcg dies; the cache may still be around.
+ * @nr_pages: number of pages that belongs to this cache.
+ * @destroy: worker to be called whenever we are ready, or believe we may be
+ *           ready, to destroy this cache.
  */
 struct memcg_cache_params {
 	bool is_root_cache;
@@ -206,6 +211,9 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
+			bool dead;
+			atomic_t nr_pages;
+			struct work_struct destroy;
 		};
 	};
 };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cc13797d0fbc..270a36789859 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2779,6 +2779,19 @@ static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
 }
 
+/*
+ * This is a bit cumbersome, but it is rarely used and avoids a backpointer
+ * in the memcg_cache_params struct.
+ */
+static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
+{
+	struct kmem_cache *cachep;
+
+	VM_BUG_ON(p->is_root_cache);
+	cachep = p->root_cache;
+	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+}
+
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
@@ -3056,6 +3069,31 @@ static inline void memcg_resume_kmem_account(void)
 	current->memcg_kmem_skip_account--;
 }
 
+static void kmem_cache_destroy_work_func(struct work_struct *w)
+{
+	struct kmem_cache *cachep;
+	struct memcg_cache_params *p;
+
+	p = container_of(w, struct memcg_cache_params, destroy);
+
+	cachep = memcg_params_to_cache(p);
+
+	if (!atomic_read(&cachep->memcg_params->nr_pages))
+		kmem_cache_destroy(cachep);
+}
+
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
+{
+	if (!cachep->memcg_params->dead)
+		return;
+
+	/*
+	 * We have to defer the actual destroying to a workqueue, because
+	 * we might currently be in a context that cannot sleep.
+	 */
+	schedule_work(&cachep->memcg_params->destroy);
+}
+
 static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	char *name;
@@ -3125,6 +3163,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
 	mem_cgroup_get(memcg);
 	new_cachep->memcg_params->root_cache = cachep;
+	atomic_set(&new_cachep->memcg_params->nr_pages , 0);
 
 	cachep->memcg_params->memcg_caches[idx] = new_cachep;
 	/*
@@ -3143,6 +3182,25 @@ struct create_work {
 	struct work_struct work;
 };
 
+static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+	struct kmem_cache *cachep;
+	struct memcg_cache_params *params;
+
+	if (!memcg_kmem_is_active(memcg))
+		return;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+		cachep = memcg_params_to_cache(params);
+		cachep->memcg_params->dead = true;
+		INIT_WORK(&cachep->memcg_params->destroy,
+			  kmem_cache_destroy_work_func);
+		schedule_work(&cachep->memcg_params->destroy);
+	}
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
 static void memcg_create_cache_work_func(struct work_struct *w)
 {
 	struct create_work *cw;
@@ -3358,6 +3416,10 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 	VM_BUG_ON(mem_cgroup_is_root(memcg));
 	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
 }
+#else
+static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -5975,6 +6037,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
 	mem_cgroup_reparent_charges(memcg);
+	mem_cgroup_destroy_all_caches(memcg);
 }
 
 static void mem_cgroup_css_free(struct cgroup *cont)
diff --git a/mm/slab.c b/mm/slab.c
index e265865e8700..7467343f9fe7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1895,6 +1895,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		if (page->pfmemalloc)
 			SetPageSlabPfmemalloc(page + i);
 	}
+	memcg_bind_pages(cachep, cachep->gfporder);
 
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1931,6 +1932,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 		__ClearPageSlab(page);
 		page++;
 	}
+
+	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
diff --git a/mm/slab.h b/mm/slab.h
index c95e922b166d..43d8a38b534f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -117,6 +117,21 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 				(cachep->memcg_params->memcg == memcg);
 }
 
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+	if (!is_root_cache(s))
+		atomic_add(1 << order, &s->memcg_params->nr_pages);
+}
+
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+	if (is_root_cache(s))
+		return;
+
+	if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
+		mem_cgroup_destroy_cache(s);
+}
+
 static inline bool slab_equal_or_root(struct kmem_cache *s,
 					struct kmem_cache *p)
 {
@@ -135,6 +150,14 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 	return true;
 }
 
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+}
+
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+}
+
 static inline bool slab_equal_or_root(struct kmem_cache *s,
 				      struct kmem_cache *p)
 {
diff --git a/mm/slub.c b/mm/slub.c
index ef39e872b8eb..692177bebdf0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1344,6 +1344,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	void *start;
 	void *last;
 	void *p;
+	int order;
 
 	BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1352,7 +1353,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (!page)
 		goto out;
 
+	order = compound_order(page);
 	inc_slabs_node(s, page_to_nid(page), page->objects);
+	memcg_bind_pages(s, order);
 	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
@@ -1361,7 +1364,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	start = page_address(page);
 
 	if (unlikely(s->flags & SLAB_POISON))
-		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+		memset(start, POISON_INUSE, PAGE_SIZE << order);
 
 	last = start;
 	for_each_object(p, s, start, page->objects) {
@@ -1402,6 +1405,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
 	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
+
+	memcg_release_pages(s, order);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-- 
cgit v1.2.3-55-g7522


From 7cf2798240a2a2230cb16a391beef98d8a7ad362 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:22:55 -0800
Subject: memcg/sl[au]b: track all the memcg children of a kmem_cache

This enables us to remove all the children of a kmem_cache being
destroyed, if for example the kernel module it's being used in gets
unloaded.  Otherwise, the children will still point to the destroyed
parent.

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/memcontrol.c            | 49 ++++++++++++++++++++++++++++++++++++++++++++--
 mm/slab_common.c           |  3 +++
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 79fcf0cd7186..e119f3ef793c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -454,6 +454,7 @@ struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -601,6 +602,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	return cachep;
 }
+
+static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 270a36789859..4b68ec2c8df6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2772,6 +2772,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+static DEFINE_MUTEX(set_limit_mutex);
+
 #ifdef CONFIG_MEMCG_KMEM
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
@@ -3176,6 +3178,51 @@ out:
 	return new_cachep;
 }
 
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+	struct kmem_cache *c;
+	int i;
+
+	if (!s->memcg_params)
+		return;
+	if (!s->memcg_params->is_root_cache)
+		return;
+
+	/*
+	 * If the cache is being destroyed, we trust that there is no one else
+	 * requesting objects from it. Even if there are, the sanity checks in
+	 * kmem_cache_destroy should caught this ill-case.
+	 *
+	 * Still, we don't want anyone else freeing memcg_caches under our
+	 * noses, which can happen if a new memcg comes to life. As usual,
+	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 */
+	mutex_lock(&set_limit_mutex);
+	for (i = 0; i < memcg_limited_groups_array_size; i++) {
+		c = s->memcg_params->memcg_caches[i];
+		if (!c)
+			continue;
+
+		/*
+		 * We will now manually delete the caches, so to avoid races
+		 * we need to cancel all pending destruction workers and
+		 * proceed with destruction ourselves.
+		 *
+		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
+		 * and that could spawn the workers again: it is likely that
+		 * the cache still have active pages until this very moment.
+		 * This would lead us back to mem_cgroup_destroy_cache.
+		 *
+		 * But that will not execute at all if the "dead" flag is not
+		 * set, so flip it down to guarantee we are in control.
+		 */
+		c->memcg_params->dead = false;
+		cancel_delayed_work_sync(&c->memcg_params->destroy);
+		kmem_cache_destroy(c);
+	}
+	mutex_unlock(&set_limit_mutex);
+}
+
 struct create_work {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *cachep;
@@ -4284,8 +4331,6 @@ void mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
-static DEFINE_MUTEX(set_limit_mutex);
-
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1c424b6511bf..080a43804bf1 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -249,6 +249,9 @@ EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+	/* Destroy all the children caches if we aren't a memcg cache */
+	kmem_cache_destroy_memcg_children(s);
+
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 	s->refcount--;
-- 
cgit v1.2.3-55-g7522


From 749c54151a6e5b229e4ae067dbc651e54b161fbc Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:23:01 -0800
Subject: memcg: aggregate memcg cache values in slabinfo

When we create caches in memcgs, we need to display their usage
information somewhere.  We'll adopt a scheme similar to /proc/meminfo,
with aggregate totals shown in the global file, and per-group information
stored in the group itself.

For the time being, only reads are allowed in the per-group cache.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  8 ++++++++
 include/linux/slab.h       |  4 ++++
 mm/memcontrol.c            | 30 +++++++++++++++++++++++++++++-
 mm/slab.h                  | 27 +++++++++++++++++++++++++++
 mm/slab_common.c           | 44 ++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 108 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e119f3ef793c..8dc7c746b44f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -420,6 +420,11 @@ static inline void sock_release_memcg(struct sock *sk)
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
+
+extern int memcg_limited_groups_array_size;
+#define for_each_memcg_cache_index(_idx)	\
+	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return static_key_false(&memcg_kmem_enabled_key);
@@ -557,6 +562,9 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return __memcg_kmem_get_cache(cachep, gfp);
 }
 #else
+#define for_each_memcg_cache_index(_idx)	\
+	for (; NULL; )
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return false;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 869efb8d2377..b9278663f22a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -220,6 +220,10 @@ struct memcg_cache_params {
 
 int memcg_update_all_caches(int num_memcgs);
 
+struct seq_file;
+int cache_show(struct kmem_cache *s, struct seq_file *m);
+void print_slabinfo_header(struct seq_file *m);
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7633e0d429e0..a32d83c2e353 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -572,7 +572,8 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
  * increase it.
  */
 static DEFINE_IDA(kmem_limited_groups);
-static int memcg_limited_groups_array_size;
+int memcg_limited_groups_array_size;
+
 /*
  * MIN_SIZE is different than 1, because we would like to avoid going through
  * the alloc/free process all the time. In a small machine, 4 kmem-limited
@@ -2794,6 +2795,27 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
 	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
 }
 
+#ifdef CONFIG_SLABINFO
+static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
+					struct seq_file *m)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct memcg_cache_params *params;
+
+	if (!memcg_can_account_kmem(memcg))
+		return -EIO;
+
+	print_slabinfo_header(m);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_for_each_entry(params, &memcg->memcg_slab_caches, list)
+		cache_show(memcg_params_to_cache(params), m);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+	return 0;
+}
+#endif
+
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
@@ -5822,6 +5844,12 @@ static struct cftype mem_cgroup_files[] = {
 		.trigger = mem_cgroup_reset,
 		.read = mem_cgroup_read,
 	},
+#ifdef CONFIG_SLABINFO
+	{
+		.name = "kmem.slabinfo",
+		.read_seq_string = mem_cgroup_slabinfo_read,
+	},
+#endif
 #endif
 	{ },	/* terminate */
 };
diff --git a/mm/slab.h b/mm/slab.h
index 43d8a38b534f..ec5dae1c8e75 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -138,6 +138,23 @@ static inline bool slab_equal_or_root(struct kmem_cache *s,
 	return (p == s) ||
 		(s->memcg_params && (p == s->memcg_params->root_cache));
 }
+
+/*
+ * We use suffixes to the name in memcg because we can't have caches
+ * created in the system with the same name. But when we print them
+ * locally, better refer to them with the base name
+ */
+static inline const char *cache_name(struct kmem_cache *s)
+{
+	if (!is_root_cache(s))
+		return s->memcg_params->root_cache->name;
+	return s->name;
+}
+
+static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
+{
+	return s->memcg_params->memcg_caches[idx];
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -163,6 +180,16 @@ static inline bool slab_equal_or_root(struct kmem_cache *s,
 {
 	return true;
 }
+
+static inline const char *cache_name(struct kmem_cache *s)
+{
+	return s->name;
+}
+
+static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
+{
+	return NULL;
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 080a43804bf1..081f1b8d9a7b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -322,7 +322,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
 
 
 #ifdef CONFIG_SLABINFO
-static void print_slabinfo_header(struct seq_file *m)
+void print_slabinfo_header(struct seq_file *m)
 {
 	/*
 	 * Output format version, so at least we can change it
@@ -366,16 +366,43 @@ static void s_stop(struct seq_file *m, void *p)
 	mutex_unlock(&slab_mutex);
 }
 
-static int s_show(struct seq_file *m, void *p)
+static void
+memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
+{
+	struct kmem_cache *c;
+	struct slabinfo sinfo;
+	int i;
+
+	if (!is_root_cache(s))
+		return;
+
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg(s, i);
+		if (!c)
+			continue;
+
+		memset(&sinfo, 0, sizeof(sinfo));
+		get_slabinfo(c, &sinfo);
+
+		info->active_slabs += sinfo.active_slabs;
+		info->num_slabs += sinfo.num_slabs;
+		info->shared_avail += sinfo.shared_avail;
+		info->active_objs += sinfo.active_objs;
+		info->num_objs += sinfo.num_objs;
+	}
+}
+
+int cache_show(struct kmem_cache *s, struct seq_file *m)
 {
-	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
 	struct slabinfo sinfo;
 
 	memset(&sinfo, 0, sizeof(sinfo));
 	get_slabinfo(s, &sinfo);
 
+	memcg_accumulate_slabinfo(s, &sinfo);
+
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
-		   s->name, sinfo.active_objs, sinfo.num_objs, s->size,
+		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
 		   sinfo.objects_per_slab, (1 << sinfo.cache_order));
 
 	seq_printf(m, " : tunables %4u %4u %4u",
@@ -387,6 +414,15 @@ static int s_show(struct seq_file *m, void *p)
 	return 0;
 }
 
+static int s_show(struct seq_file *m, void *p)
+{
+	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+
+	if (!is_root_cache(s))
+		return 0;
+	return cache_show(s, m);
+}
+
 /*
  * slabinfo_op - iterator that generates /proc/slabinfo
  *
-- 
cgit v1.2.3-55-g7522


From 943a451a87d229ca564a27274b58eaeae35fde5d Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:23:03 -0800
Subject: slab: propagate tunable values

SLAB allows us to tune a particular cache behavior with tunables.  When
creating a new memcg cache copy, we'd like to preserve any tunables the
parent cache already had.

This could be done by an explicit call to do_tune_cpucache() after the
cache is created.  But this is not very convenient now that the caches are
created from common code, since this function is SLAB-specific.

Another method of doing that is taking advantage of the fact that
do_tune_cpucache() is always called from enable_cpucache(), which is
called at cache initialization.  We can just preset the values, and then
things work as expected.

It can also happen that a root cache has its tunables updated during
normal system operation.  In this case, we will propagate the change to
all caches that are already active.

This change will require us to move the assignment of root_cache in
memcg_params a bit earlier.  We need this to be already set - which
memcg_kmem_register_cache will do - when we reach __kmem_cache_create()

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  8 +++++---
 include/linux/slab.h       |  2 +-
 mm/memcontrol.c            | 10 ++++++----
 mm/slab.c                  | 44 +++++++++++++++++++++++++++++++++++++++++---
 mm/slab.h                  | 12 ++++++++++++
 mm/slab_common.c           |  7 ++++---
 6 files changed, 69 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8dc7c746b44f..ea02ff970836 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -448,7 +448,8 @@ void __memcg_kmem_commit_charge(struct page *page,
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
@@ -590,8 +591,9 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return -1;
 }
 
-static inline int memcg_register_cache(struct mem_cgroup *memcg,
-				       struct kmem_cache *s)
+static inline int
+memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+		     struct kmem_cache *root_cache)
 {
 	return 0;
 }
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b9278663f22a..5d168d7e0a28 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -130,7 +130,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *));
 struct kmem_cache *
 kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
-			unsigned long, void (*)(void *));
+			unsigned long, void (*)(void *), struct kmem_cache *);
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a32d83c2e353..f3009b4bae51 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3012,7 +3012,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 	return 0;
 }
 
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache)
 {
 	size_t size = sizeof(struct memcg_cache_params);
 
@@ -3026,8 +3027,10 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 	if (!s->memcg_params)
 		return -ENOMEM;
 
-	if (memcg)
+	if (memcg) {
 		s->memcg_params->memcg = memcg;
+		s->memcg_params->root_cache = root_cache;
+	}
 	return 0;
 }
 
@@ -3186,7 +3189,7 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 		return NULL;
 
 	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
-				      (s->flags & ~SLAB_PANIC), s->ctor);
+				      (s->flags & ~SLAB_PANIC), s->ctor, s);
 
 	if (new)
 		new->allocflags |= __GFP_KMEMCG;
@@ -3226,7 +3229,6 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 	}
 
 	mem_cgroup_get(memcg);
-	new_cachep->memcg_params->root_cache = cachep;
 	atomic_set(&new_cachep->memcg_params->nr_pages , 0);
 
 	cachep->memcg_params->memcg_caches[idx] = new_cachep;
diff --git a/mm/slab.c b/mm/slab.c
index 7467343f9fe7..4dcbf96a77b4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4041,7 +4041,7 @@ static void do_ccupdate_local(void *info)
 }
 
 /* Always called with the slab_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
 	struct ccupdate_struct *new;
@@ -4084,12 +4084,48 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	return alloc_kmemlist(cachep, gfp);
 }
 
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+				int batchcount, int shared, gfp_t gfp)
+{
+	int ret;
+	struct kmem_cache *c = NULL;
+	int i = 0;
+
+	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+	if (slab_state < FULL)
+		return ret;
+
+	if ((ret < 0) || !is_root_cache(cachep))
+		return ret;
+
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg(cachep, i);
+		if (c)
+			/* return value determined by the parent cache only */
+			__do_tune_cpucache(c, limit, batchcount, shared, gfp);
+	}
+
+	return ret;
+}
+
 /* Called with slab_mutex held always */
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int err;
-	int limit, shared;
+	int limit = 0;
+	int shared = 0;
+	int batchcount = 0;
+
+	if (!is_root_cache(cachep)) {
+		struct kmem_cache *root = memcg_root_cache(cachep);
+		limit = root->limit;
+		shared = root->shared;
+		batchcount = root->batchcount;
+	}
 
+	if (limit && shared && batchcount)
+		goto skip_setup;
 	/*
 	 * The head array serves three purposes:
 	 * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -4131,7 +4167,9 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+	batchcount = (limit + 1) / 2;
+skip_setup:
+	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
diff --git a/mm/slab.h b/mm/slab.h
index ec5dae1c8e75..34a98d642196 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -155,6 +155,13 @@ static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
 {
 	return s->memcg_params->memcg_caches[idx];
 }
+
+static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
+{
+	if (is_root_cache(s))
+		return s;
+	return s->memcg_params->root_cache;
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -190,6 +197,11 @@ static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
 {
 	return NULL;
 }
+
+static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
+{
+	return s;
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 081f1b8d9a7b..3f3cd97d3fdf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -164,7 +164,8 @@ unsigned long calculate_alignment(unsigned long flags,
 
 struct kmem_cache *
 kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
-			size_t align, unsigned long flags, void (*ctor)(void *))
+			size_t align, unsigned long flags, void (*ctor)(void *),
+			struct kmem_cache *parent_cache)
 {
 	struct kmem_cache *s = NULL;
 	int err = 0;
@@ -193,7 +194,7 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
 		s->align = calculate_alignment(flags, align, size);
 		s->ctor = ctor;
 
-		if (memcg_register_cache(memcg, s)) {
+		if (memcg_register_cache(memcg, s, parent_cache)) {
 			kmem_cache_free(kmem_cache, s);
 			err = -ENOMEM;
 			goto out_locked;
@@ -243,7 +244,7 @@ struct kmem_cache *
 kmem_cache_create(const char *name, size_t size, size_t align,
 		  unsigned long flags, void (*ctor)(void *))
 {
-	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor);
+	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
-- 
cgit v1.2.3-55-g7522


From 107dab5c92d5f9c3afe962036e47c207363255c7 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:23:05 -0800
Subject: slub: slub-specific propagation changes

SLUB allows us to tune a particular cache behavior with sysfs-based
tunables.  When creating a new memcg cache copy, we'd like to preserve any
tunables the parent cache already had.

This can be done by tapping into the store attribute function provided by
the allocator.  We of course don't need to mess with read-only fields.
Since the attributes can have multiple types and are stored internally by
sysfs, the best strategy is to issue a ->show() in the root cache, and
then ->store() in the memcg cache.

The drawback of that, is that sysfs can allocate up to a page in buffering
for show(), that we are likely not to need, but also can't guarantee.  To
avoid always allocating a page for that, we can update the caches at store
time with the maximum attribute size ever stored to the root cache.  We
will then get a buffer big enough to hold it.  The corolary to this, is
that if no stores happened, nothing will be propagated.

It can also happen that a root cache has its tunables updated during
normal system operation.  In this case, we will propagate the change to
all caches that are already active.

[akpm@linux-foundation.org: tweak code to avoid __maybe_unused]
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h |  1 +
 mm/slub.c                | 76 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 364ba6c9fe21..9db4825cd393 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -103,6 +103,7 @@ struct kmem_cache {
 #endif
 #ifdef CONFIG_MEMCG_KMEM
 	struct memcg_cache_params *memcg_params;
+	int max_attr_size; /* for propagation, maximum size of a stored attr */
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/mm/slub.c b/mm/slub.c
index 692177bebdf0..21c94d9695ec 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -201,13 +201,14 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
-
+static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
 static inline void sysfs_slab_remove(struct kmem_cache *s) { }
 
+static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -3865,6 +3866,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
 	if (slab_state <= UP)
 		return 0;
 
+	memcg_propagate_slab_attrs(s);
 	mutex_unlock(&slab_mutex);
 	err = sysfs_slab_add(s);
 	mutex_lock(&slab_mutex);
@@ -5098,10 +5100,82 @@ static ssize_t slab_attr_store(struct kobject *kobj,
 		return -EIO;
 
 	err = attribute->store(s, buf, len);
+#ifdef CONFIG_MEMCG_KMEM
+	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
+		int i;
+
+		mutex_lock(&slab_mutex);
+		if (s->max_attr_size < len)
+			s->max_attr_size = len;
 
+		for_each_memcg_cache_index(i) {
+			struct kmem_cache *c = cache_from_memcg(s, i);
+			/*
+			 * This function's return value is determined by the
+			 * parent cache only
+			 */
+			if (c)
+				attribute->store(c, buf, len);
+		}
+		mutex_unlock(&slab_mutex);
+	}
+#endif
 	return err;
 }
 
+static void memcg_propagate_slab_attrs(struct kmem_cache *s)
+{
+#ifdef CONFIG_MEMCG_KMEM
+	int i;
+	char *buffer = NULL;
+
+	if (!is_root_cache(s))
+		return;
+
+	/*
+	 * This mean this cache had no attribute written. Therefore, no point
+	 * in copying default values around
+	 */
+	if (!s->max_attr_size)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
+		char mbuf[64];
+		char *buf;
+		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+
+		if (!attr || !attr->store || !attr->show)
+			continue;
+
+		/*
+		 * It is really bad that we have to allocate here, so we will
+		 * do it only as a fallback. If we actually allocate, though,
+		 * we can just use the allocated buffer until the end.
+		 *
+		 * Most of the slub attributes will tend to be very small in
+		 * size, but sysfs allows buffers up to a page, so they can
+		 * theoretically happen.
+		 */
+		if (buffer)
+			buf = buffer;
+		else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+			buf = mbuf;
+		else {
+			buffer = (char *) get_zeroed_page(GFP_KERNEL);
+			if (WARN_ON(!buffer))
+				continue;
+			buf = buffer;
+		}
+
+		attr->show(s->memcg_params->root_cache, buf);
+		attr->store(s, buf, strlen(buf));
+	}
+
+	if (buffer)
+		free_page((unsigned long)buffer);
+#endif
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
-- 
cgit v1.2.3-55-g7522


From ebe945c27628fca03723582eba138acc2e2f3d15 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 18 Dec 2012 14:23:10 -0800
Subject: memcg: add comments clarifying aspects of cache attribute propagation

This patch clarifies two aspects of cache attribute propagation.

First, the expected context for the for_each_memcg_cache macro in
memcontrol.h.  The usages already in the codebase are safe.  In mm/slub.c,
it is trivially safe because the lock is acquired right before the loop.
In mm/slab.c, it is less so: the lock is acquired by an outer function a
few steps back in the stack, so a VM_BUG_ON() is added to make sure it is
indeed safe.

A comment is also added to detail why we are returning the value of the
parent cache and ignoring the children's when we propagate the attributes.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++++++
 mm/slab.c                  |  1 +
 mm/slub.c                  | 21 +++++++++++++++++----
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ea02ff970836..0108a56f814e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -422,6 +422,12 @@ static inline void sock_release_memcg(struct sock *sk)
 extern struct static_key memcg_kmem_enabled_key;
 
 extern int memcg_limited_groups_array_size;
+
+/*
+ * Helper macro to loop through all memcg-specific caches. Callers must still
+ * check if the cache is valid (it is either valid or NULL).
+ * the slab_mutex must be held when looping through those caches
+ */
 #define for_each_memcg_cache_index(_idx)	\
 	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
 
diff --git a/mm/slab.c b/mm/slab.c
index 4dcbf96a77b4..e7667a3584bc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4099,6 +4099,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	if ((ret < 0) || !is_root_cache(cachep))
 		return ret;
 
+	VM_BUG_ON(!mutex_is_locked(&slab_mutex));
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg(cachep, i);
 		if (c)
diff --git a/mm/slub.c b/mm/slub.c
index 21c94d9695ec..efe2cffc29b0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5108,12 +5108,25 @@ static ssize_t slab_attr_store(struct kobject *kobj,
 		if (s->max_attr_size < len)
 			s->max_attr_size = len;
 
+		/*
+		 * This is a best effort propagation, so this function's return
+		 * value will be determined by the parent cache only. This is
+		 * basically because not all attributes will have a well
+		 * defined semantics for rollbacks - most of the actions will
+		 * have permanent effects.
+		 *
+		 * Returning the error value of any of the children that fail
+		 * is not 100 % defined, in the sense that users seeing the
+		 * error code won't be able to know anything about the state of
+		 * the cache.
+		 *
+		 * Only returning the error code for the parent cache at least
+		 * has well defined semantics. The cache being written to
+		 * directly either failed or succeeded, in which case we loop
+		 * through the descendants with best-effort propagation.
+		 */
 		for_each_memcg_cache_index(i) {
 			struct kmem_cache *c = cache_from_memcg(s, i);
-			/*
-			 * This function's return value is determined by the
-			 * parent cache only
-			 */
 			if (c)
 				attribute->store(c, buf, len);
 		}
-- 
cgit v1.2.3-55-g7522


From 7179e7bf4592ac5a7b30257a7df6259ee81e51da Mon Sep 17 00:00:00 2001
From: Jianguo Wu
Date: Tue, 18 Dec 2012 14:23:19 -0800
Subject: mm/hugetlb: create hugetlb cgroup file in hugetlb_init

Build kernel with CONFIG_HUGETLBFS=y,CONFIG_HUGETLB_PAGE=y and
CONFIG_CGROUP_HUGETLB=y, then specify hugepagesz=xx boot option, system
will fail to boot.

This failure is caused by following code path:

  setup_hugepagesz
    hugetlb_add_hstate
      hugetlb_cgroup_file_init
        cgroup_add_cftypes
          kzalloc <--slab is *not available* yet

For this path, slab is not available yet, so memory allocated will be
failed, and cause WARN_ON() in hugetlb_cgroup_file_init().

So I move hugetlb_cgroup_file_init() into hugetlb_init().

[akpm@linux-foundation.org: tweak coding-style, remove pointless __init on inlined function]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h |  5 ++---
 mm/hugetlb.c                   | 11 +----------
 mm/hugetlb_cgroup.c            | 19 +++++++++++++++++--
 3 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index d73878c694b3..ce8217f7b5c2 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
-extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);
 
@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }
 
-static inline int __init hugetlb_cgroup_file_init(int idx)
+static inline void hugetlb_cgroup_file_init(void)
 {
-	return 0;
 }
 
 static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e5318c7793ae..4f3ea0b1e57c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1906,14 +1906,12 @@ static int __init hugetlb_init(void)
 		default_hstate.max_huge_pages = default_hstate_max_huge_pages;
 
 	hugetlb_init_hstates();
-
 	gather_bootmem_prealloc();
-
 	report_hugepages();
 
 	hugetlb_sysfs_init();
-
 	hugetlb_register_all_nodes();
+	hugetlb_cgroup_file_init();
 
 	return 0;
 }
@@ -1943,13 +1941,6 @@ void __init hugetlb_add_hstate(unsigned order)
 	h->next_nid_to_free = first_node(node_states[N_MEMORY]);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);
-	/*
-	 * Add cgroup control files only if the huge page consists
-	 * of more than two normal pages. This is because we use
-	 * page[2].lru.next for storing cgoup details.
-	 */
-	if (order >= HUGETLB_CGROUP_MIN_ORDER)
-		hugetlb_cgroup_file_init(hugetlb_max_hstate - 1);
 
 	parsed_hstate = h;
 }
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b5bde7a5c017..9cea7de22ffb 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -333,7 +333,7 @@ static char *mem_fmt(char *buf, int size, unsigned long hsize)
 	return buf;
 }
 
-int __init hugetlb_cgroup_file_init(int idx)
+static void __init __hugetlb_cgroup_file_init(int idx)
 {
 	char buf[32];
 	struct cftype *cft;
@@ -375,7 +375,22 @@ int __init hugetlb_cgroup_file_init(int idx)
 
 	WARN_ON(cgroup_add_cftypes(&hugetlb_subsys, h->cgroup_files));
 
-	return 0;
+	return;
+}
+
+void __init hugetlb_cgroup_file_init(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		/*
+		 * Add cgroup control files only if the huge page consists
+		 * of more than two normal pages. This is because we use
+		 * page[2].lru.next for storing cgroup details.
+		 */
+		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
+			__hugetlb_cgroup_file_init(hstate_index(h));
+	}
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From 59771079c18c44e39106f0f30054025acafadb41 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 19 Dec 2012 07:18:35 -0800
Subject: blk: avoid divide-by-zero with zero discard granularity

Commit 8dd2cb7e880d ("block: discard granularity might not be power of
2") changed a couple of 'binary and' operations into modulus operations.
Which turned the harmless case of a zero discard_granularity into a
possible divide-by-zero.

The code also had a much more subtle bug: it was doing the modulus of a
value in bytes using 'sector_t'.  That was always conceptually wrong,
but didn't actually matter back when the code assumed a power-of-two
granularity: we only looked at the low bits anyway.

But with potentially arbitrary sector numbers, using a 'sector_t' to
express bytes is very very wrong: depending on configuration it limits
the starting offset of the device to just 32 bits, and any overflow
would result in a wrong value if the modulus wasn't a power-of-two.

So re-write the code to not only protect against the divide-by-zero, but
to do the starting sector arithmetic in sectors, and using the proper
types.

[ For any mathematicians out there: it also looks monumentally stupid to
  do the 'modulo granularity' operation *twice*, never mind having a "+
  granularity" in the second modulus op.

  But that's the easiest way to avoid negative values or overflow, and
  it is how the original code was done. ]

Reported-by: Ingo Molnar <mingo@kernel.org>
Reported-by: Doug Anderson <dianders@chromium.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Shaohua Li <shli@fusionio.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index acb4f7bbbd32..f94bc83011ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1188,14 +1188,25 @@ static inline int queue_discard_alignment(struct request_queue *q)
 
 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	sector_t alignment = sector << 9;
-	alignment = sector_div(alignment, lim->discard_granularity);
+	unsigned int alignment, granularity, offset;
 
 	if (!lim->max_discard_sectors)
 		return 0;
 
-	alignment = lim->discard_granularity + lim->discard_alignment - alignment;
-	return sector_div(alignment, lim->discard_granularity);
+	/* Why are these in bytes, not sectors? */
+	alignment = lim->discard_alignment >> 9;
+	granularity = lim->discard_granularity >> 9;
+	if (!granularity)
+		return 0;
+
+	/* Offset of the partition start in 'granularity' sectors */
+	offset = sector_div(sector, granularity);
+
+	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
+	offset = (granularity + alignment - offset) % granularity;
+
+	/* Turn it back into bytes, gaah */
+	return offset << 9;
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
-- 
cgit v1.2.3-55-g7522


From ab28698d33af05abab0bcf8021eafe38f7434f24 Mon Sep 17 00:00:00 2001
From: Jonas Gorski
Date: Wed, 19 Dec 2012 09:10:09 -0600
Subject: of: define struct device in of_platform.h if !OF_DEVICE and
 !OF_ADDRESS

Fixes the following warning:

include/linux/of_platform.h:106:13: warning: 'struct device' declared
inside parameter list [enabled by default]
include/linux/of_platform.h:106:13: warning: its scope is only this
definition or declaration, which is probably not what you want [enabled
by default]

Signed-off-by: Jonas Gorski <jogo@openwrt.org>
Signed-off-by: Rob Herring <robherring2@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_platform.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index b47d2040c9f2..3863a4dbdf18 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -100,6 +100,7 @@ extern int of_platform_populate(struct device_node *root,
 
 #if !defined(CONFIG_OF_ADDRESS)
 struct of_dev_auxdata;
+struct device;
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
 					const struct of_dev_auxdata *lookup,
-- 
cgit v1.2.3-55-g7522


From 3c439b5586e9200f7e6287ee77c175c4d5b0eeed Mon Sep 17 00:00:00 2001
From: Jack Morgenstein
Date: Thu, 6 Dec 2012 17:12:00 +0000
Subject: mlx4_core: Allow choosing flow steering mode

Device managed flow steering will be enabled only under administrator
directive provided through setting the existing module parameter
log_num_mgm_entry_size to -1 (if the device actually supports flow
steering).  If flow steering isn't requested or not available, the
driver will use the value of log_num_mgm_entry_size and B0 steering.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 52 ++++++++++++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx4/mcg.c  |  7 +----
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  6 ++--
 include/linux/mlx4/device.h               |  1 +
 4 files changed, 50 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 680d81026cbd..e1bafffbc3b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -85,15 +85,15 @@ static int probe_vf;
 module_param(probe_vf, int, 0644);
 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");
 
-int mlx4_log_num_mgm_entry_size = 10;
+int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
 module_param_named(log_num_mgm_entry_size,
 			mlx4_log_num_mgm_entry_size, int, 0444);
 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " of qp per mcg, for example:"
-					 " 10 gives 248.range: 9<="
+					 " 10 gives 248.range: 7 <="
 					 " log_num_mgm_entry_size <= 12."
-					 " Not in use with device managed"
-					 " flow steering");
+					 " To activate device managed"
+					 " flow steering when available, set to -1");
 
 static bool enable_64b_cqe_eqe;
 module_param(enable_64b_cqe_eqe, bool, 0444);
@@ -1318,12 +1318,30 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
 	}
 }
 
+static int choose_log_fs_mgm_entry_size(int qp_per_entry)
+{
+	int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
+
+	for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
+	      i++) {
+		if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
+			break;
+	}
+
+	return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
+}
+
 static void choose_steering_mode(struct mlx4_dev *dev,
 				 struct mlx4_dev_cap *dev_cap)
 {
-	if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
+	if (mlx4_log_num_mgm_entry_size == -1 &&
+	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
 	    (!mlx4_is_mfunc(dev) ||
-	     (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1)))) {
+	     (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) &&
+	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
+		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
+		dev->oper_log_mgm_entry_size =
+			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
 		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
 		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
 		dev->caps.fs_log_max_ucast_qp_range_size =
@@ -1340,10 +1358,17 @@ static void choose_steering_mode(struct mlx4_dev *dev,
 				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
 					  "set to use B0 steering. Falling back to A0 steering mode.\n");
 		}
+		dev->oper_log_mgm_entry_size =
+			mlx4_log_num_mgm_entry_size > 0 ?
+			mlx4_log_num_mgm_entry_size :
+			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
 		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
 	}
-	mlx4_dbg(dev, "Steering mode is: %s\n",
-		 mlx4_steering_mode_str(dev->caps.steering_mode));
+	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
+		 "modparam log_num_mgm_entry_size = %d\n",
+		 mlx4_steering_mode_str(dev->caps.steering_mode),
+		 dev->oper_log_mgm_entry_size,
+		 mlx4_log_num_mgm_entry_size);
 }
 
 static int mlx4_init_hca(struct mlx4_dev *dev)
@@ -2479,6 +2504,17 @@ static int __init mlx4_verify_params(void)
 		port_type_array[0] = true;
 	}
 
+	if (mlx4_log_num_mgm_entry_size != -1 &&
+	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
+		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
+			   "in legal range (-1 or %d..%d)\n",
+			   mlx4_log_num_mgm_entry_size,
+			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index e151c21baf2b..1ee4db3c6400 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -54,12 +54,7 @@ struct mlx4_mgm {
 
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
 {
-	if (dev->caps.steering_mode ==
-	    MLX4_STEERING_MODE_DEVICE_MANAGED)
-		return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE;
-	else
-		return min((1 << mlx4_log_num_mgm_entry_size),
-			   MLX4_MAX_MGM_ENTRY_SIZE);
+	return 1 << dev->oper_log_mgm_entry_size;
 }
 
 int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 1cf42036d7bb..116c5c29d2d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -94,8 +94,10 @@ enum {
 };
 
 enum {
-	MLX4_MAX_MGM_ENTRY_SIZE = 0x1000,
-	MLX4_MAX_QP_PER_MGM	= 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2),
+	MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10,
+	MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
+	MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
+	MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2),
 	MLX4_MTT_ENTRY_PER_SEG	= 8,
 };
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 21821da2abfd..20ea939c22a6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -625,6 +625,7 @@ struct mlx4_dev {
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
 	int			num_vfs;
+	int			oper_log_mgm_entry_size;
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 };
-- 
cgit v1.2.3-55-g7522


From a1c088e01b71d90852b0df5a77cdae46bd0e0c05 Mon Sep 17 00:00:00 2001
From: Oliver Neukum
Date: Tue, 18 Dec 2012 04:45:29 +0000
Subject: usbnet: handle PM failure gracefully

If a device fails to do remote wakeup, this is no reason
to abort an open totally. This patch just continues without
runtime PM.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 15 ++++++++-------
 include/linux/usb/usbnet.h |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index c04110ba677f..50ed7ab09c9f 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -719,7 +719,8 @@ int usbnet_stop (struct net_device *net)
 	dev->flags = 0;
 	del_timer_sync (&dev->delay);
 	tasklet_kill (&dev->bh);
-	if (info->manage_power)
+	if (info->manage_power &&
+	    !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags))
 		info->manage_power(dev, 0);
 	else
 		usb_autopm_put_interface(dev->intf);
@@ -794,14 +795,14 @@ int usbnet_open (struct net_device *net)
 	tasklet_schedule (&dev->bh);
 	if (info->manage_power) {
 		retval = info->manage_power(dev, 1);
-		if (retval < 0)
-			goto done_manage_power_error;
-		usb_autopm_put_interface(dev->intf);
+		if (retval < 0) {
+			retval = 0;
+			set_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
+		} else {
+			usb_autopm_put_interface(dev->intf);
+		}
 	}
 	return retval;
-
-done_manage_power_error:
-	clear_bit(EVENT_DEV_OPEN, &dev->flags);
 done:
 	usb_autopm_put_interface(dev->intf);
 done_nopm:
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 9bbeabf66c54..288b32aadab2 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -69,6 +69,7 @@ struct usbnet {
 #		define EVENT_DEV_ASLEEP 6
 #		define EVENT_DEV_OPEN	7
 #		define EVENT_DEVICE_REPORT_IDLE	8
+#		define EVENT_NO_RUNTIME_PM	9
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
cgit v1.2.3-55-g7522


From 2dd7c8cf29769f6b66f26b501db2364640c2c9d0 Mon Sep 17 00:00:00 2001
From: Oliver Neukum
Date: Tue, 18 Dec 2012 04:45:52 +0000
Subject: usbnet: generic manage_power()

Centralise common code for manage_power() in usbnet
by making a generic simple implementation

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 10 ++++++++++
 include/linux/usb/usbnet.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 50ed7ab09c9f..3d4bf01641b4 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1616,6 +1616,16 @@ void usbnet_device_suggests_idle(struct usbnet *dev)
 }
 EXPORT_SYMBOL(usbnet_device_suggests_idle);
 
+/*
+ * For devices that can do without special commands
+ */
+int usbnet_manage_power(struct usbnet *dev, int on)
+{
+	dev->intf->needs_remote_wakeup = on;
+	return 0;
+}
+EXPORT_SYMBOL(usbnet_manage_power);
+
 /*-------------------------------------------------------------------------*/
 static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 			     u16 value, u16 index, void *data, u16 size)
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 288b32aadab2..bd45eb7bedc8 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -241,4 +241,6 @@ extern void usbnet_set_msglevel(struct net_device *, u32);
 extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 extern int usbnet_nway_reset(struct net_device *net);
 
+extern int usbnet_manage_power(struct usbnet *, int);
+
 #endif /* __LINUX_USB_USBNET_H */
-- 
cgit v1.2.3-55-g7522


From cf13a84d174947df4bb809edfb4887393642303e Mon Sep 17 00:00:00 2001
From: Fabio Porcedda
Date: Fri, 5 Oct 2012 12:16:09 +0200
Subject: watchdog: WatchDog Timer Driver Core: fix comment

Signed-off-by: Fabio Porcedda <fabio.porcedda@gmail.com>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 87490ac4bd87..3a9df2f43be6 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -129,7 +129,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
 	return wdd->driver_data;
 }
 
-/* drivers/watchdog/core/watchdog_core.c */
+/* drivers/watchdog/watchdog_core.c */
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
 
-- 
cgit v1.2.3-55-g7522


From 468366138850f20543f1d4878028900672b23dae Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 23 Nov 2012 09:12:59 -0500
Subject: COMPAT_SYSCALL_DEFINE: infrastructure

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/s390/include/asm/compat.h |  3 +++
 include/linux/compat.h         | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 18cd6b592650..f8c6df6cd1f0 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -7,6 +7,9 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+#define __SC_DELOUSE(t,v) (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v))
+
 #define PSW32_MASK_PER		0x40000000UL
 #define PSW32_MASK_DAT		0x04000000UL
 #define PSW32_MASK_IO		0x02000000UL
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 784ebfe63c48..a7877fa809fd 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -23,6 +23,48 @@
 #define COMPAT_USE_64BIT_TIME 0
 #endif
 
+#ifndef __SC_DELOUSE
+#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v))
+#endif
+
+#define __SC_CCAST1(t1, a1)      __SC_DELOUSE(t1,a1)
+#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__)
+#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__)
+#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__)
+#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__)
+#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE1(name, ...) \
+        COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE2(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE3(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE4(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE5(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE6(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
+
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__));	\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__));	\
+	asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__))	\
+	{								\
+		return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__));	\
+	}								\
+	SYSCALL_ALIAS(compat_sys##name, compat_SyS##name);		\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__))
+
+#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__))
+
+#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
-- 
cgit v1.2.3-55-g7522


From ae903caae267154de7cf8576b130ff474630596b Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 14 Dec 2012 12:44:11 -0500
Subject: Bury the conditionals from kernel_thread/kernel_execve series

All architectures have
	CONFIG_GENERIC_KERNEL_THREAD
	CONFIG_GENERIC_KERNEL_EXECVE
	__ARCH_WANT_SYS_EXECVE
None of them have __ARCH_WANT_KERNEL_EXECVE and there are only two callers
of kernel_execve() (which is a trivial wrapper for do_execve() now) left.
Kill the conditionals and make both callers use do_execve().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig                             |  6 ------
 arch/alpha/Kconfig                       |  2 --
 arch/alpha/include/asm/unistd.h          |  1 -
 arch/arm/Kconfig                         |  2 --
 arch/arm/include/asm/unistd.h            |  1 -
 arch/arm64/Kconfig                       |  2 --
 arch/arm64/include/asm/unistd.h          |  1 -
 arch/avr32/Kconfig                       |  2 --
 arch/avr32/include/asm/unistd.h          |  1 -
 arch/blackfin/Kconfig                    |  2 --
 arch/blackfin/include/asm/unistd.h       |  1 -
 arch/c6x/Kconfig                         |  2 --
 arch/c6x/include/uapi/asm/unistd.h       |  1 -
 arch/cris/Kconfig                        |  2 --
 arch/cris/include/asm/unistd.h           |  1 -
 arch/frv/Kconfig                         |  2 --
 arch/frv/include/asm/unistd.h            |  1 -
 arch/h8300/Kconfig                       |  2 --
 arch/h8300/include/asm/unistd.h          |  1 -
 arch/hexagon/Kconfig                     |  2 --
 arch/hexagon/include/uapi/asm/unistd.h   |  1 -
 arch/ia64/Kconfig                        |  2 --
 arch/ia64/include/asm/unistd.h           |  1 -
 arch/m32r/Kconfig                        |  2 --
 arch/m32r/include/asm/unistd.h           |  1 -
 arch/m68k/Kconfig                        |  2 --
 arch/m68k/include/asm/unistd.h           |  1 -
 arch/microblaze/Kconfig                  |  2 --
 arch/microblaze/include/asm/unistd.h     |  1 -
 arch/mips/Kconfig                        |  2 --
 arch/mips/include/asm/unistd.h           |  1 -
 arch/mn10300/Kconfig                     |  2 --
 arch/mn10300/include/asm/unistd.h        |  1 -
 arch/openrisc/Kconfig                    |  2 --
 arch/openrisc/include/uapi/asm/unistd.h  |  1 -
 arch/parisc/Kconfig                      |  2 --
 arch/parisc/include/asm/unistd.h         |  1 -
 arch/powerpc/Kconfig                     |  2 --
 arch/powerpc/include/asm/unistd.h        |  1 -
 arch/s390/Kconfig                        |  2 --
 arch/s390/include/asm/unistd.h           |  1 -
 arch/score/Kconfig                       |  2 --
 arch/score/include/asm/unistd.h          |  1 -
 arch/sh/Kconfig                          |  2 --
 arch/sh/include/asm/unistd.h             |  1 -
 arch/sparc/Kconfig                       |  2 --
 arch/sparc/include/asm/unistd.h          |  1 -
 arch/tile/Kconfig                        |  2 --
 arch/tile/include/asm/unistd.h           |  1 -
 arch/unicore32/Kconfig                   |  2 --
 arch/unicore32/include/uapi/asm/unistd.h |  1 -
 arch/x86/Kconfig                         |  2 --
 arch/x86/include/asm/unistd.h            |  1 -
 arch/x86/um/Kconfig                      |  2 --
 arch/xtensa/Kconfig                      |  2 --
 arch/xtensa/include/asm/unistd.h         |  1 -
 fs/exec.c                                | 21 ---------------------
 include/linux/binfmts.h                  |  4 ----
 include/linux/sched.h                    |  2 --
 include/linux/syscalls.h                 |  9 ---------
 init/main.c                              |  4 +++-
 kernel/fork.c                            |  2 --
 kernel/kmod.c                            |  6 +++---
 63 files changed, 6 insertions(+), 131 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8d698fb5ccc9..0a8dd0585d0d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -271,12 +271,6 @@ config ARCH_WANT_OLD_COMPAT_IPC
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	bool
 
-config GENERIC_KERNEL_THREAD
-	bool
-
-config GENERIC_KERNEL_EXECVE
-	bool
-
 config HAVE_ARCH_SECCOMP_FILTER
 	bool
 	help
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5dd7f5db24d4..7e3710c0cce5 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -20,8 +20,6 @@ config ALPHA
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	help
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index eb3a4664ced2..68c75f2fadb9 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -481,7 +481,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8918a2dd89b4..b789654e7e2f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -11,8 +11,6 @@ config ARM
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 7cd13cc62624..21a2700d2957 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -41,7 +41,6 @@
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_SOCKETCALL
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4b03c56ec329..a846029bebcc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -7,8 +7,6 @@ config ARM64
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_EXECVE
-	select GENERIC_KERNEL_THREAD
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d69aeea6da1e..738322945d1a 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -27,6 +27,5 @@
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 649aeb9acecb..06e73bf665e9 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -17,8 +17,6 @@ config AVR32
 	select GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index f05a9804e8e2..0bdf6371574e 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -39,7 +39,6 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ab9ff4075f4d..b6f3ad5441c5 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -45,8 +45,6 @@ config BLACKFIN
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index 460514a1a4e1..38711e28baac 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -446,7 +446,6 @@
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_VFORK
 
 /*
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 66eab3703c75..f6a3648f5ec3 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -17,8 +17,6 @@ config C6X
 	select OF
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config MMU
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index f3987a8703d9..e7d09a614d10 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -14,7 +14,6 @@
  *   more details.
  */
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 
 /* Use the standard ABI for syscalls. */
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 0cac6a49f230..c59a01dd9c0c 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -49,8 +49,6 @@ config CRIS
 	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS2
 
 config HZ
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f27b542e0ebc..5cda75a9cc1e 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -371,7 +371,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index df2eb4bd9fa2..9d262645f667 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -12,8 +12,6 @@ config FRV
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CPU_DEVICES
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index 1807d8ea8cb5..d685da17f5fb 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -29,7 +29,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 04bef4d25b4a..98fabd10e95f 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -8,8 +8,6 @@ config H8300
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SYMBOL_PREFIX
 	string
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index c2c2f5c7d6bf..566f94860c45 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -356,7 +356,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index e418803b6c8e..0744f7d7b1fd 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,8 +31,6 @@ config HEXAGON
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 2af81533bd0f..4a87cc47075c 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -27,7 +27,6 @@
  */
 
 #define sys_mmap2 sys_mmap_pgoff
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 
 #include <asm-generic/unistd.h>
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 670600468128..3279646120e3 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,8 +42,6 @@ config IA64
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 1574bca86138..8b3ff2f5b861 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -29,7 +29,6 @@
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 5183f43a2cf7..f807721e19a5 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -15,8 +15,6 @@ config M32R
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index d9e7351af2a4..cbfa39158fef 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -352,7 +352,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 953a7ba5d050..6710084e072a 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -15,8 +15,6 @@ config M68K
 	select FPU if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index a021d67cdd72..847994ce6804 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -31,7 +31,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 4bcf89148f3c..ba3b7c8c04b8 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -26,8 +26,6 @@ config MICROBLAZE
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 config SWAP
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 94d978986b75..38cabf4db548 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -422,7 +422,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
 #ifdef CONFIG_MMU
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4183e62f178c..dba9390d37cf 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -40,8 +40,6 @@ config MIPS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA if 64BIT
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index b306e2081cad..9e47cc11aa26 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -20,7 +20,6 @@
 #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 72471744a912..aa03f2e13385 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,8 +8,6 @@ config MN10300
 	select HAVE_ARCH_KGDB
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config AM33_2
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index cabf8ba73b27..e6d2ed4ba68f 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -43,7 +43,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e7f1a2993f78..05f2ba41ff1a 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,8 +22,6 @@ config OPENRISC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config MMU
 	def_bool y
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 5082b8066325..ce40b71df006 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -20,7 +20,6 @@
 
 #define sys_mmap2 sys_mmap_pgoff
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index e688a2be30f6..b77feffbadea 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -22,8 +22,6 @@ config PARISC
 	select GENERIC_STRNCPY_FROM_USER
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 	help
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 1efef41659c9..3043194547cd 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -163,7 +163,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951a517a1a0f..17903f1f356b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,10 +141,8 @@ config PPC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select GENERIC_KERNEL_THREAD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 config EARLY_PRINTK
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 76fe846ec40e..784872f93711 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -55,7 +55,6 @@
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3cbb8757704e..5029ebf7110e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -137,8 +137,6 @@ config S390
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 086bb8eaf6ab..636530872516 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -53,7 +53,6 @@
 #   define __ARCH_WANT_COMPAT_SYS_TIME
 #   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index 45893390c7dd..3b1482e7afac 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -13,8 +13,6 @@ config SCORE
        select GENERIC_CLOCKEVENTS
        select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 choice
diff --git a/arch/score/include/asm/unistd.h b/arch/score/include/asm/unistd.h
index 56001c93095a..9cb4260a5f3e 100644
--- a/arch/score/include/asm/unistd.h
+++ b/arch/score/include/asm/unistd.h
@@ -4,7 +4,6 @@
 #define __ARCH_WANT_SYSCALL_NO_FLAGS
 #define __ARCH_WANT_SYSCALL_OFF_T
 #define __ARCH_WANT_SYSCALL_DEPRECATED
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8451317eed58..babc2b826c5c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -40,8 +40,6 @@ config SUPERH
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 43d3f26b2eab..012004ed3330 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -28,7 +28,6 @@
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
 # define __ARCH_WANT_SYS_RT_SIGACTION
-# define __ARCH_WANT_SYS_EXECVE
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 0c7d365fa402..9f2edb5c5551 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -41,8 +41,6 @@ config SPARC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index c3e5d8b64171..0ecea6ed943e 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -46,7 +46,6 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index ea7f61e8bc9e..875d008828b8 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -21,8 +21,6 @@ config TILE
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index b51c6ee3cd6c..940831fe9e94 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -16,6 +16,5 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c4fbb21e802b..60651df5f952 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -16,8 +16,6 @@ config UNICORE32
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
 	select MODULES_USE_ELF_REL
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 00cf5e286fca..d4cc4559d848 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -12,5 +12,4 @@
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0df6e7d84539..01ca0ebaff0e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,8 +108,6 @@ config X86
 	select GENERIC_STRNLEN_USER
 	select HAVE_RCU_USER_QS if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0e7dea7d3669..9dcfcc1d6f92 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -50,7 +50,6 @@
 # define __ARCH_WANT_SYS_TIME
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
-# define __ARCH_WANT_SYS_EXECVE
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 8f51c39750d1..0fd20f241e40 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,8 +13,6 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2481f267be29..03a8c107e07e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -13,8 +13,6 @@ config XTENSA
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
 	select GENERIC_PCI_IOMAP
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select CLONE_BACKWARDS
 	help
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index e002dbcc88b6..eb63ea87815c 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -1,7 +1,6 @@
 #ifndef _XTENSA_UNISTD_H
 #define _XTENSA_UNISTD_H
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
 
diff --git a/fs/exec.c b/fs/exec.c
index 721a29929511..090ac91da2e9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1657,7 +1657,6 @@ int get_dumpable(struct mm_struct *mm)
 	return __get_dumpable(mm->flags);
 }
 
-#ifdef __ARCH_WANT_SYS_EXECVE
 SYSCALL_DEFINE3(execve,
 		const char __user *, filename,
 		const char __user *const __user *, argv,
@@ -1685,23 +1684,3 @@ asmlinkage long compat_sys_execve(const char __user * filename,
 	return error;
 }
 #endif
-#endif
-
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	int ret = do_execve(filename,
-			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * We were successful.  We won't be returning to our caller, but
-	 * instead to user space by manipulating the kernel stack.
-	 */
-	ret_from_kernel_execve(current_pt_regs());
-}
-#endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2630c9b41a86..8c1388c6ae27 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -121,8 +121,4 @@ extern void install_exec_creds(struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn;
-#endif
-
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1162258bcaf0..9e5a54e3d84f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,9 +2291,7 @@ extern int do_execve(const char *,
 		     const char __user * const __user *);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-#endif
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91835e7f364d..9fe5f946526e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -827,15 +827,6 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
 				  const char  __user *pathname);
 asmlinkage long sys_syncfs(int fd);
 
-#ifndef CONFIG_GENERIC_KERNEL_EXECVE
-int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
-#else
-#define kernel_execve(filename, argv, envp) \
-	do_execve(filename, \
-		(const char __user *const __user *)argv, \
-		(const char __user *const __user *)envp)
-#endif
-
 asmlinkage long sys_fork(void);
 asmlinkage long sys_vfork(void);
 #ifdef CONFIG_CLONE_BACKWARDS
diff --git a/init/main.c b/init/main.c
index e33e09df3cbc..155ac208d581 100644
--- a/init/main.c
+++ b/init/main.c
@@ -797,7 +797,9 @@ static void __init do_pre_smp_initcalls(void)
 static int run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
-	return kernel_execve(init_filename, argv_init, envp_init);
+	return do_execve(init_filename,
+		(const char __user *const __user *)argv_init,
+		(const char __user *const __user *)envp_init);
 }
 
 static void __init kernel_init_freeable(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index 540730783433..389712ffc0ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1623,7 +1623,6 @@ long do_fork(unsigned long clone_flags,
 	return nr;
 }
 
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 /*
  * Create a kernel thread.
  */
@@ -1632,7 +1631,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
-#endif
 
 #ifdef __ARCH_WANT_SYS_FORK
 SYSCALL_DEFINE0(fork)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1c317e386831..0023a87e8de6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -219,9 +219,9 @@ static int ____call_usermodehelper(void *data)
 
 	commit_creds(new);
 
-	retval = kernel_execve(sub_info->path,
-			       (const char *const *)sub_info->argv,
-			       (const char *const *)sub_info->envp);
+	retval = do_execve(sub_info->path,
+			   (const char __user *const __user *)sub_info->argv,
+			   (const char __user *const __user *)sub_info->envp);
 	if (!retval)
 		return 0;
 
-- 
cgit v1.2.3-55-g7522


From 1ca97bb541a1f5a735e697a8bba763cde3aab452 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 18 Nov 2012 12:50:10 -0500
Subject: new helper: current_user_stack_pointer()

	Cross-architecture equivalent of rdusp(); default is
user_stack_pointer(current_pt_regs()) - that works for almost all
platforms that have usp saved in pt_regs.  The only exception from
that is ia64 - we want memory stack, not the backing store for
register one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/ptrace.h    | 1 +
 arch/blackfin/include/asm/ptrace.h | 1 +
 arch/cris/include/asm/ptrace.h     | 1 +
 arch/h8300/include/asm/ptrace.h    | 1 +
 arch/ia64/include/asm/ptrace.h     | 5 +++++
 arch/m68k/include/asm/ptrace.h     | 1 +
 include/linux/ptrace.h             | 4 ++++
 7 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index b4c5b2fbb647..fd53c74ac943 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -72,6 +72,7 @@ struct switch_stack {
 #define user_mode(regs) (((regs)->ps & 8) != 0)
 #define instruction_pointer(regs) ((regs)->pc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define current_user_stack_pointer() rdusp()
 
 #define task_pt_regs(task) \
   ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1)
diff --git a/arch/blackfin/include/asm/ptrace.h b/arch/blackfin/include/asm/ptrace.h
index 10d8641180f2..c42002506a26 100644
--- a/arch/blackfin/include/asm/ptrace.h
+++ b/arch/blackfin/include/asm/ptrace.h
@@ -106,6 +106,7 @@ struct pt_regs {
 #define arch_has_single_step()	(1)
 /* common code demands this function */
 #define ptrace_disable(child) user_disable_single_step(child)
+#define current_user_stack_pointer() rdusp()
 
 extern int is_user_addr_valid(struct task_struct *child,
 			      unsigned long start, unsigned long len);
diff --git a/arch/cris/include/asm/ptrace.h b/arch/cris/include/asm/ptrace.h
index 6618893bfe8e..551c081ab62b 100644
--- a/arch/cris/include/asm/ptrace.h
+++ b/arch/cris/include/asm/ptrace.h
@@ -10,6 +10,7 @@
 #define PTRACE_SETREGS            13
 
 #define profile_pc(regs) instruction_pointer(regs)
+#define current_user_stack_pointer() rdusp()
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 7468589a128b..6183371d0c93 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -63,6 +63,7 @@ struct pt_regs {
 #define current_pt_regs() ((struct pt_regs *) \
 	(THREAD_SIZE + (unsigned long)current_thread_info()) - 1)
 #define signal_pt_regs() ((struct pt_regs *)current->thread.esp0)
+#define current_user_stack_pointer() rdusp()
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _H8300_PTRACE_H */
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index b0e973649cb9..845143990a1d 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -78,6 +78,11 @@ static inline long regs_return_value(struct pt_regs *regs)
 	unsigned long __ip = instruction_pointer(regs);			\
 	(__ip & ~3UL) + ((__ip & 3UL) << 2);				\
 })
+/*
+ * Why not default?  Because user_stack_pointer() on ia64 gives register
+ * stack backing store instead...
+ */
+#define current_user_stack_pointer() (current_pt_regs()->r12)
 
   /* given a pointer to a task_struct, return the user's pt_regs */
 # define task_pt_regs(t)		(((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
diff --git a/arch/m68k/include/asm/ptrace.h b/arch/m68k/include/asm/ptrace.h
index 0f717045bdde..a45cb6894ad3 100644
--- a/arch/m68k/include/asm/ptrace.h
+++ b/arch/m68k/include/asm/ptrace.h
@@ -15,6 +15,7 @@
 #define profile_pc(regs) instruction_pointer(regs)
 #define current_pt_regs() \
 	(struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1
+#define current_user_stack_pointer() rdusp()
 
 #define arch_has_single_step()	(1)
 
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a89ff04bddd9..a3a9d085f932 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -342,6 +342,10 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define signal_pt_regs() task_pt_regs(current)
 #endif
 
+#ifndef current_user_stack_pointer
+#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
-- 
cgit v1.2.3-55-g7522


From 5c49574ffd7ac07eae8c3b065d19e6ebc7e4760f Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 18 Nov 2012 15:29:16 -0500
Subject: new helper: restore_altstack()

to be used by rt_sigreturn instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h | 2 ++
 kernel/signal.c        | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index e19a011b43b7..5969522136fe 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -385,4 +385,6 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 
 void signals_init(void);
 
+int restore_altstack(const stack_t __user *);
+
 #endif /* _LINUX_SIGNAL_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index e75e4bd2839b..887f2fefe207 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3103,6 +3103,13 @@ out:
 	return error;
 }
 
+int restore_altstack(const stack_t __user *uss)
+{
+	int err = do_sigaltstack(uss, NULL, current_user_stack_pointer());
+	/* squash all but EFAULT for now */
+	return err == -EFAULT ? err : 0;
+}
+
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
 /**
-- 
cgit v1.2.3-55-g7522


From 9b064fc3f95a8e44e929fdf4d6037334ea03d15b Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 14 Dec 2012 13:49:35 -0500
Subject: new helper: compat_user_stack_pointer()

Compat counterpart of current_user_stack_pointer(); for most of the biarch
architectures those two are identical, but e.g. arm64 and arm use different
registers for stack pointer...

Note that amd64 variants of current_user_stack_pointer/compat_user_stack_pointer
do *not* rely on pt_regs having been through FIXUP_TOP_OF_STACK.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/compat.h | 5 +++--
 arch/x86/include/asm/ptrace.h   | 7 +++++++
 include/linux/compat.h          | 3 +++
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 37e610dc084e..d9ec40217a27 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -209,10 +209,11 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
+#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
-	struct pt_regs *regs = task_pt_regs(current);
-	return (void __user *)regs->compat_sp - len;
+	return (void __user *)compat_user_stack_pointer() - len;
 }
 
 struct compat_ipc64_perm {
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19f16ebaf4fa..7e560b6daf5d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -203,6 +203,13 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
 #endif
 }
+
+#define current_user_stack_pointer()	this_cpu_read(old_rsp)
+/* ia32 vs. x32 difference */
+#define compat_user_stack_pointer()	\
+	(test_thread_flag(TIF_IA32) 	\
+	 ? current_pt_regs()->sp 	\
+	 : this_cpu_read(old_rsp))
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/compat.h b/include/linux/compat.h
index a7877fa809fd..62bb76f91baf 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -65,6 +65,9 @@
 
 #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
 
+#ifndef compat_user_stack_pointer
+#define compat_user_stack_pointer() current_user_stack_pointer()
+#endif
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
-- 
cgit v1.2.3-55-g7522


From 6bf9adfc90370b695cb111116e15fdc0e1906270 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 14 Dec 2012 14:09:47 -0500
Subject: introduce generic sys_sigaltstack(), switch x86 and um to it

Conditional on CONFIG_GENERIC_SIGALTSTACK; architectures that do not
select it are completely unaffected

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig                     | 3 +++
 arch/um/kernel/signal.c          | 5 -----
 arch/x86/Kconfig                 | 1 +
 arch/x86/include/asm/syscalls.h  | 3 ---
 arch/x86/kernel/entry_32.S       | 1 -
 arch/x86/kernel/entry_64.S       | 1 -
 arch/x86/kernel/signal.c         | 7 -------
 arch/x86/syscalls/syscall_32.tbl | 2 +-
 arch/x86/syscalls/syscall_64.tbl | 2 +-
 arch/x86/um/Kconfig              | 1 +
 arch/x86/um/sys_call_table_32.c  | 1 -
 arch/x86/um/sys_call_table_64.c  | 1 -
 include/linux/syscalls.h         | 6 ++++++
 kernel/signal.c                  | 6 ++++++
 14 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 0a8dd0585d0d..330176824594 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -335,6 +335,9 @@ config MODULES_USE_ELF_REL
 	  Modules only use ELF REL relocations.  Modules with ELF RELA
 	  relocations will give an error.
 
+config GENERIC_SIGALTSTACK
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index db18eb6124e1..48ccf718e290 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -132,8 +132,3 @@ long sys_sigsuspend(int history0, int history1, old_sigset_t mask)
 	siginitset(&blocked, mask);
 	return sigsuspend(&blocked);
 }
-
-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
-{
-	return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs));
-}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 01ca0ebaff0e..f380614d7d89 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -111,6 +111,7 @@ config X86
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
+	select GENERIC_SIGALTSTACK
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2f8374718aa3..58b7e3eac0ae 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,9 +25,6 @@ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
 /* kernel/signal.c */
 long sys_rt_sigreturn(struct pt_regs *);
-long sys_sigaltstack(const stack_t __user *, stack_t __user *,
-		     struct pt_regs *);
-
 
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c763116c5359..ff84d5469d77 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -739,7 +739,6 @@ ENTRY(ptregs_##name) ; \
 ENDPROC(ptregs_##name)
 
 PTREGSCALL1(iopl)
-PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
 PTREGSCALL2(vm86)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2363e820ed68..6e462019f195 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -864,7 +864,6 @@ END(stub_\func)
 	FORK_LIKE  clone
 	FORK_LIKE  fork
 	FORK_LIKE  vfork
-	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 	PTREGSCALL stub_iopl, sys_iopl, %rsi
 
 ENTRY(ptregscall_common)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 70b27ee6118e..16d065c23baf 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -602,13 +602,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 }
 #endif /* CONFIG_X86_32 */
 
-long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index ee3c220ee500..62c7b222e45c 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -192,7 +192,7 @@
 183	i386	getcwd			sys_getcwd
 184	i386	capget			sys_capget
 185	i386	capset			sys_capset
-186	i386	sigaltstack		ptregs_sigaltstack		stub32_sigaltstack
+186	i386	sigaltstack		sys_sigaltstack			stub32_sigaltstack
 187	i386	sendfile		sys_sendfile			sys32_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a582bfed95bb..6ffa7f9d005e 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -137,7 +137,7 @@
 128	64	rt_sigtimedwait		sys_rt_sigtimedwait
 129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
 130	common	rt_sigsuspend		sys_rt_sigsuspend
-131	64	sigaltstack		stub_sigaltstack
+131	64	sigaltstack		sys_sigaltstack
 132	common	utime			sys_utime
 133	common	mknod			sys_mknod
 134	64	uselib
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 0fd20f241e40..96b89d874ead 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,6 +13,7 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_SIGALTSTACK
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 812e98c098e4..a0c3b0d1a122 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -27,7 +27,6 @@
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
 #define ptregs_vm86 sys_vm86
-#define ptregs_sigaltstack sys_sigaltstack
 
 #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
 #include <asm/syscalls_32.h>
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 170bd926a69c..f2f0723070ca 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
 #define stub_fork sys_fork
 #define stub_vfork sys_vfork
 #define stub_execve sys_execve
-#define stub_sigaltstack sys_sigaltstack
 #define stub_rt_sigreturn sys_rt_sigreturn
 
 #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9fe5f946526e..6ca1e08210c6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -63,6 +63,7 @@ struct getcpu_cache;
 struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
+struct sigaltstack;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -299,6 +300,11 @@ asmlinkage long sys_personality(unsigned int personality);
 asmlinkage long sys_sigpending(old_sigset_t __user *set);
 asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
 				old_sigset_t __user *oset);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss,
+				struct sigaltstack __user *uoss);
+#endif
+
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
 asmlinkage long sys_setitimer(int which,
 				struct itimerval __user *value,
diff --git a/kernel/signal.c b/kernel/signal.c
index 887f2fefe207..f05f4c4150d9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3102,6 +3102,12 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
 out:
 	return error;
 }
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
+{
+	return do_sigaltstack(uss, uoss, current_user_stack_pointer());
+}
+#endif
 
 int restore_altstack(const stack_t __user *uss)
 {
-- 
cgit v1.2.3-55-g7522


From 9026843952adac5b123c7b8dc961e5c15828d9e1 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 14 Dec 2012 14:47:53 -0500
Subject: generic compat_sys_sigaltstack()

Again, conditional on CONFIG_GENERIC_SIGALTSTACK

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_signal.c      | 50 +---------------------------------------
 arch/x86/ia32/ia32entry.S        |  1 -
 arch/x86/include/asm/ia32.h      | 10 ++------
 arch/x86/include/asm/sys_ia32.h  |  2 --
 arch/x86/kernel/entry_64.S       |  2 --
 arch/x86/kernel/signal.c         |  4 +---
 arch/x86/syscalls/syscall_32.tbl |  2 +-
 arch/x86/syscalls/syscall_64.tbl |  2 +-
 include/linux/compat.h           | 16 +++++++++++++
 kernel/signal.c                  | 45 ++++++++++++++++++++++++++++++++++++
 10 files changed, 67 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index efc6a958b71d..a866411a2fcc 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -136,52 +136,6 @@ asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
 	return sigsuspend(&blocked);
 }
 
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
-				  stack_ia32_t __user *uoss_ptr,
-				  struct pt_regs *regs)
-{
-	stack_t uss, uoss;
-	int ret, err = 0;
-	mm_segment_t seg;
-
-	if (uss_ptr) {
-		u32 ptr;
-
-		memset(&uss, 0, sizeof(stack_t));
-		if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)))
-			return -EFAULT;
-
-		get_user_try {
-			get_user_ex(ptr, &uss_ptr->ss_sp);
-			get_user_ex(uss.ss_flags, &uss_ptr->ss_flags);
-			get_user_ex(uss.ss_size, &uss_ptr->ss_size);
-		} get_user_catch(err);
-
-		if (err)
-			return -EFAULT;
-		uss.ss_sp = compat_ptr(ptr);
-	}
-	seg = get_fs();
-	set_fs(KERNEL_DS);
-	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
-			     (stack_t __force __user *) &uoss, regs->sp);
-	set_fs(seg);
-	if (ret >= 0 && uoss_ptr)  {
-		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
-			return -EFAULT;
-
-		put_user_try {
-			put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp);
-			put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags);
-			put_user_ex(uoss.ss_size, &uoss_ptr->ss_size);
-		} put_user_catch(err);
-
-		if (err)
-			ret = -EFAULT;
-	}
-	return ret;
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -292,7 +246,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
 	unsigned int ax;
-	struct pt_regs tregs;
 
 	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -306,8 +259,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	tregs = *regs;
-	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 32e6f05ddaaa..102ff7cb3e41 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -464,7 +464,6 @@ GLOBAL(\label)
 
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
-	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
 	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index e6232773ce49..4c6da2e4bb1d 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -29,16 +29,10 @@ struct old_sigaction32 {
 	unsigned int sa_restorer;	/* Another 32 bit pointer */
 };
 
-typedef struct sigaltstack_ia32 {
-	unsigned int	ss_sp;
-	int		ss_flags;
-	unsigned int	ss_size;
-} stack_ia32_t;
-
 struct ucontext_ia32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
-	stack_ia32_t	  uc_stack;
+	compat_stack_t	  uc_stack;
 	struct sigcontext_ia32 uc_mcontext;
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
@@ -46,7 +40,7 @@ struct ucontext_ia32 {
 struct ucontext_x32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
-	stack_ia32_t	  uc_stack;
+	compat_stack_t	  uc_stack;
 	unsigned int	  uc__pad0;     /* needed for alignment */
 	struct sigcontext uc_mcontext;  /* the 64-bit sigcontext type */
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index c76fae4d90be..31f61f96e0fb 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -69,8 +69,6 @@ asmlinkage long sys32_fallocate(int, int, unsigned,
 
 /* ia32/ia32_signal.c */
 asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
-				  stack_ia32_t __user *, struct pt_regs *);
 asmlinkage long sys32_sigreturn(struct pt_regs *);
 asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6e462019f195..86d81199bbde 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -912,8 +912,6 @@ ENTRY(stub_rt_sigreturn)
 END(stub_rt_sigreturn)
 
 #ifdef CONFIG_X86_X32_ABI
-	PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
-
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 16d065c23baf..b17ed37c61a2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -857,7 +857,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
 	unsigned long ax;
-	struct pt_regs tregs;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -871,8 +870,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	tregs = *regs;
-	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 62c7b222e45c..235226efaa7f 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -192,7 +192,7 @@
 183	i386	getcwd			sys_getcwd
 184	i386	capget			sys_capget
 185	i386	capset			sys_capset
-186	i386	sigaltstack		sys_sigaltstack			stub32_sigaltstack
+186	i386	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
 187	i386	sendfile		sys_sendfile			sys32_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 6ffa7f9d005e..c68cbe7174e7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -337,7 +337,7 @@
 522	x32	rt_sigpending		sys32_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
 524	x32	rt_sigqueueinfo		sys32_rt_sigqueueinfo
-525	x32	sigaltstack		stub_x32_sigaltstack
+525	x32	sigaltstack		compat_sys_sigaltstack
 526	x32	timer_create		compat_sys_timer_create
 527	x32	mq_notify		compat_sys_mq_notify
 528	x32	kexec_load		compat_sys_kexec_load
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 62bb76f91baf..cb5637e2ee2c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -68,6 +68,16 @@
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
 #endif
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+#ifndef compat_sigaltstack	/* we'll need that for MIPS */
+typedef struct compat_sigaltstack {
+	compat_uptr_t			ss_sp;
+	int				ss_flags;
+	compat_size_t			ss_size;
+} compat_stack_t;
+#endif
+#endif
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
@@ -632,6 +642,12 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr);
+
+int compat_restore_altstack(const compat_stack_t __user *uss);
+#endif
 
 #else
 
diff --git a/kernel/signal.c b/kernel/signal.c
index f05f4c4150d9..aee85bd76b8a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -31,6 +31,7 @@
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
 #include <linux/uprobes.h>
+#include <linux/compat.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
 
@@ -3116,6 +3117,50 @@ int restore_altstack(const stack_t __user *uss)
 	return err == -EFAULT ? err : 0;
 }
 
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr)
+{
+	stack_t uss, uoss;
+	int ret;
+	mm_segment_t seg;
+
+	if (uss_ptr) {
+		compat_stack_t uss32;
+
+		memset(&uss, 0, sizeof(stack_t));
+		if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t)))
+			return -EFAULT;
+		uss.ss_sp = compat_ptr(uss32.ss_sp);
+		uss.ss_flags = uss32.ss_flags;
+		uss.ss_size = uss32.ss_size;
+	}
+	seg = get_fs();
+	set_fs(KERNEL_DS);
+	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
+			     (stack_t __force __user *) &uoss,
+			     compat_user_stack_pointer());
+	set_fs(seg);
+	if (ret >= 0 && uoss_ptr)  {
+		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) ||
+		    __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+		    __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
+		    __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+			ret = -EFAULT;
+	}
+	return ret;
+}
+
+int compat_restore_altstack(const compat_stack_t __user *uss)
+{
+	int err = compat_sys_sigaltstack(uss, NULL);
+	/* squash all but -EFAULT for now */
+	return err == -EFAULT ? err : 0;
+}
+#endif
+#endif
+
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
 /**
-- 
cgit v1.2.3-55-g7522


From c40702c49faef05ae324f121d8b3e215244ee152 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 20 Nov 2012 14:24:26 -0500
Subject: new helpers: __save_altstack/__compat_save_altstack, switch x86 and
 um to those

note that they are relying on access_ok() already checked by caller.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_signal.c |  5 +----
 arch/x86/kernel/signal.c    | 18 ++++--------------
 arch/x86/um/signal.c        |  9 ++-------
 include/linux/compat.h      |  1 +
 include/linux/signal.h      |  1 +
 kernel/signal.c             | 16 ++++++++++++++++
 6 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a866411a2fcc..a1daf4a65009 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -467,10 +467,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		if (ka->sa.sa_flags & SA_RESTORER)
 			restorer = ka->sa.sa_restorer;
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b17ed37c61a2..a6c8a347b8c6 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -363,10 +363,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  */
 		restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -413,7 +410,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	struct rt_sigframe __user *frame;
 	void __user *fp = NULL;
 	int err = 0;
-	struct task_struct *me = current;
 
 	frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
 
@@ -432,10 +428,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  If provided, use a stub
 		   already in userspace.  */
@@ -502,10 +495,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
 
 		if (ka->sa.sa_flags & SA_RESTORER) {
@@ -651,7 +641,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bdaa08cfbcf4..71cef48ea5cd 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -342,9 +342,7 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
 {
 	int err = 0;
 
-	err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
-	err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
-	err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
+	err |= __save_altstack(&uc->uc_stack, sp);
 	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
 	err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
 	return err;
@@ -529,10 +527,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
 	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
 			       set->sig[0]);
 	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index cb5637e2ee2c..334813307ec1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -647,6 +647,7 @@ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
 				       compat_stack_t __user *uoss_ptr);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
+int __compat_save_altstack(compat_stack_t __user *, unsigned long);
 #endif
 
 #else
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 5969522136fe..0a89ffc48466 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -386,5 +386,6 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 void signals_init(void);
 
 int restore_altstack(const stack_t __user *);
+int __save_altstack(stack_t __user *, unsigned long);
 
 #endif /* _LINUX_SIGNAL_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index aee85bd76b8a..f072513302c3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3117,6 +3117,14 @@ int restore_altstack(const stack_t __user *uss)
 	return err == -EFAULT ? err : 0;
 }
 
+int __save_altstack(stack_t __user *uss, unsigned long sp)
+{
+	struct task_struct *t = current;
+	return  __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
+		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+		__put_user(t->sas_ss_size, &uss->ss_size);
+}
+
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_GENERIC_SIGALTSTACK
 asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
@@ -3158,6 +3166,14 @@ int compat_restore_altstack(const compat_stack_t __user *uss)
 	/* squash all but -EFAULT for now */
 	return err == -EFAULT ? err : 0;
 }
+
+int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
+{
+	struct task_struct *t = current;
+	return  __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
+		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+		__put_user(t->sas_ss_size, &uss->ss_size);
+}
 #endif
 #endif
 
-- 
cgit v1.2.3-55-g7522


From ada65c74059f8c104f1b467c126205471634c435 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst
Date: Wed, 12 Dec 2012 10:23:03 +0100
Subject: dma-buf: remove fallback for !CONFIG_DMA_SHARED_BUFFER

Documentation says that code requiring dma-buf should add it to
select, so inline fallbacks are not going to be used. A link error
will make it obvious what went wrong, instead of silently doing
nothing at runtime.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 include/linux/dma-buf.h | 99 -------------------------------------------------
 1 file changed, 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index eb48f3816df9..bd2e52ccc4f2 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -156,7 +156,6 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
 	get_file(dmabuf->file);
 }
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 							struct device *dev);
 void dma_buf_detach(struct dma_buf *dmabuf,
@@ -184,103 +183,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 		 unsigned long);
 void *dma_buf_vmap(struct dma_buf *);
 void dma_buf_vunmap(struct dma_buf *, void *vaddr);
-#else
-
-static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_detach(struct dma_buf *dmabuf,
-				  struct dma_buf_attachment *dmabuf_attach)
-{
-	return;
-}
-
-static inline struct dma_buf *dma_buf_export(void *priv,
-					     const struct dma_buf_ops *ops,
-					     size_t size, int flags)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline int dma_buf_fd(struct dma_buf *dmabuf, int flags)
-{
-	return -ENODEV;
-}
-
-static inline struct dma_buf *dma_buf_get(int fd)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_put(struct dma_buf *dmabuf)
-{
-	return;
-}
-
-static inline struct sg_table *dma_buf_map_attachment(
-	struct dma_buf_attachment *attach, enum dma_data_direction write)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
-			struct sg_table *sg, enum dma_data_direction dir)
-{
-	return;
-}
-
-static inline int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-					   size_t start, size_t len,
-					   enum dma_data_direction dir)
-{
-	return -ENODEV;
-}
-
-static inline void dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-					  size_t start, size_t len,
-					  enum dma_data_direction dir)
-{
-}
-
-static inline void *dma_buf_kmap_atomic(struct dma_buf *dmabuf,
-					unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap_atomic(struct dma_buf *dmabuf,
-					 unsigned long pnum, void *vaddr)
-{
-}
-
-static inline void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap(struct dma_buf *dmabuf,
-				  unsigned long pnum, void *vaddr)
-{
-}
-
-static inline int dma_buf_mmap(struct dma_buf *dmabuf,
-			       struct vm_area_struct *vma,
-			       unsigned long pgoff)
-{
-	return -ENODEV;
-}
-
-static inline void *dma_buf_vmap(struct dma_buf *dmabuf)
-{
-	return NULL;
-}
-
-static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
-{
-}
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 
 #endif /* __DMA_BUF_H__ */
-- 
cgit v1.2.3-55-g7522


From 39e3c9553f34381a1b664c27b0c696a266a5735e Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Wed, 28 Nov 2012 11:30:53 -0500
Subject: vfs: remove DCACHE_NEED_LOOKUP

The code that relied on that flag was ripped out of btrfs quite some
time ago, and never added back. Josef indicated that he was going to
take a different approach to the problem in btrfs, and that we
could just eliminate this flag.

Cc: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c       | 16 +---------------
 fs/dcache.c            | 33 +--------------------------------
 fs/namei.c             | 11 +----------
 include/linux/dcache.h |  8 --------
 4 files changed, 3 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 67ed24ae86bb..16d9e8e191e6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4262,16 +4262,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	if (unlikely(d_need_lookup(dentry))) {
-		memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
-		kfree(dentry->d_fsdata);
-		dentry->d_fsdata = NULL;
-		/* This thing is hashed, drop it for now */
-		d_drop(dentry);
-	} else {
-		ret = btrfs_inode_by_name(dir, dentry, &location);
-	}
-
+	ret = btrfs_inode_by_name(dir, dentry, &location);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
@@ -4341,11 +4332,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct dentry *ret;
 
 	ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-	if (unlikely(d_need_lookup(dentry))) {
-		spin_lock(&dentry->d_lock);
-		dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-		spin_unlock(&dentry->d_lock);
-	}
 	return ret;
 }
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 3a463d0c4fe8..1782be3fc3ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -454,24 +454,6 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
-/*
- * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
- * @dentry: dentry to drop
- *
- * This is called when we do a lookup on a placeholder dentry that needed to be
- * looked up.  The dentry should have been hashed in order for it to be found by
- * the lookup code, but now needs to be unhashed while we do the actual lookup
- * and clear the DCACHE_NEED_LOOKUP flag.
- */
-void d_clear_need_lookup(struct dentry *dentry)
-{
-	spin_lock(&dentry->d_lock);
-	__d_drop(dentry);
-	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-	spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(d_clear_need_lookup);
-
 /*
  * Finish off a dentry we've decided to kill.
  * dentry->d_lock must be held, returns with it unlocked.
@@ -565,13 +547,7 @@ repeat:
  	if (d_unhashed(dentry))
 		goto kill_it;
 
-	/*
-	 * If this dentry needs lookup, don't set the referenced flag so that it
-	 * is more likely to be cleaned up by the dcache shrinker in case of
-	 * memory pressure.
-	 */
-	if (!d_need_lookup(dentry))
-		dentry->d_flags |= DCACHE_REFERENCED;
+	dentry->d_flags |= DCACHE_REFERENCED;
 	dentry_lru_add(dentry);
 
 	dentry->d_count--;
@@ -1736,13 +1712,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 		return found;
 	}
 
-	/*
-	 * We are going to instantiate this dentry, unhash it and clear the
-	 * lookup flag so we can do that.
-	 */
-	if (unlikely(d_need_lookup(found)))
-		d_clear_need_lookup(found);
-
 	/*
 	 * Negative dentry: instantiate it unless the inode is a directory and
 	 * already has a dentry.
diff --git a/fs/namei.c b/fs/namei.c
index 35195ff9d194..25a41e02984b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1275,9 +1275,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 	*need_lookup = false;
 	dentry = d_lookup(dir, name);
 	if (dentry) {
-		if (d_need_lookup(dentry)) {
-			*need_lookup = true;
-		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
 			error = d_revalidate(dentry, flags);
 			if (unlikely(error <= 0)) {
 				if (error < 0) {
@@ -1383,8 +1381,6 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 		nd->seq = seq;
 
-		if (unlikely(d_need_lookup(dentry)))
-			goto unlazy;
 		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
 			status = d_revalidate(dentry, nd->flags);
 			if (unlikely(status <= 0)) {
@@ -1410,11 +1406,6 @@ unlazy:
 	if (unlikely(!dentry))
 		goto need_lookup;
 
-	if (unlikely(d_need_lookup(dentry))) {
-		dput(dentry);
-		goto need_lookup;
-	}
-
 	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
 		status = d_revalidate(dentry, nd->flags);
 	if (unlikely(status <= 0)) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59200795482e..c1754b59ddd3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -202,7 +202,6 @@ struct dentry_operations {
 #define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
 #define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
 #define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
-#define DCACHE_NEED_LOOKUP	0x80000 /* dentry requires i_op->lookup */
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
@@ -408,13 +407,6 @@ static inline bool d_mountpoint(struct dentry *dentry)
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
-static inline bool d_need_lookup(struct dentry *dentry)
-{
-	return dentry->d_flags & DCACHE_NEED_LOOKUP;
-}
-
-extern void d_clear_need_lookup(struct dentry *dentry);
-
 extern int sysctl_vfs_cache_pressure;
 
 #endif	/* __LINUX_DCACHE_H */
-- 
cgit v1.2.3-55-g7522


From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 20 Dec 2012 21:52:32 +0000
Subject: CacheFiles: Fix the marking of cached pages

Under some circumstances CacheFiles defers the marking of pages with PG_fscache
so that it can take advantage of pagevecs to reduce the number of calls to
fscache_mark_pages_cached() and the netfs's hook to keep track of this.

There are, however, two problems with this:

 (1) It can lead to the PG_fscache mark being applied _after_ the page is set
     PG_uptodate and unlocked (by the call to fscache_end_io()).

 (2) CacheFiles's ref on the page is dropped immediately following
     fscache_end_io() - and so may not still be held when the mark is applied.
     This can lead to the page being passed back to the allocator before the
     mark is applied.

Fix this by, where appropriate, marking the page before calling
fscache_end_io() and releasing the page.  This means that we can't take
advantage of pagevecs and have to make a separate call for each page to the
marking routines.

The symptoms of this are Bad Page state errors cropping up under memory
pressure, for example:

BUG: Bad page state in process tar  pfn:002da
page:ffffea0000009fb0 count:0 mapcount:0 mapping:          (null) index:0x1447
page flags: 0x1000(private_2)
Pid: 4574, comm: tar Tainted: G        W   3.1.0-rc4-fsdevel+ #1064
Call Trace:
 [<ffffffff8109583c>] ? dump_page+0xb9/0xbe
 [<ffffffff81095916>] bad_page+0xd5/0xea
 [<ffffffff81095d82>] get_page_from_freelist+0x35b/0x46a
 [<ffffffff810961f3>] __alloc_pages_nodemask+0x362/0x662
 [<ffffffff810989da>] __do_page_cache_readahead+0x13a/0x267
 [<ffffffff81098942>] ? __do_page_cache_readahead+0xa2/0x267
 [<ffffffff81098d7b>] ra_submit+0x1c/0x20
 [<ffffffff8109900a>] ondemand_readahead+0x28b/0x29a
 [<ffffffff81098ee2>] ? ondemand_readahead+0x163/0x29a
 [<ffffffff810990ce>] page_cache_sync_readahead+0x38/0x3a
 [<ffffffff81091d8a>] generic_file_aio_read+0x2ab/0x67e
 [<ffffffffa008cfbe>] nfs_file_read+0xa4/0xc9 [nfs]
 [<ffffffff810c22c4>] do_sync_read+0xba/0xfa
 [<ffffffff81177a47>] ? security_file_permission+0x7b/0x84
 [<ffffffff810c25dd>] ? rw_verify_area+0xab/0xc8
 [<ffffffff810c29a4>] vfs_read+0xaa/0x13a
 [<ffffffff810c2a79>] sys_read+0x45/0x6c
 [<ffffffff813ac37b>] system_call_fastpath+0x16/0x1b

As can be seen, PG_private_2 (== PG_fscache) is set in the page flags.

Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was
set appropriately showed that sometimes it wasn't.  This led to the discovery
that sometimes the page has apparently been reclaimed by the time the marker
got to see it.

Reported-by: M. Stevens <m@tippett.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cachefiles/rdwr.c          | 34 ++++++++-----------------
 fs/fscache/page.c             | 59 ++++++++++++++++++++++++++-----------------
 include/linux/fscache-cache.h |  3 +++
 include/linux/fscache.h       | 12 ++++-----
 4 files changed, 56 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c994691d9445..3367abdcdac4 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -176,9 +176,8 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
 	recheck:
 		if (PageUptodate(monitor->back_page)) {
 			copy_highpage(monitor->netfs_page, monitor->back_page);
-
-			pagevec_add(&pagevec, monitor->netfs_page);
-			fscache_mark_pages_cached(monitor->op, &pagevec);
+			fscache_mark_page_cached(monitor->op,
+						 monitor->netfs_page);
 			error = 0;
 		} else if (!PageError(monitor->back_page)) {
 			/* the page has probably been truncated */
@@ -335,8 +334,7 @@ backing_page_already_present:
 backing_page_already_uptodate:
 	_debug("- uptodate");
 
-	pagevec_add(pagevec, netpage);
-	fscache_mark_pages_cached(op, pagevec);
+	fscache_mark_page_cached(op, netpage);
 
 	copy_highpage(netpage, backpage);
 	fscache_end_io(op, netpage, 0);
@@ -448,8 +446,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 						       &pagevec);
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
+		fscache_mark_page_cached(op, page);
 		ret = -ENODATA;
 	} else {
 		ret = -ENOBUFS;
@@ -465,8 +462,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
  */
 static int cachefiles_read_backing_file(struct cachefiles_object *object,
 					struct fscache_retrieval *op,
-					struct list_head *list,
-					struct pagevec *mark_pvec)
+					struct list_head *list)
 {
 	struct cachefiles_one_read *monitor = NULL;
 	struct address_space *bmapping = object->backer->d_inode->i_mapping;
@@ -626,13 +622,13 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 		page_cache_release(backpage);
 		backpage = NULL;
 
-		if (!pagevec_add(mark_pvec, netpage))
-			fscache_mark_pages_cached(op, mark_pvec);
+		fscache_mark_page_cached(op, netpage);
 
 		page_cache_get(netpage);
 		if (!pagevec_add(&lru_pvec, netpage))
 			__pagevec_lru_add_file(&lru_pvec);
 
+		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
 		page_cache_release(netpage);
 		netpage = NULL;
@@ -775,15 +771,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	/* submit the apparently valid pages to the backing fs to be read from
 	 * disk */
 	if (nrbackpages > 0) {
-		ret2 = cachefiles_read_backing_file(object, op, &backpages,
-						    &pagevec);
+		ret2 = cachefiles_read_backing_file(object, op, &backpages);
 		if (ret2 == -ENOMEM || ret2 == -EINTR)
 			ret = ret2;
 	}
 
-	if (pagevec_count(&pagevec) > 0)
-		fscache_mark_pages_cached(op, &pagevec);
-
 	_leave(" = %d [nr=%u%s]",
 	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
 	return ret;
@@ -806,7 +798,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 {
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
-	struct pagevec pagevec;
 	int ret;
 
 	object = container_of(op->op.object,
@@ -817,13 +808,10 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 	_enter("%p,{%lx},", object, page->index);
 
 	ret = cachefiles_has_space(cache, 0, 1);
-	if (ret == 0) {
-		pagevec_init(&pagevec, 0);
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
-	} else {
+	if (ret == 0)
+		fscache_mark_page_cached(op, page);
+	else
 		ret = -ENOBUFS;
-	}
 
 	_leave(" = %d", ret);
 	return ret;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 3f7a59bfa7ad..d7c663cfc923 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -914,6 +914,40 @@ done:
 }
 EXPORT_SYMBOL(__fscache_uncache_page);
 
+/**
+ * fscache_mark_page_cached - Mark a page as being cached
+ * @op: The retrieval op pages are being marked for
+ * @page: The page to be marked
+ *
+ * Mark a netfs page as being cached.  After this is called, the netfs
+ * must call fscache_uncache_page() to remove the mark.
+ */
+void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
+{
+	struct fscache_cookie *cookie = op->op.object->cookie;
+
+#ifdef CONFIG_FSCACHE_STATS
+	atomic_inc(&fscache_n_marks);
+#endif
+
+	_debug("- mark %p{%lx}", page, page->index);
+	if (TestSetPageFsCache(page)) {
+		static bool once_only;
+		if (!once_only) {
+			once_only = true;
+			printk(KERN_WARNING "FS-Cache:"
+			       " Cookie type %s marked page %lx"
+			       " multiple times\n",
+			       cookie->def->name, page->index);
+		}
+	}
+
+	if (cookie->def->mark_page_cached)
+		cookie->def->mark_page_cached(cookie->netfs_data,
+					      op->mapping, page);
+}
+EXPORT_SYMBOL(fscache_mark_page_cached);
+
 /**
  * fscache_mark_pages_cached - Mark pages as being cached
  * @op: The retrieval op pages are being marked for
@@ -925,32 +959,11 @@ EXPORT_SYMBOL(__fscache_uncache_page);
 void fscache_mark_pages_cached(struct fscache_retrieval *op,
 			       struct pagevec *pagevec)
 {
-	struct fscache_cookie *cookie = op->op.object->cookie;
 	unsigned long loop;
 
-#ifdef CONFIG_FSCACHE_STATS
-	atomic_add(pagevec->nr, &fscache_n_marks);
-#endif
-
-	for (loop = 0; loop < pagevec->nr; loop++) {
-		struct page *page = pagevec->pages[loop];
-
-		_debug("- mark %p{%lx}", page, page->index);
-		if (TestSetPageFsCache(page)) {
-			static bool once_only;
-			if (!once_only) {
-				once_only = true;
-				printk(KERN_WARNING "FS-Cache:"
-				       " Cookie type %s marked page %lx"
-				       " multiple times\n",
-				       cookie->def->name, page->index);
-			}
-		}
-	}
+	for (loop = 0; loop < pagevec->nr; loop++)
+		fscache_mark_page_cached(op, pagevec->pages[loop]);
 
-	if (cookie->def->mark_pages_cached)
-		cookie->def->mark_pages_cached(cookie->netfs_data,
-					       op->mapping, pagevec);
 	pagevec_reinit(pagevec);
 }
 EXPORT_SYMBOL(fscache_mark_pages_cached);
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index ce31408b1e47..9879183b55d8 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -504,6 +504,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache);
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
+extern void fscache_mark_page_cached(struct fscache_retrieval *op,
+				     struct page *page);
+
 extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
 				      struct pagevec *pagevec);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9ec20dec3353..f4b6353543bf 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -135,14 +135,14 @@ struct fscache_cookie_def {
 	 */
 	void (*put_context)(void *cookie_netfs_data, void *context);
 
-	/* indicate pages that now have cache metadata retained
-	 * - this function should mark the specified pages as now being cached
-	 * - the pages will have been marked with PG_fscache before this is
+	/* indicate page that now have cache metadata retained
+	 * - this function should mark the specified page as now being cached
+	 * - the page will have been marked with PG_fscache before this is
 	 *   called, so this is optional
 	 */
-	void (*mark_pages_cached)(void *cookie_netfs_data,
-				  struct address_space *mapping,
-				  struct pagevec *cached_pvec);
+	void (*mark_page_cached)(void *cookie_netfs_data,
+				 struct address_space *mapping,
+				 struct page *page);
 
 	/* indicate the cookie is no longer cached
 	 * - this function is called when the backing store currently caching
-- 
cgit v1.2.3-55-g7522


From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 20 Dec 2012 21:52:35 +0000
Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads

Make fscache_relinquish_cookie() log a warning and wait if there are any
outstanding reads left on the cookie it was given.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/fscache/cookie.c           | 18 ++++++++++++++----
 fs/fscache/operation.c        | 10 ++++++++--
 include/linux/fscache-cache.h |  1 +
 3 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 0666996adf80..66be9eccede0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -442,22 +442,32 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
 
 	event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE;
 
+try_again:
 	spin_lock(&cookie->lock);
 
 	/* break links with all the active objects */
 	while (!hlist_empty(&cookie->backing_objects)) {
+		int n_reads;
 		object = hlist_entry(cookie->backing_objects.first,
 				     struct fscache_object,
 				     cookie_link);
 
 		_debug("RELEASE OBJ%x", object->debug_id);
 
-		if (atomic_read(&object->n_reads)) {
+		set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags);
+		n_reads = atomic_read(&object->n_reads);
+		if (n_reads) {
+			int n_ops = object->n_ops;
+			int n_in_progress = object->n_in_progress;
 			spin_unlock(&cookie->lock);
 			printk(KERN_ERR "FS-Cache:"
-			       " Cookie '%s' still has %d outstanding reads\n",
-			       cookie->def->name, atomic_read(&object->n_reads));
-			BUG();
+			       " Cookie '%s' still has %d outstanding reads (%d,%d)\n",
+			       cookie->def->name,
+			       n_reads, n_ops, n_in_progress);
+			wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS,
+				    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+			printk("Wait finished\n");
+			goto try_again;
 		}
 
 		/* detach each cache object from the object cookie */
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 30afdfa7aec7..c857ab824d6e 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -340,8 +340,14 @@ void fscache_put_operation(struct fscache_operation *op)
 
 	object = op->object;
 
-	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags))
-		atomic_dec(&object->n_reads);
+	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) {
+		if (atomic_dec_and_test(&object->n_reads)) {
+			clear_bit(FSCACHE_COOKIE_WAITING_ON_READS,
+				  &object->cookie->flags);
+			wake_up_bit(&object->cookie->flags,
+				    FSCACHE_COOKIE_WAITING_ON_READS);
+		}
+	}
 
 	/* now... we may get called with the object spinlock held, so we
 	 * complete the cleanup here only if we can immediately acquire the
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 9879183b55d8..e3d6d939d959 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -301,6 +301,7 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_PENDING_FILL	3	/* T if pending initial fill on object */
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
+#define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
-- 
cgit v1.2.3-55-g7522


From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 20 Dec 2012 21:52:35 +0000
Subject: FS-Cache: Fix operation state management and accounting

Fix the state management of internal fscache operations and the accounting of
what operations are in what states.

This is done by:

 (1) Give struct fscache_operation a enum variable that directly represents the
     state it's currently in, rather than spreading this knowledge over a bunch
     of flags, who's processing the operation at the moment and whether it is
     queued or not.

     This makes it easier to write assertions to check the state at various
     points and to prevent invalid state transitions.

 (2) Add an 'operation complete' state and supply a function to indicate the
     completion of an operation (fscache_op_complete()) and make things call
     it.  The final call to fscache_put_operation() can then check that an op
     in the appropriate state (complete or cancelled).

 (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better
     govern the state of an object:

	(a) The ->n_ops is now the number of extant operations on the object
	    and is now decremented by fscache_put_operation() only.

	(b) The ->n_in_progress is simply the number of objects that have been
	    taken off of the object's pending queue for the purposes of being
	    run.  This is decremented by fscache_op_complete() only.

	(c) The ->n_exclusive is the number of exclusive ops that have been
	    submitted and queued or are in progress.  It is decremented by
	    fscache_op_complete() and by fscache_cancel_op().

     fscache_put_operation() and fscache_operation_gc() now no longer try to
     clean up ->n_exclusive and ->n_in_progress.  That was leading to double
     decrements against fscache_cancel_op().

     fscache_cancel_op() now no longer decrements ->n_ops.  That was leading to
     double decrements against fscache_put_operation().

     fscache_submit_exclusive_op() now decides whether it has to queue an op
     based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter
     will persist in being true even after all preceding operations have been
     cancelled or completed.  Furthermore, if an object is active and there are
     runnable ops against it, there must be at least one op running.

 (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and
     provide a function to record completion of the pages as they complete.

     When n_pages reaches 0, the operation is deemed to be complete and
     fscache_op_complete() is called.

     Add calls to fscache_retrieval_complete() anywhere we've finished with a
     page we've been given to read or allocate for.  This includes places where
     we just return pages to the netfs for reading from the server and where
     accessing the cache fails and we discard the proposed netfs page.

The bugs in the unfixed state management manifest themselves as oopses like the
following where the operation completion gets out of sync with return of the
cookie by the netfs.  This is possible because the cache unlocks and returns
all the netfs pages before recording its completion - which means that there's
nothing to stop the netfs discarding them and returning the cookie.


FS-Cache: Cookie 'NFS.fh' still has outstanding reads
------------[ cut here ]------------
kernel BUG at fs/fscache/cookie.c:519!
invalid opcode: 0000 [#1] SMP
CPU 1
Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc

Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090                  /DG965RY
RIP: 0010:[<ffffffffa007050a>]  [<ffffffffa007050a>] __fscache_relinquish_cookie+0x170/0x343 [fscache]
RSP: 0018:ffff8800368cfb00  EFLAGS: 00010282
RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000
RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c
RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000
R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98
R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370
FS:  0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040)
Stack:
 ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0
 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0
 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91
Call Trace:
 [<ffffffffa00b2c91>] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs]
 [<ffffffffa008f25f>] nfs_clear_inode+0x3c/0x41 [nfs]
 [<ffffffffa0090df1>] nfs4_evict_inode+0x2f/0x33 [nfs]
 [<ffffffff810d8d47>] evict+0xa1/0x15c
 [<ffffffff810d8e2e>] dispose_list+0x2c/0x38
 [<ffffffff810d9ebd>] prune_icache_sb+0x28c/0x29b
 [<ffffffff810c56b7>] prune_super+0xd5/0x140
 [<ffffffff8109b615>] shrink_slab+0x102/0x1ab
 [<ffffffff8109d690>] balance_pgdat+0x2f2/0x595
 [<ffffffff8103e009>] ? process_timeout+0xb/0xb
 [<ffffffff8109dba3>] kswapd+0x270/0x289
 [<ffffffff8104c5ea>] ? __init_waitqueue_head+0x46/0x46
 [<ffffffff8109d933>] ? balance_pgdat+0x595/0x595
 [<ffffffff8104bf7a>] kthread+0x7f/0x87
 [<ffffffff813ad6b4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81026b98>] ? finish_task_switch+0x45/0xc0
 [<ffffffff813abcdd>] ? retint_restore_args+0xe/0xe
 [<ffffffff8104befb>] ? __init_kthread_worker+0x53/0x53
 [<ffffffff813ad6b0>] ? gs_change+0xb/0xb

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/backend-api.txt | 26 ++++++-
 Documentation/filesystems/caching/operations.txt  |  2 +-
 fs/cachefiles/rdwr.c                              | 31 ++++++--
 fs/fscache/object.c                               |  2 -
 fs/fscache/operation.c                            | 91 +++++++++++++++--------
 fs/fscache/page.c                                 | 25 ++++++-
 include/linux/fscache-cache.h                     | 37 +++++++--
 7 files changed, 164 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index 382d52cdaf2d..f4769b9399df 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -419,7 +419,10 @@ performed on the denizens of the cache.  These are held in a structure of type:
 
      If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
      returned if possible or fscache_end_io() called with a suitable error
-     code..
+     code.
+
+     fscache_put_retrieval() should be called after a page or pages are dealt
+     with.  This will complete the operation when all pages are dealt with.
 
 
  (*) Request pages be read from cache [mandatory]:
@@ -526,6 +529,27 @@ FS-Cache provides some utilities that a cache backend may make use of:
      error value should be 0 if successful and an error otherwise.
 
 
+ (*) Record that one or more pages being retrieved or allocated have been dealt
+     with:
+
+	void fscache_retrieval_complete(struct fscache_retrieval *op,
+					int n_pages);
+
+     This is called to record the fact that one or more pages have been dealt
+     with and are no longer the concern of this operation.  When the number of
+     pages remaining in the operation reaches 0, the operation will be
+     completed.
+
+
+ (*) Record operation completion:
+
+	void fscache_op_complete(struct fscache_operation *op);
+
+     This is called to record the completion of an operation.  This deducts
+     this operation from the parent object's run state, potentially permitting
+     one or more pending operations to start running.
+
+
  (*) Set highest store limit:
 
 	void fscache_set_store_limit(struct fscache_object *object,
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
index b6b070c57cbf..bee2a5f93d60 100644
--- a/Documentation/filesystems/caching/operations.txt
+++ b/Documentation/filesystems/caching/operations.txt
@@ -174,7 +174,7 @@ Operations are used through the following procedure:
      necessary (the object might have died whilst the thread was waiting).
 
      When it has finished doing its processing, it should call
-     fscache_put_operation() on it.
+     fscache_op_complete() and fscache_put_operation() on it.
 
  (4) The operation holds an effective lock upon the object, preventing other
      exclusive ops conflicting until it is released.  The operation can be
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index bf123d9c3206..93a0815e0498 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -197,6 +197,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
 
 		fscache_end_io(op, monitor->netfs_page, error);
 		page_cache_release(monitor->netfs_page);
+		fscache_retrieval_complete(op, 1);
 		fscache_put_retrieval(op);
 		kfree(monitor);
 
@@ -339,6 +340,7 @@ backing_page_already_uptodate:
 
 	copy_highpage(netpage, backpage);
 	fscache_end_io(op, netpage, 0);
+	fscache_retrieval_complete(op, 1);
 
 success:
 	_debug("success");
@@ -360,6 +362,7 @@ read_error:
 		goto out;
 io_error:
 	cachefiles_io_error_obj(object, "Page read error on backing file");
+	fscache_retrieval_complete(op, 1);
 	ret = -ENOBUFS;
 	goto out;
 
@@ -369,6 +372,7 @@ nomem_monitor:
 	fscache_put_retrieval(monitor->op);
 	kfree(monitor);
 nomem:
+	fscache_retrieval_complete(op, 1);
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
 }
@@ -407,7 +411,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	_enter("{%p},{%lx},,,", object, page->index);
 
 	if (!object->backer)
-		return -ENOBUFS;
+		goto enobufs;
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
@@ -416,7 +420,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	/* calculate the shift required to use bmap */
 	if (inode->i_sb->s_blocksize > PAGE_SIZE)
-		return -ENOBUFS;
+		goto enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
@@ -448,13 +452,19 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
 		fscache_mark_page_cached(op, page);
+		fscache_retrieval_complete(op, 1);
 		ret = -ENODATA;
 	} else {
-		ret = -ENOBUFS;
+		goto enobufs;
 	}
 
 	_leave(" = %d", ret);
 	return ret;
+
+enobufs:
+	fscache_retrieval_complete(op, 1);
+	_leave(" = -ENOBUFS");
+	return -ENOBUFS;
 }
 
 /*
@@ -632,6 +642,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 
 		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
+		fscache_retrieval_complete(op, 1);
 		page_cache_release(netpage);
 		netpage = NULL;
 		continue;
@@ -659,6 +670,7 @@ out:
 	list_for_each_entry_safe(netpage, _n, list, lru) {
 		list_del(&netpage->lru);
 		page_cache_release(netpage);
+		fscache_retrieval_complete(op, 1);
 	}
 
 	_leave(" = %d", ret);
@@ -707,7 +719,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	       *nr_pages);
 
 	if (!object->backer)
-		return -ENOBUFS;
+		goto all_enobufs;
 
 	space = 1;
 	if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
@@ -720,7 +732,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	/* calculate the shift required to use bmap */
 	if (inode->i_sb->s_blocksize > PAGE_SIZE)
-		return -ENOBUFS;
+		goto all_enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
@@ -760,7 +772,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 			nrbackpages++;
 		} else if (space && pagevec_add(&pagevec, page) == 0) {
 			fscache_mark_pages_cached(op, &pagevec);
+			fscache_retrieval_complete(op, 1);
 			ret = -ENODATA;
+		} else {
+			fscache_retrieval_complete(op, 1);
 		}
 	}
 
@@ -781,6 +796,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	_leave(" = %d [nr=%u%s]",
 	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
 	return ret;
+
+all_enobufs:
+	fscache_retrieval_complete(op, *nr_pages);
+	return -ENOBUFS;
 }
 
 /*
@@ -815,6 +834,7 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 	else
 		ret = -ENOBUFS;
 
+	fscache_retrieval_complete(op, 1);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -864,6 +884,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op,
 		ret = -ENOBUFS;
 	}
 
+	fscache_retrieval_complete(op, *nr_pages);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index b6b897c550ac..773bc798a416 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -587,8 +587,6 @@ static void fscache_object_available(struct fscache_object *object)
 	if (object->n_in_progress == 0) {
 		if (object->n_ops > 0) {
 			ASSERTCMP(object->n_ops, >=, object->n_obj_ops);
-			ASSERTIF(object->n_ops > object->n_obj_ops,
-				 !list_empty(&object->pending_ops));
 			fscache_start_operations(object);
 		} else {
 			ASSERT(list_empty(&object->pending_ops));
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index c857ab824d6e..748f9553c2cb 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 	ASSERT(op->processor != NULL);
 	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
 
 	fscache_stat(&fscache_n_op_enqueue);
 	switch (op->flags & FSCACHE_OP_TYPE) {
@@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
 static void fscache_run_op(struct fscache_object *object,
 			   struct fscache_operation *op)
 {
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
+
+	op->state = FSCACHE_OP_ST_IN_PROGRESS;
 	object->n_in_progress++;
 	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -80,22 +84,23 @@ static void fscache_run_op(struct fscache_object *object,
 int fscache_submit_exclusive_op(struct fscache_object *object,
 				struct fscache_operation *op)
 {
-	int ret;
-
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
+	ASSERTCMP(atomic_read(&op->usage), >, 0);
+
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
 	ASSERT(list_empty(&op->pend_link));
 
-	ret = -ENOBUFS;
+	op->state = FSCACHE_OP_ST_PENDING;
 	if (fscache_object_is_active(object)) {
 		op->object = object;
 		object->n_ops++;
 		object->n_exclusive++;	/* reads and writes must wait */
 
-		if (object->n_ops > 1) {
+		if (object->n_in_progress > 0) {
 			atomic_inc(&op->usage);
 			list_add_tail(&op->pend_link, &object->pending_ops);
 			fscache_stat(&fscache_n_op_pend);
@@ -111,7 +116,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 
 		/* need to issue a new write op after this */
 		clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
-		ret = 0;
 	} else if (object->state == FSCACHE_OBJECT_CREATING) {
 		op->object = object;
 		object->n_ops++;
@@ -119,14 +123,13 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 		atomic_inc(&op->usage);
 		list_add_tail(&op->pend_link, &object->pending_ops);
 		fscache_stat(&fscache_n_op_pend);
-		ret = 0;
 	} else {
 		/* not allowed to submit ops in any other state */
 		BUG();
 	}
 
 	spin_unlock(&object->lock);
-	return ret;
+	return 0;
 }
 
 /*
@@ -186,6 +189,7 @@ int fscache_submit_op(struct fscache_object *object,
 	_enter("{OBJ%x OP%x},{%u}",
 	       object->debug_id, op->debug_id, atomic_read(&op->usage));
 
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
 	spin_lock(&object->lock);
@@ -196,6 +200,7 @@ int fscache_submit_op(struct fscache_object *object,
 	ostate = object->state;
 	smp_rmb();
 
+	op->state = FSCACHE_OP_ST_PENDING;
 	if (fscache_object_is_active(object)) {
 		op->object = object;
 		object->n_ops++;
@@ -225,12 +230,15 @@ int fscache_submit_op(struct fscache_object *object,
 		   object->state == FSCACHE_OBJECT_LC_DYING ||
 		   object->state == FSCACHE_OBJECT_WITHDRAWING) {
 		fscache_stat(&fscache_n_op_rejected);
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	} else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
 		fscache_report_unexpected_submission(object, op, ostate);
 		ASSERT(!fscache_object_is_active(object));
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	} else {
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	}
 
@@ -290,13 +298,18 @@ int fscache_cancel_op(struct fscache_operation *op)
 
 	_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id);
 
+	ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING);
+	ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED);
+	ASSERTCMP(atomic_read(&op->usage), >, 0);
+
 	spin_lock(&object->lock);
 
 	ret = -EBUSY;
-	if (!list_empty(&op->pend_link)) {
+	if (op->state == FSCACHE_OP_ST_PENDING) {
+		ASSERT(!list_empty(&op->pend_link));
 		fscache_stat(&fscache_n_op_cancelled);
 		list_del_init(&op->pend_link);
-		object->n_ops--;
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
 			object->n_exclusive--;
 		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
@@ -310,6 +323,37 @@ int fscache_cancel_op(struct fscache_operation *op)
 	return ret;
 }
 
+/*
+ * Record the completion of an in-progress operation.
+ */
+void fscache_op_complete(struct fscache_operation *op)
+{
+	struct fscache_object *object = op->object;
+
+	_enter("OBJ%x", object->debug_id);
+
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
+	ASSERTCMP(object->n_in_progress, >, 0);
+	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags),
+		    object->n_exclusive, >, 0);
+	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags),
+		    object->n_in_progress, ==, 1);
+
+	spin_lock(&object->lock);
+
+	op->state = FSCACHE_OP_ST_COMPLETE;
+
+	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
+		object->n_exclusive--;
+	object->n_in_progress--;
+	if (object->n_in_progress == 0)
+		fscache_start_operations(object);
+
+	spin_unlock(&object->lock);
+	_leave("");
+}
+EXPORT_SYMBOL(fscache_op_complete);
+
 /*
  * release an operation
  * - queues pending ops if this is the last in-progress op
@@ -328,8 +372,9 @@ void fscache_put_operation(struct fscache_operation *op)
 		return;
 
 	_debug("PUT OP");
-	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
-		BUG();
+	ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE,
+		    op->state, ==, FSCACHE_OP_ST_CANCELLED);
+	op->state = FSCACHE_OP_ST_DEAD;
 
 	fscache_stat(&fscache_n_op_release);
 
@@ -365,16 +410,6 @@ void fscache_put_operation(struct fscache_operation *op)
 		return;
 	}
 
-	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
-		ASSERTCMP(object->n_exclusive, >, 0);
-		object->n_exclusive--;
-	}
-
-	ASSERTCMP(object->n_in_progress, >, 0);
-	object->n_in_progress--;
-	if (object->n_in_progress == 0)
-		fscache_start_operations(object);
-
 	ASSERTCMP(object->n_ops, >, 0);
 	object->n_ops--;
 	if (object->n_ops == 0)
@@ -413,23 +448,14 @@ void fscache_operation_gc(struct work_struct *work)
 		spin_unlock(&cache->op_gc_list_lock);
 
 		object = op->object;
+		spin_lock(&object->lock);
 
 		_debug("GC DEFERRED REL OBJ%x OP%x",
 		       object->debug_id, op->debug_id);
 		fscache_stat(&fscache_n_op_gc);
 
 		ASSERTCMP(atomic_read(&op->usage), ==, 0);
-
-		spin_lock(&object->lock);
-		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
-			ASSERTCMP(object->n_exclusive, >, 0);
-			object->n_exclusive--;
-		}
-
-		ASSERTCMP(object->n_in_progress, >, 0);
-		object->n_in_progress--;
-		if (object->n_in_progress == 0)
-			fscache_start_operations(object);
+		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD);
 
 		ASSERTCMP(object->n_ops, >, 0);
 		object->n_ops--;
@@ -437,6 +463,7 @@ void fscache_operation_gc(struct work_struct *work)
 			fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
 
 		spin_unlock(&object->lock);
+		kfree(op);
 
 	} while (count++ < 20);
 
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 248a12e22532..b38b13d2a555 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -162,6 +162,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 			fscache_abort_object(object);
 	}
 
+	fscache_op_complete(op);
 	_leave("");
 }
 
@@ -223,6 +224,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)
 
 	_enter("{OP%x}", op->op.debug_id);
 
+	ASSERTCMP(op->n_pages, ==, 0);
+
 	fscache_hist(fscache_retrieval_histogram, op->start_time);
 	if (op->context)
 		fscache_put_context(op->op.object->cookie, op->context);
@@ -320,6 +323,11 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object,
 	_debug("<<< GO");
 
 check_if_dead:
+	if (op->op.state == FSCACHE_OP_ST_CANCELLED) {
+		fscache_stat(stat_object_dead);
+		_leave(" = -ENOBUFS [cancelled]");
+		return -ENOBUFS;
+	}
 	if (unlikely(fscache_object_is_dead(object))) {
 		fscache_stat(stat_object_dead);
 		return -ENOBUFS;
@@ -364,6 +372,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
+	op->n_pages = 1;
 
 	spin_lock(&cookie->lock);
 
@@ -375,10 +384,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 	ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
 
 	atomic_inc(&object->n_reads);
-	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_retrieval_ops);
@@ -425,6 +434,8 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	atomic_dec(&object->n_reads);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
 	kfree(op);
@@ -482,6 +493,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(mapping, end_io_func, context);
 	if (!op)
 		return -ENOMEM;
+	op->n_pages = *nr_pages;
 
 	spin_lock(&cookie->lock);
 
@@ -491,10 +503,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 			     struct fscache_object, cookie_link);
 
 	atomic_inc(&object->n_reads);
-	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_retrieval_ops);
@@ -541,6 +553,8 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	atomic_dec(&object->n_reads);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
 	kfree(op);
@@ -583,6 +597,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
 	if (!op)
 		return -ENOMEM;
+	op->n_pages = 1;
 
 	spin_lock(&cookie->lock);
 
@@ -696,6 +711,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
 		fscache_abort_object(object);
+		fscache_op_complete(&op->op);
 	} else {
 		fscache_enqueue_operation(&op->op);
 	}
@@ -710,6 +726,7 @@ superseded:
 	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
+	fscache_op_complete(&op->op);
 	_leave("");
 }
 
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index e3d6d939d959..f5facd1d333f 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq;
 typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
 typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
 
+enum fscache_operation_state {
+	FSCACHE_OP_ST_BLANK,		/* Op is not yet submitted */
+	FSCACHE_OP_ST_INITIALISED,	/* Op is initialised */
+	FSCACHE_OP_ST_PENDING,		/* Op is blocked from running */
+	FSCACHE_OP_ST_IN_PROGRESS,	/* Op is in progress */
+	FSCACHE_OP_ST_COMPLETE,		/* Op is complete */
+	FSCACHE_OP_ST_CANCELLED,	/* Op has been cancelled */
+	FSCACHE_OP_ST_DEAD		/* Op is now dead */
+};
+
 struct fscache_operation {
 	struct work_struct	work;		/* record for async ops */
 	struct list_head	pend_link;	/* link in object->pending_ops */
@@ -86,10 +96,10 @@ struct fscache_operation {
 #define FSCACHE_OP_MYTHREAD	0x0002	/* - processing is done be issuing thread, not pool */
 #define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
 #define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
-#define FSCACHE_OP_DEAD		6	/* op is now dead */
-#define FSCACHE_OP_DEC_READ_CNT	7	/* decrement object->n_reads on destruction */
-#define FSCACHE_OP_KEEP_FLAGS	0xc0	/* flags to keep when repurposing an op */
+#define FSCACHE_OP_DEC_READ_CNT	6	/* decrement object->n_reads on destruction */
+#define FSCACHE_OP_KEEP_FLAGS	0x0070	/* flags to keep when repurposing an op */
 
+	enum fscache_operation_state state;
 	atomic_t		usage;
 	unsigned		debug_id;	/* debugging ID */
 
@@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id;
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op,
 {
 	INIT_WORK(&op->work, fscache_op_work_func);
 	atomic_set(&op->usage, 1);
+	op->state = FSCACHE_OP_ST_INITIALISED;
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
 	op->processor = processor;
 	op->release = release;
@@ -138,6 +150,7 @@ struct fscache_retrieval {
 	void			*context;	/* netfs read context (pinned) */
 	struct list_head	to_do;		/* list of things to be done by the backend */
 	unsigned long		start_time;	/* time at which retrieval started */
+	unsigned		n_pages;	/* number of pages to be retrieved */
 };
 
 typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
@@ -173,9 +186,23 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
 	fscache_enqueue_operation(&op->op);
 }
 
+/**
+ * fscache_retrieval_complete - Record (partial) completion of a retrieval
+ * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
+ */
+static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
+					      int n_pages)
+{
+	op->n_pages -= n_pages;
+	if (op->n_pages <= 0)
+		fscache_op_complete(&op->op);
+}
+
 /**
  * fscache_put_retrieval - Drop a reference to a retrieval operation
  * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
  *
  * Drop a reference to a retrieval operation.
  */
@@ -333,10 +360,10 @@ struct fscache_object {
 
 	int			debug_id;	/* debugging ID */
 	int			n_children;	/* number of child objects */
-	int			n_ops;		/* number of ops outstanding on object */
+	int			n_ops;		/* number of extant ops on object */
 	int			n_obj_ops;	/* number of object ops outstanding on object */
 	int			n_in_progress;	/* number of ops in progress */
-	int			n_exclusive;	/* number of exclusive ops queued */
+	int			n_exclusive;	/* number of exclusive ops queued or in progress */
 	atomic_t		n_reads;	/* number of read ops in progress */
 	spinlock_t		lock;		/* state and operations lock */
 
-- 
cgit v1.2.3-55-g7522


From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 20 Dec 2012 21:52:36 +0000
Subject: FS-Cache: Provide proper invalidation

Provide a proper invalidation method rather than relying on the netfs retiring
the cookie it has and getting a new one.  The problem with this is that isn't
easy for the netfs to make sure that it has completed/cancelled all its
outstanding storage and retrieval operations on the cookie it is retiring.

Instead, have the cache provide an invalidation method that will cancel or wait
for all currently outstanding operations before invalidating the cache, and
will cause new operations to queue up behind that.  Whilst invalidation is in
progress, some requests will be rejected until the cache can stack a barrier on
the operation queue to cause new operations to be deferred behind it.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/backend-api.txt | 12 ++++
 Documentation/filesystems/caching/netfs-api.txt   | 46 ++++++++++++---
 Documentation/filesystems/caching/object.txt      | 23 +++++---
 fs/fscache/cookie.c                               | 60 +++++++++++++++++++
 fs/fscache/internal.h                             | 10 ++++
 fs/fscache/object.c                               | 72 +++++++++++++++++++++++
 fs/fscache/operation.c                            | 32 ++++++++++
 fs/fscache/page.c                                 | 51 ++++++++++++++++
 fs/fscache/stats.c                                | 11 +++-
 include/linux/fscache-cache.h                     |  8 ++-
 include/linux/fscache.h                           | 38 ++++++++++++
 11 files changed, 345 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index f4769b9399df..d78bab9622c6 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -308,6 +308,18 @@ performed on the denizens of the cache.  These are held in a structure of type:
      obtained by calling object->cookie->def->get_aux()/get_attr().
 
 
+ (*) Invalidate data object [mandatory]:
+
+	int (*invalidate_object)(struct fscache_operation *op)
+
+     This is called to invalidate a data object (as pointed to by op->object).
+     All the data stored for this object should be discarded and an
+     attr_changed operation should be performed.  The caller will follow up
+     with an object update operation.
+
+     fscache_op_complete() must be called on op before returning.
+
+
  (*) Discard object [mandatory]:
 
 	void (*drop_object)(struct fscache_object *object)
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 7cc6bf2871eb..97e6c0ecc5ef 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -35,8 +35,9 @@ This document contains the following sections:
 	(12) Index and data file update
 	(13) Miscellaneous cookie operations
 	(14) Cookie unregistration
-	(15) Index and data file invalidation
-	(16) FS-Cache specific page flags.
+	(15) Index invalidation
+	(16) Data file invalidation
+	(17) FS-Cache specific page flags.
 
 
 =============================
@@ -767,13 +768,42 @@ the cookies for "child" indices, objects and pages have been relinquished
 first.
 
 
-================================
-INDEX AND DATA FILE INVALIDATION
-================================
+==================
+INDEX INVALIDATION
+==================
+
+There is no direct way to invalidate an index subtree.  To do this, the caller
+should relinquish and retire the cookie they have, and then acquire a new one.
+
+
+======================
+DATA FILE INVALIDATION
+======================
+
+Sometimes it will be necessary to invalidate an object that contains data.
+Typically this will be necessary when the server tells the netfs of a foreign
+change - at which point the netfs has to throw away all the state it had for an
+inode and reload from the server.
+
+To indicate that a cache object should be invalidated, the following function
+can be called:
+
+	void fscache_invalidate(struct fscache_cookie *cookie);
+
+This can be called with spinlocks held as it defers the work to a thread pool.
+All extant storage, retrieval and attribute change ops at this point are
+cancelled and discarded.  Some future operations will be rejected until the
+cache has had a chance to insert a barrier in the operations queue.  After
+that, operations will be queued again behind the invalidation operation.
+
+The invalidation operation will perform an attribute change operation and an
+auxiliary data update operation as it is very likely these will have changed.
+
+Using the following function, the netfs can wait for the invalidation operation
+to have reached a point at which it can start submitting ordinary operations
+once again:
 
-There is no direct way to invalidate an index subtree or a data file.  To do
-this, the caller should relinquish and retire the cookie they have, and then
-acquire a new one.
+	void fscache_wait_on_invalidate(struct fscache_cookie *cookie);
 
 
 ===========================
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt
index 58313348da87..100ff41127e4 100644
--- a/Documentation/filesystems/caching/object.txt
+++ b/Documentation/filesystems/caching/object.txt
@@ -216,7 +216,14 @@ servicing netfs requests:
      The normal running state.  In this state, requests the netfs makes will be
      passed on to the cache.
 
- (6) State FSCACHE_OBJECT_UPDATING.
+ (6) State FSCACHE_OBJECT_INVALIDATING.
+
+     The object is undergoing invalidation.  When the state comes here, it
+     discards all pending read, write and attribute change operations as it is
+     going to clear out the cache entirely and reinitialise it.  It will then
+     continue to the FSCACHE_OBJECT_UPDATING state.
+
+ (7) State FSCACHE_OBJECT_UPDATING.
 
      The state machine comes here to update the object in the cache from the
      netfs's records.  This involves updating the auxiliary data that is used
@@ -225,13 +232,13 @@ servicing netfs requests:
 And there are terminal states in which an object cleans itself up, deallocates
 memory and potentially deletes stuff from disk:
 
- (7) State FSCACHE_OBJECT_LC_DYING.
+ (8) State FSCACHE_OBJECT_LC_DYING.
 
      The object comes here if it is dying because of a lookup or creation
      error.  This would be due to a disk error or system error of some sort.
      Temporary data is cleaned up, and the parent is released.
 
- (8) State FSCACHE_OBJECT_DYING.
+ (9) State FSCACHE_OBJECT_DYING.
 
      The object comes here if it is dying due to an error, because its parent
      cookie has been relinquished by the netfs or because the cache is being
@@ -241,27 +248,27 @@ memory and potentially deletes stuff from disk:
      can destroy themselves.  This object waits for all its children to go away
      before advancing to the next state.
 
- (9) State FSCACHE_OBJECT_ABORT_INIT.
+(10) State FSCACHE_OBJECT_ABORT_INIT.
 
      The object comes to this state if it was waiting on its parent in
      FSCACHE_OBJECT_INIT, but its parent died.  The object will destroy itself
      so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
 
-(10) State FSCACHE_OBJECT_RELEASING.
-(11) State FSCACHE_OBJECT_RECYCLING.
+(11) State FSCACHE_OBJECT_RELEASING.
+(12) State FSCACHE_OBJECT_RECYCLING.
 
      The object comes to one of these two states when dying once it is rid of
      all its children, if it is dying because the netfs relinquished its
      cookie.  In the first state, the cached data is expected to persist, and
      in the second it will be deleted.
 
-(12) State FSCACHE_OBJECT_WITHDRAWING.
+(13) State FSCACHE_OBJECT_WITHDRAWING.
 
      The object transits to this state if the cache decides it wants to
      withdraw the object from service, perhaps to make space, but also due to
      error or just because the whole cache is being withdrawn.
 
-(13) State FSCACHE_OBJECT_DEAD.
+(14) State FSCACHE_OBJECT_DEAD.
 
      The object transits to this state when the in-memory object record is
      ready to be deleted.  The object processor shouldn't ever see an object in
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 66be9eccede0..8dcb114758e3 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -369,6 +369,66 @@ cant_attach_object:
 	return ret;
 }
 
+/*
+ * Invalidate an object.  Callable with spinlocks held.
+ */
+void __fscache_invalidate(struct fscache_cookie *cookie)
+{
+	struct fscache_object *object;
+
+	_enter("{%s}", cookie->def->name);
+
+	fscache_stat(&fscache_n_invalidates);
+
+	/* Only permit invalidation of data files.  Invalidating an index will
+	 * require the caller to release all its attachments to the tree rooted
+	 * there, and if it's doing that, it may as well just retire the
+	 * cookie.
+	 */
+	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
+
+	/* We will be updating the cookie too. */
+	BUG_ON(!cookie->def->get_aux);
+
+	/* If there's an object, we tell the object state machine to handle the
+	 * invalidation on our behalf, otherwise there's nothing to do.
+	 */
+	if (!hlist_empty(&cookie->backing_objects)) {
+		spin_lock(&cookie->lock);
+
+		if (!hlist_empty(&cookie->backing_objects) &&
+		    !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING,
+				      &cookie->flags)) {
+			object = hlist_entry(cookie->backing_objects.first,
+					     struct fscache_object,
+					     cookie_link);
+			if (object->state < FSCACHE_OBJECT_DYING)
+				fscache_raise_event(
+					object, FSCACHE_OBJECT_EV_INVALIDATE);
+		}
+
+		spin_unlock(&cookie->lock);
+	}
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_invalidate);
+
+/*
+ * Wait for object invalidation to complete.
+ */
+void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	_enter("%p", cookie);
+
+	wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
+		    fscache_wait_bit_interruptible,
+		    TASK_UNINTERRUPTIBLE);
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_wait_on_invalidate);
+
 /*
  * update the index entries backing a cookie
  */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index f6aad48d38a8..c81179303930 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -122,10 +122,16 @@ extern int fscache_submit_exclusive_op(struct fscache_object *,
 extern int fscache_submit_op(struct fscache_object *,
 			     struct fscache_operation *);
 extern int fscache_cancel_op(struct fscache_operation *);
+extern void fscache_cancel_all_ops(struct fscache_object *);
 extern void fscache_abort_object(struct fscache_object *);
 extern void fscache_start_operations(struct fscache_object *);
 extern void fscache_operation_gc(struct work_struct *);
 
+/*
+ * page.c
+ */
+extern void fscache_invalidate_writes(struct fscache_cookie *);
+
 /*
  * proc.c
  */
@@ -205,6 +211,9 @@ extern atomic_t fscache_n_acquires_ok;
 extern atomic_t fscache_n_acquires_nobufs;
 extern atomic_t fscache_n_acquires_oom;
 
+extern atomic_t fscache_n_invalidates;
+extern atomic_t fscache_n_invalidates_run;
+
 extern atomic_t fscache_n_updates;
 extern atomic_t fscache_n_updates_null;
 extern atomic_t fscache_n_updates_run;
@@ -237,6 +246,7 @@ extern atomic_t fscache_n_cop_alloc_object;
 extern atomic_t fscache_n_cop_lookup_object;
 extern atomic_t fscache_n_cop_lookup_complete;
 extern atomic_t fscache_n_cop_grab_object;
+extern atomic_t fscache_n_cop_invalidate_object;
 extern atomic_t fscache_n_cop_update_object;
 extern atomic_t fscache_n_cop_drop_object;
 extern atomic_t fscache_n_cop_put_object;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 773bc798a416..80b549141ea6 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,6 +14,7 @@
 
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
@@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
 	[FSCACHE_OBJECT_CREATING]	= "OBJECT_CREATING",
 	[FSCACHE_OBJECT_AVAILABLE]	= "OBJECT_AVAILABLE",
 	[FSCACHE_OBJECT_ACTIVE]		= "OBJECT_ACTIVE",
+	[FSCACHE_OBJECT_INVALIDATING]	= "OBJECT_INVALIDATING",
 	[FSCACHE_OBJECT_UPDATING]	= "OBJECT_UPDATING",
 	[FSCACHE_OBJECT_DYING]		= "OBJECT_DYING",
 	[FSCACHE_OBJECT_LC_DYING]	= "OBJECT_LC_DYING",
@@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
 	[FSCACHE_OBJECT_CREATING]	= "CRTN",
 	[FSCACHE_OBJECT_AVAILABLE]	= "AVBL",
 	[FSCACHE_OBJECT_ACTIVE]		= "ACTV",
+	[FSCACHE_OBJECT_INVALIDATING]	= "INVL",
 	[FSCACHE_OBJECT_UPDATING]	= "UPDT",
 	[FSCACHE_OBJECT_DYING]		= "DYNG",
 	[FSCACHE_OBJECT_LC_DYING]	= "LCDY",
@@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *);
 static void fscache_initialise_object(struct fscache_object *);
 static void fscache_lookup_object(struct fscache_object *);
 static void fscache_object_available(struct fscache_object *);
+static void fscache_invalidate_object(struct fscache_object *);
 static void fscache_release_object(struct fscache_object *);
 static void fscache_withdraw_object(struct fscache_object *);
 static void fscache_enqueue_dependents(struct fscache_object *);
@@ -78,6 +82,15 @@ static inline void fscache_done_parent_op(struct fscache_object *object)
 	spin_unlock(&parent->lock);
 }
 
+/*
+ * Notify netfs of invalidation completion.
+ */
+static inline void fscache_invalidation_complete(struct fscache_cookie *cookie)
+{
+	if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
+		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+}
+
 /*
  * process events that have been sent to an object's state machine
  * - initiates parent lookup
@@ -125,6 +138,16 @@ static void fscache_object_state_machine(struct fscache_object *object)
 	case FSCACHE_OBJECT_ACTIVE:
 		goto active_transit;
 
+		/* Invalidate an object on disk */
+	case FSCACHE_OBJECT_INVALIDATING:
+		clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events);
+		fscache_stat(&fscache_n_invalidates_run);
+		fscache_stat(&fscache_n_cop_invalidate_object);
+		fscache_invalidate_object(object);
+		fscache_stat_d(&fscache_n_cop_invalidate_object);
+		fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
+		goto active_transit;
+
 		/* update the object metadata on disk */
 	case FSCACHE_OBJECT_UPDATING:
 		clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
@@ -275,6 +298,9 @@ active_transit:
 	case FSCACHE_OBJECT_EV_ERROR:
 		new_state = FSCACHE_OBJECT_DYING;
 		goto change_state;
+	case FSCACHE_OBJECT_EV_INVALIDATE:
+		new_state = FSCACHE_OBJECT_INVALIDATING;
+		goto change_state;
 	case FSCACHE_OBJECT_EV_UPDATE:
 		new_state = FSCACHE_OBJECT_UPDATING;
 		goto change_state;
@@ -679,6 +705,7 @@ static void fscache_withdraw_object(struct fscache_object *object)
 		if (object->cookie == cookie) {
 			hlist_del_init(&object->cookie_link);
 			object->cookie = NULL;
+			fscache_invalidation_complete(cookie);
 			detached = true;
 		}
 		spin_unlock(&cookie->lock);
@@ -888,3 +915,48 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
 	return result;
 }
 EXPORT_SYMBOL(fscache_check_aux);
+
+/*
+ * Asynchronously invalidate an object.
+ */
+static void fscache_invalidate_object(struct fscache_object *object)
+{
+	struct fscache_operation *op;
+	struct fscache_cookie *cookie = object->cookie;
+
+	_enter("{OBJ%x}", object->debug_id);
+
+	/* Reject any new read/write ops and abort any that are pending. */
+	fscache_invalidate_writes(cookie);
+	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+	fscache_cancel_all_ops(object);
+
+	/* Now we have to wait for in-progress reads and writes */
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op) {
+		fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
+		_leave(" [ENOMEM]");
+		return;
+	}
+
+	fscache_operation_init(op, object->cache->ops->invalidate_object, NULL);
+	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
+
+	spin_lock(&cookie->lock);
+	if (fscache_submit_exclusive_op(object, op) < 0)
+		BUG();
+	spin_unlock(&cookie->lock);
+	fscache_put_operation(op);
+
+	/* Once we've completed the invalidation, we know there will be no data
+	 * stored in the cache and thus we can reinstate the data-check-skip
+	 * optimisation.
+	 */
+	set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+
+	/* We can allow read and write requests to come in once again.  They'll
+	 * queue up behind our exclusive invalidation operation.
+	 */
+	fscache_invalidation_complete(cookie);
+	_leave("");
+}
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 748f9553c2cb..c58dbe613266 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -323,6 +323,38 @@ int fscache_cancel_op(struct fscache_operation *op)
 	return ret;
 }
 
+/*
+ * Cancel all pending operations on an object
+ */
+void fscache_cancel_all_ops(struct fscache_object *object)
+{
+	struct fscache_operation *op;
+
+	_enter("OBJ%x", object->debug_id);
+
+	spin_lock(&object->lock);
+
+	while (!list_empty(&object->pending_ops)) {
+		op = list_entry(object->pending_ops.next,
+				struct fscache_operation, pend_link);
+		fscache_stat(&fscache_n_op_cancelled);
+		list_del_init(&op->pend_link);
+
+		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
+		op->state = FSCACHE_OP_ST_CANCELLED;
+
+		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
+			object->n_exclusive--;
+		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
+			wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
+		fscache_put_operation(op);
+		cond_resched_lock(&object->lock);
+	}
+
+	spin_unlock(&object->lock);
+	_leave("");
+}
+
 /*
  * Record the completion of an in-progress operation.
  */
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index b38b13d2a555..7bf9d2557052 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -361,6 +361,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 	if (hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(page, !=, NULL);
 
@@ -483,6 +488,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	if (hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(*nr_pages, >, 0);
 	ASSERT(!list_empty(pages));
@@ -591,6 +601,11 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(page, !=, NULL);
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	if (fscache_wait_for_deferred_lookup(cookie) < 0)
 		return -ERESTARTSYS;
 
@@ -730,6 +745,37 @@ superseded:
 	_leave("");
 }
 
+/*
+ * Clear the pages pending writing for invalidation
+ */
+void fscache_invalidate_writes(struct fscache_cookie *cookie)
+{
+	struct page *page;
+	void *results[16];
+	int n, i;
+
+	_enter("");
+
+	while (spin_lock(&cookie->stores_lock),
+	       n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0,
+					      ARRAY_SIZE(results),
+					      FSCACHE_COOKIE_PENDING_TAG),
+	       n > 0) {
+		for (i = n - 1; i >= 0; i--) {
+			page = results[i];
+			radix_tree_delete(&cookie->stores, page->index);
+		}
+
+		spin_unlock(&cookie->stores_lock);
+
+		for (i = n - 1; i >= 0; i--)
+			page_cache_release(results[i]);
+	}
+
+	spin_unlock(&cookie->stores_lock);
+	_leave("");
+}
+
 /*
  * request a page be stored in the cache
  * - returns:
@@ -776,6 +822,11 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 
 	fscache_stat(&fscache_n_stores);
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!op)
 		goto nomem;
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 4765190d537f..51cdaee14109 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -80,6 +80,9 @@ atomic_t fscache_n_acquires_ok;
 atomic_t fscache_n_acquires_nobufs;
 atomic_t fscache_n_acquires_oom;
 
+atomic_t fscache_n_invalidates;
+atomic_t fscache_n_invalidates_run;
+
 atomic_t fscache_n_updates;
 atomic_t fscache_n_updates_null;
 atomic_t fscache_n_updates_run;
@@ -112,6 +115,7 @@ atomic_t fscache_n_cop_alloc_object;
 atomic_t fscache_n_cop_lookup_object;
 atomic_t fscache_n_cop_lookup_complete;
 atomic_t fscache_n_cop_grab_object;
+atomic_t fscache_n_cop_invalidate_object;
 atomic_t fscache_n_cop_update_object;
 atomic_t fscache_n_cop_drop_object;
 atomic_t fscache_n_cop_put_object;
@@ -168,6 +172,10 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_object_created),
 		   atomic_read(&fscache_n_object_lookups_timed_out));
 
+	seq_printf(m, "Invals : n=%u run=%u\n",
+		   atomic_read(&fscache_n_invalidates),
+		   atomic_read(&fscache_n_invalidates_run));
+
 	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
 		   atomic_read(&fscache_n_updates),
 		   atomic_read(&fscache_n_updates_null),
@@ -246,7 +254,8 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_cop_lookup_object),
 		   atomic_read(&fscache_n_cop_lookup_complete),
 		   atomic_read(&fscache_n_cop_grab_object));
-	seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n",
+	seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n",
+		   atomic_read(&fscache_n_cop_invalidate_object),
 		   atomic_read(&fscache_n_cop_update_object),
 		   atomic_read(&fscache_n_cop_drop_object),
 		   atomic_read(&fscache_n_cop_put_object),
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index f5facd1d333f..1e454ad7a832 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -254,6 +254,9 @@ struct fscache_cache_ops {
 	/* store the updated auxiliary data on an object */
 	void (*update_object)(struct fscache_object *object);
 
+	/* Invalidate an object */
+	void (*invalidate_object)(struct fscache_operation *op);
+
 	/* discard the resources pinned by an object and effect retirement if
 	 * necessary */
 	void (*drop_object)(struct fscache_object *object);
@@ -329,6 +332,7 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
 #define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
+#define FSCACHE_COOKIE_INVALIDATING	7	/* T if cookie is being invalidated */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
@@ -345,6 +349,7 @@ struct fscache_object {
 		/* active states */
 		FSCACHE_OBJECT_AVAILABLE,	/* cleaning up object after creation */
 		FSCACHE_OBJECT_ACTIVE,		/* object is usable */
+		FSCACHE_OBJECT_INVALIDATING,	/* object is invalidating */
 		FSCACHE_OBJECT_UPDATING,	/* object is updating */
 
 		/* terminal states */
@@ -378,7 +383,8 @@ struct fscache_object {
 #define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
 #define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
 #define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EVENTS_MASK	0x7f	/* mask of all events*/
+#define FSCACHE_OBJECT_EV_INVALIDATE	7	/* T if cache requested object invalidation */
+#define FSCACHE_OBJECT_EVENTS_MASK	0xff	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index f4b6353543bf..7a086235da4b 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie(
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
 extern void __fscache_update_cookie(struct fscache_cookie *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
+extern void __fscache_invalidate(struct fscache_cookie *);
+extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
 extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
 					struct page *,
 					fscache_rw_complete_t,
@@ -389,6 +391,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie)
 		return -ENOBUFS;
 }
 
+/**
+ * fscache_invalidate - Notify cache that an object needs invalidation
+ * @cookie: The cookie representing the cache object
+ *
+ * Notify the cache that an object is needs to be invalidated and that it
+ * should abort any retrievals or stores it is doing on the cache.  The object
+ * is then marked non-caching until such time as the invalidation is complete.
+ *
+ * This can be called with spinlocks held.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_invalidate(cookie);
+}
+
+/**
+ * fscache_wait_on_invalidate - Wait for invalidation to complete
+ * @cookie: The cookie representing the cache object
+ *
+ * Wait for the invalidation of an object to complete.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_wait_on_invalidate(cookie);
+}
+
 /**
  * fscache_reserve_space - Reserve data space for a cached object
  * @cookie: The cookie representing the cache object
-- 
cgit v1.2.3-55-g7522


From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 20 Dec 2012 21:52:36 +0000
Subject: VFS: Make more complete truncate operation available to CacheFiles

Make a more complete truncate operation available to CacheFiles (including
security checks and suchlike) so that it can use this to clear invalidated
cache files.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c          | 50 +++++++++++++++++++++++++++-----------------------
 include/linux/fs.h |  1 +
 2 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 182d8667b7bd..c819bbdab47f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	return ret;
 }
 
-static long do_sys_truncate(const char __user *pathname, loff_t length)
+long vfs_truncate(struct path *path, loff_t length)
 {
-	struct path path;
 	struct inode *inode;
-	int error;
-
-	error = -EINVAL;
-	if (length < 0)	/* sorry, but loff_t says... */
-		goto out;
+	long error;
 
-	error = user_path(pathname, &path);
-	if (error)
-		goto out;
-	inode = path.dentry->d_inode;
+	inode = path->dentry->d_inode;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
-	error = -EISDIR;
 	if (S_ISDIR(inode->i_mode))
-		goto dput_and_out;
-
-	error = -EINVAL;
+		return -EISDIR;
 	if (!S_ISREG(inode->i_mode))
-		goto dput_and_out;
+		return -EINVAL;
 
-	error = mnt_want_write(path.mnt);
+	error = mnt_want_write(path->mnt);
 	if (error)
-		goto dput_and_out;
+		goto out;
 
 	error = inode_permission(inode, MAY_WRITE);
 	if (error)
@@ -111,19 +100,34 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error)
-		error = security_path_truncate(&path);
+		error = security_path_truncate(path);
 	if (!error)
-		error = do_truncate(path.dentry, length, 0, NULL);
+		error = do_truncate(path->dentry, length, 0, NULL);
 
 put_write_and_out:
 	put_write_access(inode);
 mnt_drop_write_and_out:
-	mnt_drop_write(path.mnt);
-dput_and_out:
-	path_put(&path);
+	mnt_drop_write(path->mnt);
 out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(vfs_truncate);
+
+static long do_sys_truncate(const char __user *pathname, loff_t length)
+{
+	struct path path;
+	int error;
+
+	if (length < 0)	/* sorry, but loff_t says... */
+		return -EINVAL;
+
+	error = user_path(pathname, &path);
+	if (!error) {
+		error = vfs_truncate(&path, length);
+		path_put(&path);
+	}
+	return error;
+}
 
 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a823d4be38e7..017a15b707e2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1999,6 +1999,7 @@ struct filename {
 	bool			separate; /* should "name" be freed? */
 };
 
+extern long vfs_truncate(struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
 extern int do_fallocate(struct file *file, int mode, loff_t offset,
-- 
cgit v1.2.3-55-g7522


From 36a02de5d7981435931d4608ee3e510b752e072b Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 5 Dec 2012 13:34:46 +0000
Subject: FS-Cache: Convert the object event ID #defines into an enum

Convert the fscache_object event IDs from #defines into an enum.  Also add an
extra label to the enum to carry the event count and redefine the event mask
in terms of that.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 1e454ad7a832..73e68c8d5df4 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -337,6 +337,23 @@ struct fscache_cookie {
 
 extern struct fscache_cookie fscache_fsdef_index;
 
+/*
+ * Event list for fscache_object::{event_mask,events}
+ */
+enum {
+	FSCACHE_OBJECT_EV_REQUEUE,	/* T if object should be requeued */
+	FSCACHE_OBJECT_EV_UPDATE,	/* T if object should be updated */
+	FSCACHE_OBJECT_EV_INVALIDATE,	/* T if cache requested object invalidation */
+	FSCACHE_OBJECT_EV_CLEARED,	/* T if accessors all gone */
+	FSCACHE_OBJECT_EV_ERROR,	/* T if fatal error occurred during processing */
+	FSCACHE_OBJECT_EV_RELEASE,	/* T if netfs requested object release */
+	FSCACHE_OBJECT_EV_RETIRE,	/* T if netfs requested object retirement */
+	FSCACHE_OBJECT_EV_WITHDRAW,	/* T if cache requested object withdrawal */
+	NR_FSCACHE_OBJECT_EVENTS
+};
+
+#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1)
+
 /*
  * on-disk cache file or index handle
  */
@@ -376,15 +393,6 @@ struct fscache_object {
 	unsigned long		event_mask;	/* events this object is interested in */
 	unsigned long		events;		/* events to be processed by this object
 						 * (order is important - using fls) */
-#define FSCACHE_OBJECT_EV_REQUEUE	0	/* T if object should be requeued */
-#define FSCACHE_OBJECT_EV_UPDATE	1	/* T if object should be updated */
-#define FSCACHE_OBJECT_EV_CLEARED	2	/* T if accessors all gone */
-#define FSCACHE_OBJECT_EV_ERROR		3	/* T if fatal error occurred during processing */
-#define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
-#define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
-#define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EV_INVALIDATE	7	/* T if cache requested object invalidation */
-#define FSCACHE_OBJECT_EVENTS_MASK	0xff	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
-- 
cgit v1.2.3-55-g7522


From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 13 Dec 2012 20:03:13 +0000
Subject: FS-Cache: Mark cancellation of in-progress operation

Mark as cancelled an operation that is in progress rather than pending at the
time it is cancelled, and call fscache_complete_op() to cancel an operation so
that blocked ops can be started.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/interface.c     |  2 +-
 fs/fscache/operation.c        |  7 ++++---
 fs/fscache/page.c             | 10 +++++-----
 include/linux/fscache-cache.h |  4 ++--
 4 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 7a9d574b961c..746ce532e130 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -484,7 +484,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op)
 		}
 	}
 
-	fscache_op_complete(op);
+	fscache_op_complete(op, true);
 	_leave("");
 }
 
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 9e6b7d232bb1..36c59604130d 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -363,9 +363,9 @@ void fscache_cancel_all_ops(struct fscache_object *object)
 }
 
 /*
- * Record the completion of an in-progress operation.
+ * Record the completion or cancellation of an in-progress operation.
  */
-void fscache_op_complete(struct fscache_operation *op)
+void fscache_op_complete(struct fscache_operation *op, bool cancelled)
 {
 	struct fscache_object *object = op->object;
 
@@ -380,7 +380,8 @@ void fscache_op_complete(struct fscache_operation *op)
 
 	spin_lock(&object->lock);
 
-	op->state = FSCACHE_OP_ST_COMPLETE;
+	op->state = cancelled ?
+		FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE;
 
 	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
 		object->n_exclusive--;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ef0218f5080d..8a92b9fabe83 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -171,7 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 			fscache_abort_object(object);
 	}
 
-	fscache_op_complete(op);
+	fscache_op_complete(op, true);
 	_leave("");
 }
 
@@ -704,7 +704,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 		 * exists, so we should just cancel this write operation.
 		 */
 		spin_unlock(&object->lock);
-		op->op.state = FSCACHE_OP_ST_CANCELLED;
+		fscache_op_complete(&op->op, false);
 		_leave(" [inactive]");
 		return;
 	}
@@ -717,7 +717,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 		 * cancel this write operation.
 		 */
 		spin_unlock(&object->lock);
-		op->op.state = FSCACHE_OP_ST_CANCELLED;
+		fscache_op_complete(&op->op, false);
 		_leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}",
 		       _op->flags, _op->state, object->state, object->flags);
 		return;
@@ -755,7 +755,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
 		fscache_abort_object(object);
-		fscache_op_complete(&op->op);
+		fscache_op_complete(&op->op, true);
 	} else {
 		fscache_enqueue_operation(&op->op);
 	}
@@ -770,7 +770,7 @@ superseded:
 	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
-	fscache_op_complete(&op->op);
+	fscache_op_complete(&op->op, true);
 	_leave("");
 }
 
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 73e68c8d5df4..5dfa0aa216b6 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -116,7 +116,7 @@ extern atomic_t fscache_op_debug_id;
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
-extern void fscache_op_complete(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *, bool);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -196,7 +196,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 {
 	op->n_pages -= n_pages;
 	if (op->n_pages <= 0)
-		fscache_op_complete(&op->op);
+		fscache_op_complete(&op->op, true);
 }
 
 /**
-- 
cgit v1.2.3-55-g7522


From d30357f2f0ec0bfb67fd39f8f76d22d02d78631e Mon Sep 17 00:00:00 2001
From: Marco Stornelli
Date: Sat, 15 Dec 2012 11:59:20 +0100
Subject: vfs: drop vmtruncate

Removed vmtruncate

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/libfs.c         | 2 --
 include/linux/fs.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index 35fc6e74cd88..916da8c4158b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -369,8 +369,6 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 	struct inode *inode = dentry->d_inode;
 	int error;
 
-	WARN_ON_ONCE(inode->i_op->truncate);
-
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a823d4be38e7..a0c5ba57ffc5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1565,7 +1565,6 @@ struct inode_operations {
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
-	void (*truncate) (struct inode *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
-- 
cgit v1.2.3-55-g7522


From 7898575fc81bd707ce0844cb06874d48e39bbe09 Mon Sep 17 00:00:00 2001
From: Marco Stornelli
Date: Sat, 15 Dec 2012 12:00:02 +0100
Subject: mm: drop vmtruncate

Removed vmtruncate

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mm.h |  1 -
 mm/truncate.c      | 23 -----------------------
 2 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f4f906190bd..63204078f72b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1007,7 +1007,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
-extern int vmtruncate(struct inode *inode, loff_t offset);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/mm/truncate.c b/mm/truncate.c
index d51ce92d6e83..c75b736e54b7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -576,29 +576,6 @@ void truncate_setsize(struct inode *inode, loff_t newsize)
 }
 EXPORT_SYMBOL(truncate_setsize);
 
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @newsize: file offset to start truncating
- *
- * This function is deprecated and truncate_setsize or truncate_pagecache
- * should be used instead, together with filesystem specific block truncation.
- */
-int vmtruncate(struct inode *inode, loff_t newsize)
-{
-	int error;
-
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
-	truncate_setsize(inode, newsize);
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-}
-EXPORT_SYMBOL(vmtruncate);
-
 /**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
-- 
cgit v1.2.3-55-g7522


From 471667391a92bf7bf2cd4ff31a3ad88e5dec934b Mon Sep 17 00:00:00 2001
From: Alessio Igor Bogani
Date: Thu, 13 Dec 2012 12:22:39 +0100
Subject: vfs: Remove useless function prototypes

Commit 8e22cc88d68ca1a46d7d582938f979eb640ed30f removes the (un)lock_super
function definitions but forgets to remove their prototypes.

Signed-off-by: Alessio Igor Bogani <abogani@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0c5ba57ffc5..05cd238ad941 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1445,10 +1445,6 @@ static inline void sb_start_intwrite(struct super_block *sb)
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
-/* not quite ready to be deprecated, but... */
-extern void lock_super(struct super_block *);
-extern void unlock_super(struct super_block *);
-
 /*
  * VFS helper functions..
  */
-- 
cgit v1.2.3-55-g7522


From b9d6ba94b875192ef5e2dab92d72beea33b83c3d Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Thu, 20 Dec 2012 14:59:40 -0500
Subject: vfs: add a retry_estale helper function to handle retries on ESTALE

This function is expected to be called from path-based syscalls to help
them decide whether to try the lookup and call again in the event that
they got an -ESTALE return back on an earier try.

Currently, we only retry the call once on an ESTALE error, but in the
event that we decide that that's not enough in the future, we should be
able to change the logic in this helper without too much effort.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4bf19d8174ed..66542b644804 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -98,4 +98,20 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/**
+ * retry_estale - determine whether the caller should retry an operation
+ * @error: the error that would currently be returned
+ * @flags: flags being used for next lookup attempt
+ *
+ * Check to see if the error code was -ESTALE, and then determine whether
+ * to retry the call based on whether "flags" already has LOOKUP_REVAL set.
+ *
+ * Returns true if the caller should try the operation again.
+ */
+static inline bool
+retry_estale(const long error, const unsigned int flags)
+{
+	return error == -ESTALE && !(flags & LOOKUP_REVAL);
+}
+
 #endif /* _LINUX_NAMEI_H */
-- 
cgit v1.2.3-55-g7522


From 1ac12b4b6d707937f9de6d09622823b2fd0c93ef Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Tue, 11 Dec 2012 12:10:06 -0500
Subject: vfs: turn is_dir argument to kern_path_create into a lookup_flags arg

Where we can pass in LOOKUP_DIRECTORY or LOOKUP_REVAL. Any other flags
passed in here are currently ignored.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/syscalls.c |  2 +-
 drivers/base/devtmpfs.c                      |  2 +-
 fs/namei.c                                   | 21 ++++++++++++++++-----
 include/linux/namei.h                        |  4 ++--
 4 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 5b7d8ffbf890..baee994fe810 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -66,7 +66,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
 	struct dentry *dentry;
 	int ret;
 
-	dentry = user_path_create(AT_FDCWD, pathname, &path, 1);
+	dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
 	ret = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		ret = spufs_create(&path, dentry, flags, mode, neighbor);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 147d1a4dd269..17cf7cad601e 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -148,7 +148,7 @@ static int dev_mkdir(const char *name, umode_t mode)
 	struct path path;
 	int err;
 
-	dentry = kern_path_create(AT_FDCWD, name, &path, 1);
+	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
diff --git a/fs/namei.c b/fs/namei.c
index 25a41e02984b..8f8e41f6eb52 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3030,12 +3030,22 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 	return file;
 }
 
-struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
+struct dentry *kern_path_create(int dfd, const char *pathname,
+				struct path *path, unsigned int lookup_flags)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 	struct nameidata nd;
 	int err2;
-	int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+	int error;
+	bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
+
+	/*
+	 * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
+	 * other flags passed in are ignored!
+	 */
+	lookup_flags &= LOOKUP_REVAL;
+
+	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);
 	if (error)
 		return ERR_PTR(error);
 
@@ -3099,13 +3109,14 @@ void done_path_create(struct path *path, struct dentry *dentry)
 }
 EXPORT_SYMBOL(done_path_create);
 
-struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
+struct dentry *user_path_create(int dfd, const char __user *pathname,
+				struct path *path, unsigned int lookup_flags)
 {
 	struct filename *tmp = getname(pathname);
 	struct dentry *res;
 	if (IS_ERR(tmp))
 		return ERR_CAST(tmp);
-	res = kern_path_create(dfd, tmp->name, path, is_dir);
+	res = kern_path_create(dfd, tmp->name, path, lookup_flags);
 	putname(tmp);
 	return res;
 }
@@ -3228,7 +3239,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 	struct path path;
 	int error;
 
-	dentry = user_path_create(dfd, pathname, &path, 1);
+	dentry = user_path_create(dfd, pathname, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 66542b644804..e998c030061d 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -65,8 +65,8 @@ extern int user_path_at_empty(int, const char __user *, unsigned, struct path *,
 
 extern int kern_path(const char *, unsigned, struct path *);
 
-extern struct dentry *kern_path_create(int, const char *, struct path *, int);
-extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
+extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
+extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
-- 
cgit v1.2.3-55-g7522


From b66c5984017533316fd1951770302649baf1aa33 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Thu, 20 Dec 2012 15:05:16 -0800
Subject: exec: do not leave bprm->interp on stack

If a series of scripts are executed, each triggering module loading via
unprintable bytes in the script header, kernel stack contents can leak
into the command line.

Normally execution of binfmt_script and binfmt_misc happens recursively.
However, when modules are enabled, and unprintable bytes exist in the
bprm->buf, execution will restart after attempting to load matching
binfmt modules.  Unfortunately, the logic in binfmt_script and
binfmt_misc does not expect to get restarted.  They leave bprm->interp
pointing to their local stack.  This means on restart bprm->interp is
left pointing into unused stack memory which can then be copied into the
userspace argv areas.

After additional study, it seems that both recursion and restart remains
the desirable way to handle exec with scripts, misc, and modules.  As
such, we need to protect the changes to interp.

This changes the logic to require allocation for any changes to the
bprm->interp.  To avoid adding a new kmalloc to every exec, the default
value is left as-is.  Only when passing through binfmt_script or
binfmt_misc does an allocation take place.

For a proof of concept, see DoTest.sh from:

   http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: halfdog <me@halfdog.net>
Cc: P J P <ppandit@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c        |  5 ++++-
 fs/binfmt_script.c      |  4 +++-
 fs/exec.c               | 15 +++++++++++++++
 include/linux/binfmts.h |  1 +
 4 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9be335fb8a7c..0c8869fdd14e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -172,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		goto _error;
 	bprm->argc ++;
 
-	bprm->interp = iname;	/* for binfmt_script */
+	/* Update interp in case binfmt_script needs it. */
+	retval = bprm_change_interp(iname, bprm);
+	if (retval < 0)
+		goto _error;
 
 	interp_file = open_exec (iname);
 	retval = PTR_ERR (interp_file);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1610a91637e5..5027a3e14922 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -80,7 +80,9 @@ static int load_script(struct linux_binprm *bprm)
 	retval = copy_strings_kernel(1, &i_name, bprm);
 	if (retval) return retval; 
 	bprm->argc++;
-	bprm->interp = interp;
+	retval = bprm_change_interp(interp, bprm);
+	if (retval < 0)
+		return retval;
 
 	/*
 	 * OK, now restart the process with the interpreter's dentry.
diff --git a/fs/exec.c b/fs/exec.c
index d8e1191cb112..237d5342786c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1175,9 +1175,24 @@ void free_bprm(struct linux_binprm *bprm)
 		mutex_unlock(&current->signal->cred_guard_mutex);
 		abort_creds(bprm->cred);
 	}
+	/* If a binfmt changed the interp, free it. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
 	kfree(bprm);
 }
 
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+	/* If a binfmt changed the interp, free it first. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
+	bprm->interp = kstrdup(interp, GFP_KERNEL);
+	if (!bprm->interp)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
 /*
  * install the new credentials for this executable
  */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a4c2b565c835..bdf3965f0a29 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
+extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
-- 
cgit v1.2.3-55-g7522


From c4e18497d8fd92eef2c6e7eadcc1a107ccd115ea Mon Sep 17 00:00:00 2001
From: Guenter Roeck
Date: Thu, 20 Dec 2012 15:05:42 -0800
Subject: linux/kernel.h: fix DIV_ROUND_CLOSEST with unsigned divisors

Commit 263a523d18bc ("linux/kernel.h: Fix warning seen with W=1 due to
change in DIV_ROUND_CLOSEST") fixes a warning seen with W=1 due to
change in DIV_ROUND_CLOSEST.

Unfortunately, the C compiler converts divide operations with unsigned
divisors to unsigned, even if the dividend is signed and negative (for
example, -10 / 5U = 858993457).  The C standard says "If one operand has
unsigned int type, the other operand is converted to unsigned int", so
the compiler is not to blame.  As a result, DIV_ROUND_CLOSEST(0, 2U) and
similar operations now return bad values, since the automatic conversion
of expressions such as "0 - 2U/2" to unsigned was not taken into
account.

Fix by checking for the divisor variable type when deciding which
operation to perform.  This fixes DIV_ROUND_CLOSEST(0, 2U), but still
returns bad values for negative dividends divided by unsigned divisors.
Mark the latter case as unsupported.

One observed effect of this problem is that the s2c_hwmon driver reports
a value of 4198403 instead of 0 if the ADC reads 0.

Other impact is unpredictable.  Problem is seen if the divisor is an
unsigned variable or constant and the dividend is less than (divisor/2).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Juergen Beisert <jbe@pengutronix.de>
Tested-by: Juergen Beisert <jbe@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: <stable@vger.kernel.org>	[3.7.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d140e8fb075f..c566927efcbd 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -77,13 +77,15 @@
 
 /*
  * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors.
+ * to closest integer. Result is undefined for negative divisors and
+ * for negative dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
+	(((typeof(x))-1) > 0 ||				\
+	 ((typeof(divisor))-1) > 0 || (__x) > 0) ?	\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
-- 
cgit v1.2.3-55-g7522


From d54eaa5a0fde0a202e4e91f200f818edcef15bee Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Fri, 21 Dec 2012 20:23:36 +0000
Subject: dm: prepare to support WRITE SAME

Allow targets to opt in to WRITE SAME support by setting
'num_write_same_requests' in the dm_target structure.

A dm device will only advertise WRITE SAME support if all its
targets and all its underlying devices support it.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         | 30 +++++++++++++++++++++++++++++-
 include/linux/device-mapper.h |  5 +++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index fa2955790031..6be58b696377 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1414,6 +1414,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t,
 	return 1;
 }
 
+static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
+					 sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !q->limits.max_write_same_sectors;
+}
+
+static bool dm_table_supports_write_same(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->num_write_same_requests)
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
+			return false;
+	}
+
+	return true;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
@@ -1445,7 +1472,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	else
 		queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
 
-	q->limits.max_write_same_sectors = 0;
+	if (!dm_table_supports_write_same(t))
+		q->limits.max_write_same_sectors = 0;
 
 	dm_table_set_integrity(t);
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 38d27a10aa5d..d1f6cd8486f2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -205,6 +205,11 @@ struct dm_target {
 	 */
 	unsigned num_discard_requests;
 
+	/*
+	 * The number of WRITE SAME requests that will be submitted to the target.
+	 */
+	unsigned num_write_same_requests;
+
 	/* target specific data */
 	void *private;
 
-- 
cgit v1.2.3-55-g7522


From c0820cf5ad09522bdd9ff68e84841a09c9f339d8 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 21 Dec 2012 20:23:38 +0000
Subject: dm: introduce per_bio_data

Introduce a field per_bio_data_size in struct dm_target.

Targets can set this field in the constructor. If a target sets this
field to a non-zero value, "per_bio_data_size" bytes of auxiliary data
are allocated for each bio submitted to the target. These data can be
used for any purpose by the target and help us improve performance by
removing some per-target mempools.

Per-bio data is accessed with dm_per_bio_data. The
argument data_size must be the same as the value per_bio_data_size in
dm_target.

If the target has a pointer to per_bio_data, it can get a pointer to
the bio with dm_bio_from_per_bio_data() function (data_size must be the
same as the value passed to dm_per_bio_data).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         | 11 ++++++++++-
 drivers/md/dm.c               | 33 +++++++++++++++------------------
 drivers/md/dm.h               |  2 +-
 include/linux/device-mapper.h | 30 ++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 6be58b696377..daf25d0890b3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t)
 int dm_table_alloc_md_mempools(struct dm_table *t)
 {
 	unsigned type = dm_table_get_type(t);
+	unsigned per_bio_data_size = 0;
+	struct dm_target *tgt;
+	unsigned i;
 
 	if (unlikely(type == DM_TYPE_NONE)) {
 		DMWARN("no table type is set, can't allocate mempools");
 		return -EINVAL;
 	}
 
-	t->mempools = dm_alloc_md_mempools(type, t->integrity_supported);
+	if (type == DM_TYPE_BIO_BASED)
+		for (i = 0; i < t->num_targets; i++) {
+			tgt = t->targets + i;
+			per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
+		}
+
+	t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size);
 	if (!t->mempools)
 		return -ENOMEM;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5401cdce0fc5..2765cf2ba0ff 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -62,18 +62,6 @@ struct dm_io {
 	spinlock_t endio_lock;
 };
 
-/*
- * For bio-based dm.
- * One of these is allocated per target within a bio.  Hopefully
- * this will be simplified out one day.
- */
-struct dm_target_io {
-	struct dm_io *io;
-	struct dm_target *ti;
-	union map_info info;
-	struct bio clone;
-};
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
@@ -1980,13 +1968,20 @@ static void free_dev(struct mapped_device *md)
 
 static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 {
-	struct dm_md_mempools *p;
+	struct dm_md_mempools *p = dm_table_get_md_mempools(t);
 
-	if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs)
-		/* the md already has necessary mempools */
+	if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) {
+		/*
+		 * The md already has necessary mempools. Reload just the
+		 * bioset because front_pad may have changed because
+		 * a different table was loaded.
+		 */
+		bioset_free(md->bs);
+		md->bs = p->bs;
+		p->bs = NULL;
 		goto out;
+	}
 
-	p = dm_table_get_md_mempools(t);
 	BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
 
 	md->io_pool = p->io_pool;
@@ -2745,7 +2740,7 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
 {
 	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
 	unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
@@ -2753,6 +2748,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
 	if (!pools)
 		return NULL;
 
+	per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io));
+
 	pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
 			 mempool_create_slab_pool(MIN_IOS, _io_cache) :
 			 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
@@ -2768,7 +2765,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
 
 	pools->bs = (type == DM_TYPE_BIO_BASED) ?
 		bioset_create(pool_size,
-			      offsetof(struct dm_target_io, clone)) :
+			      per_bio_data_size + offsetof(struct dm_target_io, clone)) :
 		bioset_create(pool_size,
 			      offsetof(struct dm_rq_clone_bio_info, clone));
 	if (!pools->bs)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 6a99fefaa743..45b97da1bd06 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -159,7 +159,7 @@ void dm_kcopyd_exit(void);
 /*
  * Mempool operations
  */
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity);
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
 #endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index d1f6cd8486f2..6f0e73b4a80d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -210,6 +210,12 @@ struct dm_target {
 	 */
 	unsigned num_write_same_requests;
 
+	/*
+	 * The minimum number of extra bytes allocated in each bio for the
+	 * target to use.  dm_per_bio_data returns the data location.
+	 */
+	unsigned per_bio_data_size;
+
 	/* target specific data */
 	void *private;
 
@@ -246,6 +252,30 @@ struct dm_target_callbacks {
 	int (*congested_fn) (struct dm_target_callbacks *, int);
 };
 
+/*
+ * For bio-based dm.
+ * One of these is allocated for each bio.
+ * This structure shouldn't be touched directly by target drivers.
+ * It is here so that we can inline dm_per_bio_data and
+ * dm_bio_from_per_bio_data
+ */
+struct dm_target_io {
+	struct dm_io *io;
+	struct dm_target *ti;
+	union map_info info;
+	struct bio clone;
+};
+
+static inline void *dm_per_bio_data(struct bio *bio, size_t data_size)
+{
+	return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
+}
+
+static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
+{
+	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
+}
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
-- 
cgit v1.2.3-55-g7522


From ddbd658f6446a35e4d6ba84812fd71023320cae2 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 21 Dec 2012 20:23:39 +0000
Subject: dm: move target request nr to dm_target_io

This patch moves target_request_nr from map_info to dm_target_io and
makes it accessible with dm_bio_get_target_request_nr.

This patch is a preparation for the next patch that removes map_info.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c          |  2 +-
 drivers/md/dm-stripe.c        |  4 ++--
 drivers/md/dm.c               |  3 ++-
 include/linux/device-mapper.h | 14 ++++++++++----
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5e88bc437be0..b7e179cdc5af 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1682,7 +1682,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	chunk_t chunk;
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		if (!map_context->target_request_nr)
+		if (!dm_bio_get_target_request_nr(bio))
 			bio->bi_bdev = s->origin->bdev;
 		else
 			bio->bi_bdev = s->cow->bdev;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index e2f876539743..4e7ba82146c0 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -279,13 +279,13 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
 	unsigned target_request_nr;
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		target_request_nr = map_context->target_request_nr;
+		target_request_nr = dm_bio_get_target_request_nr(bio);
 		BUG_ON(target_request_nr >= sc->stripes);
 		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
 		return DM_MAPIO_REMAPPED;
 	}
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
-		target_request_nr = map_context->target_request_nr;
+		target_request_nr = dm_bio_get_target_request_nr(bio);
 		BUG_ON(target_request_nr >= sc->stripes);
 		return stripe_map_discard(sc, bio, target_request_nr);
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2765cf2ba0ff..5ee580b4f330 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1099,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 	tio->io = ci->io;
 	tio->ti = ti;
 	memset(&tio->info, 0, sizeof(tio->info));
+	tio->target_request_nr = 0;
 
 	return tio;
 }
@@ -1109,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
 	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs);
 	struct bio *clone = &tio->clone;
 
-	tio->info.target_request_nr = request_nr;
+	tio->target_request_nr = request_nr;
 
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 6f0e73b4a80d..eb96ef6fd8b7 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
 	void *ptr;
 	unsigned long long ll;
-	unsigned target_request_nr;
 };
 
 /*
@@ -193,20 +192,21 @@ struct dm_target {
 	 * A number of zero-length barrier requests that will be submitted
 	 * to the target for the purpose of flushing cache.
 	 *
-	 * The request number will be placed in union map_info->target_request_nr.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 * It is a responsibility of the target driver to remap these requests
 	 * to the real underlying devices.
 	 */
 	unsigned num_flush_requests;
 
 	/*
-	 * The number of discard requests that will be submitted to the
-	 * target.  map_info->request_nr is used just like num_flush_requests.
+	 * The number of discard requests that will be submitted to the target.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 */
 	unsigned num_discard_requests;
 
 	/*
 	 * The number of WRITE SAME requests that will be submitted to the target.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 */
 	unsigned num_write_same_requests;
 
@@ -263,6 +263,7 @@ struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
 	union map_info info;
+	unsigned target_request_nr;
 	struct bio clone;
 };
 
@@ -276,6 +277,11 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
 	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
 }
 
+static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio)
+{
+	return container_of(bio, struct dm_target_io, clone)->target_request_nr;
+}
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
-- 
cgit v1.2.3-55-g7522


From 7de3ee57da4b717050e79c9313a9bf66ccc72519 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 21 Dec 2012 20:23:41 +0000
Subject: dm: remove map_info

This patch removes map_info from bio-based device mapper targets.
map_info is still used for request-based targets.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         |  5 ++---
 drivers/md/dm-delay.c         |  5 ++---
 drivers/md/dm-flakey.c        |  6 ++----
 drivers/md/dm-linear.c        |  3 +--
 drivers/md/dm-raid.c          |  4 ++--
 drivers/md/dm-raid1.c         |  6 ++----
 drivers/md/dm-snap.c          | 12 ++++--------
 drivers/md/dm-stripe.c        |  6 ++----
 drivers/md/dm-target.c        |  5 ++---
 drivers/md/dm-thin.c          | 15 +++++----------
 drivers/md/dm-verity.c        |  3 +--
 drivers/md/dm-zero.c          |  5 ++---
 drivers/md/dm.c               |  4 ++--
 include/linux/device-mapper.h |  6 ++----
 14 files changed, 31 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bbf459bca61d..f7369f9d8595 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1689,8 +1689,7 @@ bad:
 	return ret;
 }
 
-static int crypt_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int crypt_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_crypt_io *io;
 	struct crypt_config *cc = ti->private;
@@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 11, 0},
+	.version = {1, 12, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index f53846f9ab50..cc1bd048acb2 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti)
 	atomic_set(&dc->may_delay, 1);
 }
 
-static int delay_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int delay_map(struct dm_target *ti, struct bio *bio)
 {
 	struct delay_c *dc = ti->private;
 
@@ -338,7 +337,7 @@ out:
 
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 1, 0},
+	.version     = {1, 2, 0},
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,
 	.dtr	     = delay_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 660f98167e7b..9721f2ffb1a2 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -270,8 +270,7 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
 	}
 }
 
-static int flakey_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int flakey_map(struct dm_target *ti, struct bio *bio)
 {
 	struct flakey_c *fc = ti->private;
 	unsigned elapsed;
@@ -321,8 +320,7 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 82222a8cf750..328cad5617ab 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -88,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
 		bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int linear_map(struct dm_target *ti, struct bio *bio)
 {
 	linear_map_bio(ti, bio);
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 4a20bf8c72da..3d8984edeff7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1218,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti)
 	context_free(rs);
 }
 
-static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context)
+static int raid_map(struct dm_target *ti, struct bio *bio)
 {
 	struct raid_set *rs = ti->private;
 	struct mddev *mddev = &rs->md;
@@ -1432,7 +1432,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 3, 1},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 57685cf0afa8..fa519185ebba 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1142,8 +1142,7 @@ static void mirror_dtr(struct dm_target *ti)
 /*
  * Mirror mapping function
  */
-static int mirror_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int mirror_map(struct dm_target *ti, struct bio *bio)
 {
 	int r, rw = bio_rw(bio);
 	struct mirror *m;
@@ -1192,8 +1191,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	int rw = bio_rw(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e1ecacf2456f..59fc18ae52c2 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1567,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
 					  s->store->chunk_mask);
 }
 
-static int snapshot_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
+static int snapshot_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
 	struct dm_snapshot *s = ti->private;
@@ -1683,8 +1682,7 @@ out:
  * If merging is currently taking place on the chunk in question, the
  * I/O is deferred by adding it to s->bios_queued_during_merge.
  */
-static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
-			      union map_info *map_context)
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
 	struct dm_snapshot *s = ti->private;
@@ -1744,8 +1742,7 @@ out_unlock:
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
-			   int error, union map_info *map_context)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct dm_snapshot *s = ti->private;
 
@@ -2119,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti)
 	dm_put_device(ti, dev);
 }
 
-static int origin_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int origin_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_dev *dev = ti->private;
 	bio->bi_bdev = dev->bdev;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4e7ba82146c0..6b0e5ea38027 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,8 +271,7 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
 	}
 }
 
-static int stripe_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int stripe_map(struct dm_target *ti, struct bio *bio)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
@@ -342,8 +341,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
 	return 0;
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	unsigned i;
 	char major_minor[16];
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 8da366cf381c..617d21a77256 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt)
 	/* empty */
 }
 
-static int io_err_map(struct dm_target *tt, struct bio *bio,
-		      union map_info *map_context)
+static int io_err_map(struct dm_target *tt, struct bio *bio)
 {
 	return -EIO;
 }
 
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 0, 1},
+	.version = {1, 1, 0},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e7743c69a24c..675ae5274016 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1371,8 +1371,7 @@ static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
 /*
  * Non-blocking function called from the thin target's map function.
  */
-static int thin_bio_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
+static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 {
 	int r;
 	struct thin_c *tc = ti->private;
@@ -1980,8 +1979,7 @@ out_unlock:
 	return r;
 }
 
-static int pool_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int pool_map(struct dm_target *ti, struct bio *bio)
 {
 	int r;
 	struct pool_c *pt = ti->private;
@@ -2626,17 +2624,14 @@ out_unlock:
 	return r;
 }
 
-static int thin_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int thin_map(struct dm_target *ti, struct bio *bio)
 {
 	bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
 
-	return thin_bio_map(ti, bio, map_context);
+	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti,
-		      struct bio *bio, int err,
-		      union map_info *map_context)
+static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 78f349894b24..52cde982164a 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -458,8 +458,7 @@ no_prefetch_cluster:
  * Bio map function. It allocates dm_verity_io structure and bio vector and
  * fills them. Then it issues prefetches and the I/O.
  */
-static int verity_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int verity_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_verity *v = ti->private;
 	struct dm_verity_io *io;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index cc2b3cb81946..69a5c3b3b340 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 /*
  * Return zeros only on reads
  */
-static int zero_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int zero_map(struct dm_target *ti, struct bio *bio)
 {
 	switch(bio_rw(bio)) {
 	case READ:
@@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio,
 
 static struct target_type zero_target = {
 	.name   = "zero",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr    = zero_ctr,
 	.map    = zero_map,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5ee580b4f330..c72e4d5a9617 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -645,7 +645,7 @@ static void clone_endio(struct bio *bio, int error)
 		error = -EIO;
 
 	if (endio) {
-		r = endio(tio->ti, bio, error, &tio->info);
+		r = endio(tio->ti, bio, error);
 		if (r < 0 || r == DM_ENDIO_REQUEUE)
 			/*
 			 * error and requeue request are handled
@@ -1004,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
 	 */
 	atomic_inc(&tio->io->io_count);
 	sector = clone->bi_sector;
-	r = ti->type->map(ti, clone, &tio->info);
+	r = ti->type->map(ti, clone);
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index eb96ef6fd8b7..bf6afa2fc432 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -45,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  * = 1: simple remap complete
  * = 2: The target wants to push back the io
  */
-typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
-			  union map_info *map_context);
+typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
 typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 				  union map_info *map_context);
 
@@ -59,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error,
-			    union map_info *map_context);
+			    struct bio *bio, int error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
 				    struct request *clone, int error,
 				    union map_info *map_context);
-- 
cgit v1.2.3-55-g7522


From 30e6c9fa93cf3dbc7cc6df1d748ad25e4264545a Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 20 Dec 2012 17:25:08 +0000
Subject: net: devnet_rename_seq should be a seqcount

Using a seqlock for devnet_rename_seq is not a good idea,
as device_rename() can sleep.

As we hold RTNL, we dont need a protection for writers,
and only need a seqcount so that readers can catch a change done
by a writer.

Bug added in commit c91f6df2db4972d3 (sockopt: Change getsockopt() of
SO_BINDTODEVICE to return an interface name)

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 18 +++++++++---------
 net/core/sock.c           |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 02e0f6b156c3..c599e4782d45 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1576,7 +1576,7 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
-extern seqlock_t	devnet_rename_seq;	/* Device rename lock */
+extern seqcount_t	devnet_rename_seq;	/* Device rename seq */
 
 
 #define for_each_netdev(net, d)		\
diff --git a/net/core/dev.c b/net/core/dev.c
index d0cbc93fcf32..515473ee52cb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -203,7 +203,7 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-DEFINE_SEQLOCK(devnet_rename_seq);
+seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -1093,10 +1093,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	write_seqlock(&devnet_rename_seq);
+	write_seqcount_begin(&devnet_rename_seq);
 
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return 0;
 	}
 
@@ -1104,7 +1104,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	err = dev_get_valid_name(net, dev, newname);
 	if (err < 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return err;
 	}
 
@@ -1112,11 +1112,11 @@ rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return ret;
 	}
 
-	write_sequnlock(&devnet_rename_seq);
+	write_seqcount_end(&devnet_rename_seq);
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1135,7 @@ rollback:
 		/* err >= 0 after dev_alloc_name() or stores the first errno */
 		if (err >= 0) {
 			err = ret;
-			write_seqlock(&devnet_rename_seq);
+			write_seqcount_begin(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
 			goto rollback;
 		} else {
@@ -4180,7 +4180,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 		return -EFAULT;
 
 retry:
-	seq = read_seqbegin(&devnet_rename_seq);
+	seq = read_seqcount_begin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
@@ -4190,7 +4190,7 @@ retry:
 
 	strcpy(ifr.ifr_name, dev->name);
 	rcu_read_unlock();
-	if (read_seqretry(&devnet_rename_seq, seq))
+	if (read_seqcount_retry(&devnet_rename_seq, seq))
 		goto retry;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
diff --git a/net/core/sock.c b/net/core/sock.c
index a692ef49c9bb..bc131d419683 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -583,7 +583,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
 		goto out;
 
 retry:
-	seq = read_seqbegin(&devnet_rename_seq);
+	seq = read_seqcount_begin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
 	ret = -ENODEV;
@@ -594,7 +594,7 @@ retry:
 
 	strcpy(devname, dev->name);
 	rcu_read_unlock();
-	if (read_seqretry(&devnet_rename_seq, seq))
+	if (read_seqcount_retry(&devnet_rename_seq, seq))
 		goto retry;
 
 	len = strlen(devname) + 1;
-- 
cgit v1.2.3-55-g7522


From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Tue, 25 Dec 2012 13:29:52 -0500
Subject: ext4: fix deadlock in journal_unmap_buffer()

We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start.  We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY.  Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c       | 82 ++++++++++++++++++++++++++++++++++++++++++++-------
 fs/jbd2/transaction.c | 27 +++++++++--------
 include/linux/jbd2.h  |  2 +-
 3 files changed, 86 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 12d3fbcff59f..cbfe13bf5b2a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 	block_invalidatepage(page, offset);
 }
 
-static void ext4_journalled_invalidatepage(struct page *page,
-					   unsigned long offset)
+static int __ext4_journalled_invalidatepage(struct page *page,
+					    unsigned long offset)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
 	if (offset == 0)
 		ClearPageChecked(page);
 
-	jbd2_journal_invalidatepage(journal, page, offset);
+	return jbd2_journal_invalidatepage(journal, page, offset);
+}
+
+/* Wrapper for aops... */
+static void ext4_journalled_invalidatepage(struct page *page,
+					   unsigned long offset)
+{
+	WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
 }
 
 static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return err;
 }
 
+/*
+ * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
+ * buffers that are attached to a page stradding i_size and are undergoing
+ * commit. In that case we have to wait for commit to finish and try again.
+ */
+static void ext4_wait_for_tail_page_commit(struct inode *inode)
+{
+	struct page *page;
+	unsigned offset;
+	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+	tid_t commit_tid = 0;
+	int ret;
+
+	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	/*
+	 * All buffers in the last page remain valid? Then there's nothing to
+	 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+	 * blocksize case
+	 */
+	if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+		return;
+	while (1) {
+		page = find_lock_page(inode->i_mapping,
+				      inode->i_size >> PAGE_CACHE_SHIFT);
+		if (!page)
+			return;
+		ret = __ext4_journalled_invalidatepage(page, offset);
+		unlock_page(page);
+		page_cache_release(page);
+		if (ret != -EBUSY)
+			return;
+		commit_tid = 0;
+		read_lock(&journal->j_state_lock);
+		if (journal->j_committing_transaction)
+			commit_tid = journal->j_committing_transaction->t_tid;
+		read_unlock(&journal->j_state_lock);
+		if (commit_tid)
+			jbd2_log_wait_commit(journal, commit_tid);
+	}
+}
+
 /*
  * ext4_setattr()
  *
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != i_size_read(inode)) {
-			truncate_setsize(inode, attr->ia_size);
-			/* Inode size will be reduced, wait for dio in flight.
-			 * Temporarily disable dioread_nolock to prevent
-			 * livelock. */
+		if (attr->ia_size != inode->i_size) {
+			loff_t oldsize = inode->i_size;
+
+			i_size_write(inode, attr->ia_size);
+			/*
+			 * Blocks are going to be removed from the inode. Wait
+			 * for dio in flight.  Temporarily disable
+			 * dioread_nolock to prevent livelock.
+			 */
 			if (orphan) {
-				ext4_inode_block_unlocked_dio(inode);
-				inode_dio_wait(inode);
-				ext4_inode_resume_unlocked_dio(inode);
+				if (!ext4_should_journal_data(inode)) {
+					ext4_inode_block_unlocked_dio(inode);
+					inode_dio_wait(inode);
+					ext4_inode_resume_unlocked_dio(inode);
+				} else
+					ext4_wait_for_tail_page_commit(inode);
 			}
+			/*
+			 * Truncate pagecache after we've waited for commit
+			 * in data=journal mode to make pages freeable.
+			 */
+			truncate_pagecache(inode, oldsize, inode->i_size);
 		}
 		ext4_truncate(inode);
 	}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cd4485db42b3..ddc51a7f4508 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 
 	BUFFER_TRACE(bh, "entry");
 
-retry:
 	/*
 	 * It is safe to proceed here without the j_list_lock because the
 	 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1935,14 +1934,11 @@ retry:
 		 * for commit and try again.
 		 */
 		if (partial_page) {
-			tid_t tid = journal->j_committing_transaction->t_tid;
-
 			jbd2_journal_put_journal_head(jh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			write_unlock(&journal->j_state_lock);
-			jbd2_log_wait_commit(journal, tid);
-			goto retry;
+			return -EBUSY;
 		}
 		/*
 		 * OK, buffer won't be reachable after truncate. We just set
@@ -2003,21 +1999,23 @@ zap_buffer_unlocked:
  * @page:    page to flush
  * @offset:  length of page to invalidate.
  *
- * Reap page buffers containing data after offset in page.
- *
+ * Reap page buffers containing data after offset in page. Can return -EBUSY
+ * if buffers are part of the committing transaction and the page is straddling
+ * i_size. Caller then has to wait for current commit and try again.
  */
-void jbd2_journal_invalidatepage(journal_t *journal,
-		      struct page *page,
-		      unsigned long offset)
+int jbd2_journal_invalidatepage(journal_t *journal,
+				struct page *page,
+				unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
 	unsigned int curr_off = 0;
 	int may_free = 1;
+	int ret = 0;
 
 	if (!PageLocked(page))
 		BUG();
 	if (!page_has_buffers(page))
-		return;
+		return 0;
 
 	/* We will potentially be playing with lists other than just the
 	 * data lists (especially for journaled data mode), so be
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
 		if (offset <= curr_off) {
 			/* This block is wholly outside the truncation point */
 			lock_buffer(bh);
-			may_free &= journal_unmap_buffer(journal, bh,
-							 offset > 0);
+			ret = journal_unmap_buffer(journal, bh, offset > 0);
 			unlock_buffer(bh);
+			if (ret < 0)
+				return ret;
+			may_free &= ret;
 		}
 		curr_off = next_off;
 		bh = next;
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
 		if (may_free && try_to_free_buffers(page))
 			J_ASSERT(!page_has_buffers(page));
 	}
+	return 0;
 }
 
 /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 1be23d9fdacb..e30b66346942 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1098,7 +1098,7 @@ void		 jbd2_journal_set_triggers(struct buffer_head *,
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
-extern void	 jbd2_journal_invalidatepage(journal_t *,
+extern int	 jbd2_journal_invalidatepage(journal_t *,
 				struct page *, unsigned long);
 extern int	 jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 extern int	 jbd2_journal_stop(handle_t *);
-- 
cgit v1.2.3-55-g7522


From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001
From: Stephen Warren
Date: Mon, 24 Dec 2012 11:14:58 -0700
Subject: namei.h: include errno.h

This solves:

In file included from fs/ext3/symlink.c:20:0:
include/linux/namei.h: In function 'retry_estale':
include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function)

Signed-off-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index e998c030061d..5a5ff57ceed4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -2,6 +2,7 @@
 #define _LINUX_NAMEI_H
 
 #include <linux/dcache.h>
+#include <linux/errno.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
 
-- 
cgit v1.2.3-55-g7522


From c876ad7682155958d0c9c27afe9017925c230d64 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Fri, 21 Dec 2012 20:27:12 -0800
Subject: pidns: Stop pid allocation when init dies

Oleg pointed out that in a pid namespace the sequence.
- pid 1 becomes a zombie
- setns(thepidns), fork,...
- reaping pid 1.
- The injected processes exiting.

Can lead to processes attempting access their child reaper and
instead following a stale pointer.

That waitpid for init can return before all of the processes in
the pid namespace have exited is also unfortunate.

Avoid these problems by disabling the allocation of new pids in a pid
namespace when init dies, instead of when the last process in a pid
namespace is reaped.

Pointed-out-by:  Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/pid.h           |  1 +
 include/linux/pid_namespace.h |  4 +++-
 kernel/pid.c                  | 15 ++++++++++++---
 kernel/pid_namespace.c        |  4 ++++
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index b152d44fb181..2381c973d897 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
+extern void disable_pid_allocation(struct pid_namespace *ns);
 
 /*
  * ns_of_pid() returns the pid namespace in which the specified pid was
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index bf285999273a..215e5e3dda10 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,7 +21,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
-	int nr_hashed;
+	unsigned int nr_hashed;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
@@ -42,6 +42,8 @@ struct pid_namespace {
 
 extern struct pid_namespace init_pid_ns;
 
+#define PIDNS_HASH_ADDING (1U << 31)
+
 #ifdef CONFIG_PID_NS
 static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
diff --git a/kernel/pid.c b/kernel/pid.c
index 36aa02ff17d6..de9af600006f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
 			wake_up_process(ns->child_reaper);
 			break;
 		case 0:
-			ns->nr_hashed = -1;
 			schedule_work(&ns->proc_work);
 			break;
 		}
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
 	upid = pid->numbers + ns->level;
 	spin_lock_irq(&pidmap_lock);
-	if (ns->nr_hashed < 0)
+	if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
 		goto out_unlock;
 	for ( ; upid >= pid->numbers; --upid) {
 		hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
 	goto out;
 }
 
+void disable_pid_allocation(struct pid_namespace *ns)
+{
+	spin_lock_irq(&pidmap_lock);
+	ns->nr_hashed &= ~PIDNS_HASH_ADDING;
+	spin_unlock_irq(&pidmap_lock);
+}
+
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 {
 	struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
+	/* Veryify no one has done anything silly */
+	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
+
 	/* bump default and minimum pid_max based on number of cpus */
 	pid_max = min(pid_max_max, max_t(int, pid_max,
 				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
-	init_pid_ns.nr_hashed = 1;
+	init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index fdbd0cdf271a..c1c3dc1c6023 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
 	ns->user_ns = get_user_ns(user_ns);
+	ns->nr_hashed = PIDNS_HASH_ADDING;
 	INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
 	set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	int rc;
 	struct task_struct *task, *me = current;
 
+	/* Don't allow any more processes into the pid namespace */
+	disable_pid_allocation(pid_ns);
+
 	/* Ignore SIGCHLD causing any terminated children to autoreap */
 	spin_lock_irq(&me->sighand->siglock);
 	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
-- 
cgit v1.2.3-55-g7522


From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Sat, 1 Dec 2012 12:37:20 -0800
Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz

Otherwise it fails like this on cards like the Transcend 16GB SDHC card:

    mmc0: new SDHC card at address b368
    mmcblk0: mmc0:b368 SDC   15.0 GiB
    mmcblk0: error -110 sending status command, retrying
    mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0

Tested on my Lenovo x200 laptop.

[bhelgaas: changelog]
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Chris Ball <cjb@laptop.org>
CC: Manoj Iyer <manoj.iyer@canonical.com>
CC: stable@vger.kernel.org---
 drivers/pci/quirks.c    | 7 +++++--
 include/linux/pci_ids.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8f7a6344e79e..0369fb6fc1da 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2725,7 +2725,7 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 	if (PCI_FUNC(dev->devfn))
 		return;
 	/*
-	 * RICOH 0xe823 SD/MMC card reader fails to recognize
+	 * RICOH 0xe822 and 0xe823 SD/MMC card readers fail to recognize
 	 * certain types of SD/MMC cards. Lowering the SD base
 	 * clock frequency from 200Mhz to 50Mhz fixes this issue.
 	 *
@@ -2736,7 +2736,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 	 * 0xf9  - Key register for 0x150
 	 * 0xfc  - key register for 0xe1
 	 */
-	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
+	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE822 ||
+	    dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
 		pci_write_config_byte(dev, 0xf9, 0xfc);
 		pci_write_config_byte(dev, 0x150, 0x10);
 		pci_write_config_byte(dev, 0xf9, 0x00);
@@ -2763,6 +2764,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 #endif /*CONFIG_MMC_RICOH_MMC*/
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f8447376ddb..0eb65796bcb9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1568,6 +1568,7 @@
 #define PCI_DEVICE_ID_RICOH_RL5C476	0x0476
 #define PCI_DEVICE_ID_RICOH_RL5C478	0x0478
 #define PCI_DEVICE_ID_RICOH_R5C822	0x0822
+#define PCI_DEVICE_ID_RICOH_R5CE822	0xe822
 #define PCI_DEVICE_ID_RICOH_R5CE823	0xe823
 #define PCI_DEVICE_ID_RICOH_R5C832	0x0832
 #define PCI_DEVICE_ID_RICOH_R5C843	0x0843
-- 
cgit v1.2.3-55-g7522


From ad4b3fb7ff9940bcdb1e4cd62bd189d10fa636ba Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Fri, 21 Dec 2012 13:03:50 -0500
Subject: mm: Fix PageHead when !CONFIG_PAGEFLAGS_EXTENDED

Unfortunately with !CONFIG_PAGEFLAGS_EXTENDED, (!PageHead) is false, and
(PageHead) is true, for tail pages.  If this is indeed the intended
behavior, which I doubt because it breaks cache cleaning on some ARM
systems, then the nomenclature is highly problematic.

This patch makes sure PageHead is only true for head pages and PageTail
is only true for tail pages, and neither is true for non-compound pages.

[ This buglet seems ancient - seems to have been introduced back in Apr
  2008 in commit 6a1e7f777f61: "pageflags: convert to the use of new
  macros".  And the reason nobody noticed is because the PageHead()
  tests are almost all about just sanity-checking, and only used on
  pages that are actual page heads.  The fact that the old code returned
  true for tail pages too was thus not really noticeable.   - Linus ]

Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
Acked-by:  Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Will Deacon <Will.Deacon@arm.com>
Cc: Steve Capper <Steve.Capper@arm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: stable@kernel.org  # 2.6.26+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b5d13841604e..70473da47b3f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -362,7 +362,7 @@ static inline void ClearPageCompound(struct page *page)
  * pages on the LRU and/or pagecache.
  */
 TESTPAGEFLAG(Compound, compound)
-__PAGEFLAG(Head, compound)
+__SETPAGEFLAG(Head, compound)  __CLEARPAGEFLAG(Head, compound)
 
 /*
  * PG_reclaim is used in combination with PG_compound to mark the
@@ -374,8 +374,14 @@ __PAGEFLAG(Head, compound)
  * PG_compound & PG_reclaim	=> Tail page
  * PG_compound & ~PG_reclaim	=> Head page
  */
+#define PG_head_mask ((1L << PG_compound))
 #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
 
+static inline int PageHead(struct page *page)
+{
+	return ((page->flags & PG_head_tail_mask) == PG_head_mask);
+}
+
 static inline int PageTail(struct page *page)
 {
 	return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
-- 
cgit v1.2.3-55-g7522


From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Wed, 2 Jan 2013 02:04:23 -0800
Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str

Remove the unused argument (formerly no_context) from mpol_parse_str()
and from mpol_to_str().

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c        |  2 +-
 include/linux/mempolicy.h | 11 ++++-------
 mm/mempolicy.c            |  6 ++----
 mm/shmem.c                |  4 ++--
 4 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 448455b7fd91..ca5ce7f9f800 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	walk.mm = mm;
 
 	pol = get_vma_policy(task, vma, vma->vm_start);
-	mpol_to_str(buffer, sizeof(buffer), pol, 0);
+	mpol_to_str(buffer, sizeof(buffer), pol);
 	mpol_cond_put(pol);
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 9adc270de7ef..92bc9988a180 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 
 
 #ifdef CONFIG_TMPFS
-extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
+extern int mpol_parse_str(char *str, struct mempolicy **mpol);
 #endif
 
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-			int no_context);
+extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -296,15 +295,13 @@ static inline void check_highest_zone(int k)
 }
 
 #ifdef CONFIG_TMPFS
-static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
-				int no_context)
+static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
 	return 1;	/* error */
 }
 #endif
 
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-				int no_context)
+static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	return 0;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 02c914cca53d..1cb200af3828 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2612,14 +2612,13 @@ static const char * const policy_modes[] =
  * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
  * @str:  string containing mempolicy to parse
  * @mpol:  pointer to struct mempolicy pointer, returned on success.
- * @unused:  redundant argument, to be removed later.
  *
  * Format of input:
  *	<mode>[=<flags>][:<nodelist>]
  *
  * On success, returns 0, else 1
  */
-int mpol_parse_str(char *str, struct mempolicy **mpol, int unused)
+int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
 	struct mempolicy *new = NULL;
 	unsigned short mode;
@@ -2747,13 +2746,12 @@ out:
  * @buffer:  to contain formatted mempolicy string
  * @maxlen:  length of @buffer
  * @pol:  pointer to mempolicy to be formatted
- * @unused:  redundant argument, to be removed later.
  *
  * Convert a mempolicy into a string.
  * Returns the number of characters in buffer (if positive)
  * or an error (negative)
  */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
 	int l;
diff --git a/mm/shmem.c b/mm/shmem.c
index 5c90d84c2b02..5dd56f6efdbd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 	if (!mpol || mpol->mode == MPOL_DEFAULT)
 		return;		/* show nothing */
 
-	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+	mpol_to_str(buffer, sizeof(buffer), mpol);
 
 	seq_printf(seq, ",mpol=%s", buffer);
 }
@@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 			if (!gid_valid(sbinfo->gid))
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (mpol_parse_str(value, &sbinfo->mpol, 1))
+			if (mpol_parse_str(value, &sbinfo->mpol))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
-- 
cgit v1.2.3-55-g7522


From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 21 Dec 2012 23:10:25 +0000
Subject: mm: mempolicy: Convert shared_policy mutex to spinlock

Sasha was fuzzing with trinity and reported the following problem:

  BUG: sleeping function called from invalid context at kernel/mutex.c:269
  in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main
  2 locks held by trinity-main/6361:
   #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff810aa314>] __do_page_fault+0x1e4/0x4f0
   #1:  (&(&mm->page_table_lock)->rlock){+.+...}, at: [<ffffffff8122f017>] handle_pte_fault+0x3f7/0x6a0
  Pid: 6361, comm: trinity-main Tainted: G        W
  3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74
  Call Trace:
    __might_sleep+0x1c3/0x1e0
    mutex_lock_nested+0x29/0x50
    mpol_shared_policy_lookup+0x2e/0x90
    shmem_get_policy+0x2e/0x30
    get_vma_policy+0x5a/0xa0
    mpol_misplaced+0x41/0x1d0
    handle_pte_fault+0x465/0x6a0

This was triggered by a different version of automatic NUMA balancing
but in theory the current version is vunerable to the same problem.

do_numa_page
  -> numa_migrate_prep
    -> mpol_misplaced
      -> get_vma_policy
        -> shmem_get_policy

It's very unlikely this will happen as shared pages are not marked
pte_numa -- see the page_mapcount() check in change_pte_range() -- but
it is possible.

To address this, this patch restores sp->lock as originally implemented
by Kosaki Motohiro.  In the path where get_vma_policy() is called, it
should not be calling sp_alloc() so it is not necessary to treat the PTL
specially.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 +-
 mm/mempolicy.c            | 68 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 49 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 92bc9988a180..0d7df39a5885 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -123,7 +123,7 @@ struct sp_node {
 
 struct shared_policy {
 	struct rb_root root;
-	struct mutex mutex;
+	spinlock_t lock;
 };
 
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1cb200af3828..e2df1c1fb41f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->mutex */
+/* Caller holds sp->lock */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
 	if (!sp->root.rb_node)
 		return NULL;
-	mutex_lock(&sp->mutex);
+	spin_lock(&sp->lock);
 	sn = sp_lookup(sp, idx, idx+1);
 	if (sn) {
 		mpol_get(sn->policy);
 		pol = sn->policy;
 	}
-	mutex_unlock(&sp->mutex);
+	spin_unlock(&sp->lock);
 	return pol;
 }
 
@@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 	sp_free(n);
 }
 
+static void sp_node_init(struct sp_node *node, unsigned long start,
+			unsigned long end, struct mempolicy *pol)
+{
+	node->start = start;
+	node->end = end;
+	node->policy = pol;
+}
+
 static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 				struct mempolicy *pol)
 {
@@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 		return NULL;
 	}
 	newpol->flags |= MPOL_F_SHARED;
-
-	n->start = start;
-	n->end = end;
-	n->policy = newpol;
+	sp_node_init(n, start, end, newpol);
 
 	return n;
 }
@@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 				 unsigned long end, struct sp_node *new)
 {
 	struct sp_node *n;
+	struct sp_node *n_new = NULL;
+	struct mempolicy *mpol_new = NULL;
 	int ret = 0;
 
-	mutex_lock(&sp->mutex);
+restart:
+	spin_lock(&sp->lock);
 	n = sp_lookup(sp, start, end);
 	/* Take care of old policies in the same range. */
 	while (n && n->start < end) {
@@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 		} else {
 			/* Old policy spanning whole new range. */
 			if (n->end > end) {
-				struct sp_node *new2;
-				new2 = sp_alloc(end, n->end, n->policy);
-				if (!new2) {
-					ret = -ENOMEM;
-					goto out;
-				}
+				if (!n_new)
+					goto alloc_new;
+
+				*mpol_new = *n->policy;
+				atomic_set(&mpol_new->refcnt, 1);
+				sp_node_init(n_new, n->end, end, mpol_new);
+				sp_insert(sp, n_new);
 				n->end = start;
-				sp_insert(sp, new2);
+				n_new = NULL;
+				mpol_new = NULL;
 				break;
 			} else
 				n->end = start;
@@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 	}
 	if (new)
 		sp_insert(sp, new);
-out:
-	mutex_unlock(&sp->mutex);
+	spin_unlock(&sp->lock);
+	ret = 0;
+
+err_out:
+	if (mpol_new)
+		mpol_put(mpol_new);
+	if (n_new)
+		kmem_cache_free(sn_cache, n_new);
+
 	return ret;
+
+alloc_new:
+	spin_unlock(&sp->lock);
+	ret = -ENOMEM;
+	n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+	if (!n_new)
+		goto err_out;
+	mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+	if (!mpol_new)
+		goto err_out;
+	goto restart;
 }
 
 /**
@@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 	int ret;
 
 	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
-	mutex_init(&sp->mutex);
+	spin_lock_init(&sp->lock);
 
 	if (mpol) {
 		struct vm_area_struct pvma;
@@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
 	if (!p->root.rb_node)
 		return;
-	mutex_lock(&p->mutex);
+	spin_lock(&p->lock);
 	next = rb_first(&p->root);
 	while (next) {
 		n = rb_entry(next, struct sp_node, nd);
 		next = rb_next(&n->nd);
 		sp_delete(p, n);
 	}
-	mutex_unlock(&p->mutex);
+	spin_unlock(&p->lock);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
cgit v1.2.3-55-g7522


From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 2 Jan 2013 15:12:55 +0000
Subject: UAPI: Remove empty Kbuild files

Empty files can get deleted by the patch program, so remove empty Kbuild
files and their links from the parent Kbuilds.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/Kbuild            | 3 ---
 include/linux/Kbuild      | 5 -----
 include/linux/hdlc/Kbuild | 0
 include/linux/hsi/Kbuild  | 0
 include/linux/raid/Kbuild | 0
 include/linux/usb/Kbuild  | 0
 include/rdma/Kbuild       | 0
 include/sound/Kbuild      | 0
 8 files changed, 8 deletions(-)
 delete mode 100644 include/linux/Kbuild
 delete mode 100644 include/linux/hdlc/Kbuild
 delete mode 100644 include/linux/hsi/Kbuild
 delete mode 100644 include/linux/raid/Kbuild
 delete mode 100644 include/linux/usb/Kbuild
 delete mode 100644 include/rdma/Kbuild
 delete mode 100644 include/sound/Kbuild

(limited to 'include/linux')

diff --git a/include/Kbuild b/include/Kbuild
index 83256b64166a..1dfd33e8d43b 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -1,8 +1,5 @@
 # Top-level Makefile calls into asm-$(ARCH)
 # List only non-arch directories below
 
-header-y += linux/
-header-y += sound/
-header-y += rdma/
 header-y += video/
 header-y += scsi/
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
deleted file mode 100644
index 7fe2dae251e5..000000000000
--- a/include/linux/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-header-y += dvb/
-header-y += hdlc/
-header-y += hsi/
-header-y += raid/
-header-y += usb/
diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/sound/Kbuild b/include/sound/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
-- 
cgit v1.2.3-55-g7522


From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 21 Dec 2012 15:02:05 -0800
Subject: pstore: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit from the pstore filesystem.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: Anton Vorontsov <cbouatmailru@gmail.com>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pstore/ram.c            | 14 ++++++--------
 fs/pstore/ram_core.c       |  9 ++++-----
 include/linux/pstore_ram.h |  5 ++---
 3 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f883e7e74305..7003e5266f25 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -291,9 +291,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
 	kfree(cxt->przs);
 }
 
-static int __devinit ramoops_init_przs(struct device *dev,
-				       struct ramoops_context *cxt,
-				       phys_addr_t *paddr, size_t dump_mem_sz)
+static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
+			     phys_addr_t *paddr, size_t dump_mem_sz)
 {
 	int err = -ENOMEM;
 	int i;
@@ -336,10 +335,9 @@ fail_prz:
 	return err;
 }
 
-static int __devinit ramoops_init_prz(struct device *dev,
-				      struct ramoops_context *cxt,
-				      struct persistent_ram_zone **prz,
-				      phys_addr_t *paddr, size_t sz, u32 sig)
+static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
+			    struct persistent_ram_zone **prz,
+			    phys_addr_t *paddr, size_t sz, u32 sig)
 {
 	if (!sz)
 		return 0;
@@ -367,7 +365,7 @@ static int __devinit ramoops_init_prz(struct device *dev,
 	return 0;
 }
 
-static int __devinit ramoops_probe(struct platform_device *pdev)
+static int ramoops_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ramoops_platform_data *pdata = pdev->dev.platform_data;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eecd2a8a84dd..0306303be372 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
 	return 0;
 }
 
-static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz,
-					      u32 sig, int ecc_size)
+static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
+				    int ecc_size)
 {
 	int ret;
 
@@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
 	kfree(prz);
 }
 
-struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start,
-							  size_t size, u32 sig,
-							  int ecc_size)
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+					       u32 sig, int ecc_size)
 {
 	struct persistent_ram_zone *prz;
 	int ret = -ENOMEM;
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 098d2a838296..cb6ab5feab67 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -46,9 +46,8 @@ struct persistent_ram_zone {
 	size_t old_log_size;
 };
 
-struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start,
-							  size_t size, u32 sig,
-							  int ecc_size);
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+					       u32 sig, int ecc_size);
 void persistent_ram_free(struct persistent_ram_zone *prz);
 void persistent_ram_zap(struct persistent_ram_zone *prz);
 
-- 
cgit v1.2.3-55-g7522


From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 21 Dec 2012 15:12:59 -0800
Subject: Drivers: bcma: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit, __devexit_p, and __devexit
from these drivers.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: "Rafał Miłecki" <zajec5@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bcma/bcma_private.h               |  6 +++---
 drivers/bcma/driver_gmac_cmn.c            |  2 +-
 drivers/bcma/driver_pci.c                 |  4 ++--
 drivers/bcma/driver_pci_host.c            | 13 ++++++-------
 drivers/bcma/host_pci.c                   |  8 ++++----
 drivers/bcma/main.c                       |  2 +-
 include/linux/bcma/bcma_driver_gmac_cmn.h |  2 +-
 include/linux/bcma/bcma_driver_pci.h      |  2 +-
 8 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 4a2d72ec6d43..19e3fbfd5757 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -22,7 +22,7 @@
 struct bcma_bus;
 
 /* main.c */
-int __devinit bcma_bus_register(struct bcma_bus *bus);
+int bcma_bus_register(struct bcma_bus *bus);
 void bcma_bus_unregister(struct bcma_bus *bus);
 int __init bcma_bus_early_register(struct bcma_bus *bus,
 				   struct bcma_device *core_cc,
@@ -87,8 +87,8 @@ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
 extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
 
 #ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE
-bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
-void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
+bool bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
+void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
 #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */
 
 #ifdef CONFIG_BCMA_DRIVER_GPIO
diff --git a/drivers/bcma/driver_gmac_cmn.c b/drivers/bcma/driver_gmac_cmn.c
index 834225f65e8f..dcb137926d31 100644
--- a/drivers/bcma/driver_gmac_cmn.c
+++ b/drivers/bcma/driver_gmac_cmn.c
@@ -8,7 +8,7 @@
 #include "bcma_private.h"
 #include <linux/bcma/bcma.h>
 
-void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc)
+void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc)
 {
 	mutex_init(&gc->phy_mutex);
 }
diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c
index c39ee6d45850..cf7a476a519f 100644
--- a/drivers/bcma/driver_pci.c
+++ b/drivers/bcma/driver_pci.c
@@ -207,14 +207,14 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc)
  * Init.
  **************************************************/
 
-static void __devinit bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc)
+static void bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc)
 {
 	bcma_core_pci_fixcfg(pc);
 	bcma_pcicore_serdes_workaround(pc);
 	bcma_core_pci_config_fixup(pc);
 }
 
-void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc)
+void bcma_core_pci_init(struct bcma_drv_pci *pc)
 {
 	if (pc->setup_done)
 		return;
diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index e6b5c89469dc..af0c9fabee54 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -24,7 +24,7 @@
 #define BCMA_PCI_SLOT_MAX	16
 #define	PCI_CONFIG_SPACE_SIZE	256
 
-bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc)
+bool bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc)
 {
 	struct bcma_bus *bus = pc->core->bus;
 	u16 chipid_top;
@@ -264,10 +264,9 @@ static int bcma_core_pci_hostmode_write_config(struct pci_bus *bus,
 }
 
 /* return cap_offset if requested capability exists in the PCI config space */
-static u8 __devinit bcma_find_pci_capability(struct bcma_drv_pci *pc,
-					     unsigned int dev,
-					     unsigned int func, u8 req_cap_id,
-					     unsigned char *buf, u32 *buflen)
+static u8 bcma_find_pci_capability(struct bcma_drv_pci *pc, unsigned int dev,
+				   unsigned int func, u8 req_cap_id,
+				   unsigned char *buf, u32 *buflen)
 {
 	u8 cap_id;
 	u8 cap_ptr = 0;
@@ -334,7 +333,7 @@ static u8 __devinit bcma_find_pci_capability(struct bcma_drv_pci *pc,
  * Retry Status (CRS) Completion Status to software then
  * enable the feature.
  */
-static void __devinit bcma_core_pci_enable_crs(struct bcma_drv_pci *pc)
+static void bcma_core_pci_enable_crs(struct bcma_drv_pci *pc)
 {
 	struct bcma_bus *bus = pc->core->bus;
 	u8 cap_ptr, root_ctrl, root_cap, dev;
@@ -381,7 +380,7 @@ static void __devinit bcma_core_pci_enable_crs(struct bcma_drv_pci *pc)
 	}
 }
 
-void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
+void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 {
 	struct bcma_bus *bus = pc->core->bus;
 	struct bcma_drv_pci_host *pc_host;
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 98fdc3e014e7..fbf2759e7e4e 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -155,8 +155,8 @@ static const struct bcma_host_ops bcma_host_pci_ops = {
 	.awrite32	= bcma_host_pci_awrite32,
 };
 
-static int __devinit bcma_host_pci_probe(struct pci_dev *dev,
-					 const struct pci_device_id *id)
+static int bcma_host_pci_probe(struct pci_dev *dev,
+			       const struct pci_device_id *id)
 {
 	struct bcma_bus *bus;
 	int err = -ENOMEM;
@@ -226,7 +226,7 @@ err_kfree_bus:
 	return err;
 }
 
-static void __devexit bcma_host_pci_remove(struct pci_dev *dev)
+static void bcma_host_pci_remove(struct pci_dev *dev)
 {
 	struct bcma_bus *bus = pci_get_drvdata(dev);
 
@@ -284,7 +284,7 @@ static struct pci_driver bcma_pci_bridge_driver = {
 	.name = "bcma-pci-bridge",
 	.id_table = bcma_pci_bridge_tbl,
 	.probe = bcma_host_pci_probe,
-	.remove = __devexit_p(bcma_host_pci_remove),
+	.remove = bcma_host_pci_remove,
 	.driver.pm = BCMA_PM_OPS,
 };
 
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 53ba20ca17e0..4a92f647b58b 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -192,7 +192,7 @@ static void bcma_unregister_cores(struct bcma_bus *bus)
 		platform_device_unregister(bus->drv_cc.watchdog);
 }
 
-int __devinit bcma_bus_register(struct bcma_bus *bus)
+int bcma_bus_register(struct bcma_bus *bus)
 {
 	int err;
 	struct bcma_device *core;
diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h
index def894b83b0d..4dd1f33e36a2 100644
--- a/include/linux/bcma/bcma_driver_gmac_cmn.h
+++ b/include/linux/bcma/bcma_driver_gmac_cmn.h
@@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn {
 #define gmac_cmn_write32(gc, offset, val)	bcma_write32((gc)->core, offset, val)
 
 #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN
-extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
+extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
 #else
 static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { }
 #endif
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 41da581e1612..c48d98d27b77 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -214,7 +214,7 @@ struct bcma_drv_pci {
 #define pcicore_write16(pc, offset, val)	bcma_write16((pc)->core, offset, val)
 #define pcicore_write32(pc, offset, val)	bcma_write32((pc)->core, offset, val)
 
-extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc);
+extern void bcma_core_pci_init(struct bcma_drv_pci *pc);
 extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc,
 				 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend);
-- 
cgit v1.2.3-55-g7522


From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 21 Dec 2012 15:15:49 -0800
Subject: include: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit from some include files that
were previously missed.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/parport.h |  4 ++--
 include/linux/ata_platform.h  | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h
index 40528cb977e8..2c9f9d4336ca 100644
--- a/include/asm-generic/parport.h
+++ b/include/asm-generic/parport.h
@@ -10,8 +10,8 @@
  * to devices on the PCI bus.
  */
 
-static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
-static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
+static int parport_pc_find_isa_ports(int autoirq, int autodma);
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
 {
 #ifdef CONFIG_ISA
 	return parport_pc_find_isa_ports(autoirq, autodma);
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index fe9989636b62..b9fde17f767c 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -15,12 +15,12 @@ struct pata_platform_info {
 	unsigned int irq_flags;
 };
 
-extern int __devinit __pata_platform_probe(struct device *dev,
-					   struct resource *io_res,
-					   struct resource *ctl_res,
-					   struct resource *irq_res,
-					   unsigned int ioport_shift,
-					   int __pio_mask);
+extern int __pata_platform_probe(struct device *dev,
+				 struct resource *io_res,
+				 struct resource *ctl_res,
+				 struct resource *irq_res,
+				 unsigned int ioport_shift,
+				 int __pio_mask);
 
 /*
  * Marvell SATA private data
-- 
cgit v1.2.3-55-g7522


From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky
Date: Fri, 4 Jan 2013 15:34:50 -0800
Subject: ipc: add sysctl to specify desired next object id

Add 3 new variables and sysctls to tune them (by one "next_id" variable
for messages, semaphores and shared memory respectively).  This variable
can be used to set desired id for next allocated IPC object.  By default
it's equal to -1 and old behaviour is preserved.  If this variable is
non-negative, then desired idr will be extracted from it and used as a
start value to search for free IDR slot.

Notes:

1) this patch doesn't guarantee that the new object will have desired
   id.  So it's up to user space how to handle new object with wrong id.

2) After a sucessful id allocation attempt, "next_id" will be set back
   to -1 (if it was non-negative).

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/kernel.txt | 19 +++++++++++++++++++
 include/linux/ipc_namespace.h   |  1 +
 ipc/ipc_sysctl.c                | 32 ++++++++++++++++++++++++++++++++
 ipc/util.c                      | 16 ++++++++++++----
 ipc/util.h                      |  1 +
 5 files changed, 65 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 2907ba6c3607..51b953a1b149 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -38,6 +38,7 @@ show up in /proc/sys/kernel:
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
 - modules_disabled
+- msg_next_id		      [ sysv ipc ]
 - msgmax
 - msgmnb
 - msgmni
@@ -62,7 +63,9 @@ show up in /proc/sys/kernel:
 - rtsig-max
 - rtsig-nr
 - sem
+- sem_next_id		      [ sysv ipc ]
 - sg-big-buff                 [ generic SCSI device (sg) ]
+- shm_next_id		      [ sysv ipc ]
 - shm_rmid_forced
 - shmall
 - shmmax                      [ sysv ipc ]
@@ -320,6 +323,22 @@ to false.
 
 ==============================================================
 
+msg_next_id, sem_next_id, and shm_next_id:
+
+These three toggles allows to specify desired id for next allocated IPC
+object: message, semaphore or shared memory respectively.
+
+By default they are equal to -1, which means generic allocation logic.
+Possible values to set are in range {0..INT_MAX}.
+
+Notes:
+1) kernel doesn't guarantee, that new object will have desired id. So,
+it's up to userspace, how to handle an object with "wrong" id.
+2) Toggle with non-default value will be set back to -1 by kernel after
+successful IPC object allocation.
+
+==============================================================
+
 nmi_watchdog:
 
 Enables/Disables the NMI watchdog on x86 systems. When the value is
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index fe771978e877..ae221a7b5092 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -24,6 +24,7 @@ struct ipc_ids {
 	unsigned short seq_max;
 	struct rw_semaphore rw_mutex;
 	struct idr ipcs_idr;
+	int next_id;
 };
 
 struct ipc_namespace {
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 00fba2bab87d..130dfece27ac 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -158,6 +158,9 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 
 static int zero;
 static int one = 1;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int int_max = INT_MAX;
+#endif
 
 static struct ctl_table ipc_kern_table[] = {
 	{
@@ -227,6 +230,35 @@ static struct ctl_table ipc_kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	{
+		.procname	= "sem_next_id",
+		.data		= &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
+		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
+	},
+	{
+		.procname	= "msg_next_id",
+		.data		= &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
+		.maxlen		= sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
+	},
+	{
+		.procname	= "shm_next_id",
+		.data		= &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
+		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
+	},
+#endif
 	{}
 };
 
diff --git a/ipc/util.c b/ipc/util.c
index 72fd0785ac94..74e1d9c7a98a 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -122,6 +122,7 @@ void ipc_init_ids(struct ipc_ids *ids)
 
 	ids->in_use = 0;
 	ids->seq = 0;
+	ids->next_id = -1;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
 		if (seq_limit > USHRT_MAX)
@@ -252,6 +253,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	kuid_t euid;
 	kgid_t egid;
 	int id, err;
+	int next_id = ids->next_id;
 
 	if (size > IPCMNI)
 		size = IPCMNI;
@@ -264,7 +266,8 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	rcu_read_lock();
 	spin_lock(&new->lock);
 
-	err = idr_get_new(&ids->ipcs_idr, new, &id);
+	err = idr_get_new_above(&ids->ipcs_idr, new,
+				(next_id < 0) ? 0 : ipcid_to_idx(next_id), &id);
 	if (err) {
 		spin_unlock(&new->lock);
 		rcu_read_unlock();
@@ -277,9 +280,14 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	new->cuid = new->uid = euid;
 	new->gid = new->cgid = egid;
 
-	new->seq = ids->seq++;
-	if(ids->seq > ids->seq_max)
-		ids->seq = 0;
+	if (next_id < 0) {
+		new->seq = ids->seq++;
+		if (ids->seq > ids->seq_max)
+			ids->seq = 0;
+	} else {
+		new->seq = ipcid_to_seqx(next_id);
+		ids->next_id = -1;
+	}
 
 	new->id = ipc_buildid(id, new->seq);
 	return id;
diff --git a/ipc/util.h b/ipc/util.h
index c8fe2f7631e9..a61e0ca2bffd 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -92,6 +92,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define IPC_SHM_IDS	2
 
 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
+#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
 
 /* must be called with ids->rw_mutex acquired for writing */
 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
-- 
cgit v1.2.3-55-g7522


From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky
Date: Fri, 4 Jan 2013 15:34:52 -0800
Subject: ipc: message queue receive cleanup

Move all message related manipulation into one function msg_fill().
Actually, two functions because of the compat one.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h |  5 +++--
 ipc/compat.c        | 45 +++++++++++++++++++--------------------------
 ipc/msg.c           | 44 +++++++++++++++++++++++---------------------
 3 files changed, 45 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 7a4b9e97d29a..fc5743a554e6 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -34,7 +34,8 @@ struct msg_queue {
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
-extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-			size_t msgsz, long msgtyp, int msgflg);
+extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+		      int msgflg,
+		      long (*msg_fill)(void __user *, struct msg_msg *, size_t));
 
 #endif /* _LINUX_MSG_H */
diff --git a/ipc/compat.c b/ipc/compat.c
index ad9518eb26e0..eb3ea16d2d1d 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -306,6 +306,20 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
 	return err;
 }
 
+long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+{
+	struct compat_msgbuf __user *msgp = dest;
+	size_t msgsz;
+
+	if (put_user(msg->m_type, &msgp->mtype))
+		return -EFAULT;
+
+	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
+	if (store_msg(msgp->mtext, msg, msgsz))
+		return -EFAULT;
+	return msgsz;
+}
+
 #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
 long compat_sys_semctl(int first, int second, int third, void __user *uptr)
 {
@@ -337,10 +351,6 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
 long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 			   int version, void __user *uptr)
 {
-	struct compat_msgbuf __user *up;
-	long type;
-	int err;
-
 	if (first < 0)
 		return -EINVAL;
 	if (second < 0)
@@ -348,23 +358,14 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 
 	if (!version) {
 		struct compat_ipc_kludge ipck;
-		err = -EINVAL;
 		if (!uptr)
-			goto out;
-		err = -EFAULT;
+			return -EINVAL;
 		if (copy_from_user (&ipck, uptr, sizeof(ipck)))
-			goto out;
+			return -EFAULT;
 		uptr = compat_ptr(ipck.msgp);
 		msgtyp = ipck.msgtyp;
 	}
-	up = uptr;
-	err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third);
-	if (err < 0)
-		goto out;
-	if (put_user(type, &up->mtype))
-		err = -EFAULT;
-out:
-	return err;
+	return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill);
 }
 #else
 long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
@@ -385,16 +386,8 @@ long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp,
 long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
 		       compat_ssize_t msgsz, long msgtyp, int msgflg)
 {
-	long err, mtype;
-
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, (ssize_t)msgsz, msgtyp, msgflg);
-	if (err < 0)
-		goto out;
-
-	if (put_user(mtype, &msgp->mtype))
-		err = -EFAULT;
- out:
-	return err;
+	return do_msgrcv(msqid, msgp, (ssize_t)msgsz, msgtyp, msgflg,
+			 compat_do_msg_fill);
 }
 #endif
 
diff --git a/ipc/msg.c b/ipc/msg.c
index 2f272fa76595..cefc24f46e3e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -755,15 +755,30 @@ static inline int convert_mode(long *msgtyp, int msgflg)
 	return SEARCH_EQUAL;
 }
 
-long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-		size_t msgsz, long msgtyp, int msgflg)
+static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+{
+	struct msgbuf __user *msgp = dest;
+	size_t msgsz;
+
+	if (put_user(msg->m_type, &msgp->mtype))
+		return -EFAULT;
+
+	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
+	if (store_msg(msgp->mtext, msg, msgsz))
+		return -EFAULT;
+	return msgsz;
+}
+
+long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+	       int msgflg,
+	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
 {
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int mode;
 	struct ipc_namespace *ns;
 
-	if (msqid < 0 || (long) msgsz < 0)
+	if (msqid < 0 || (long) bufsz < 0)
 		return -EINVAL;
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
@@ -804,7 +819,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			 * Found a suitable message.
 			 * Unlink it from the queue.
 			 */
-			if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
+			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
 				msg = ERR_PTR(-E2BIG);
 				goto out_unlock;
 			}
@@ -831,7 +846,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 		if (msgflg & MSG_NOERROR)
 			msr_d.r_maxsize = INT_MAX;
 		else
-			msr_d.r_maxsize = msgsz;
+			msr_d.r_maxsize = bufsz;
 		msr_d.r_msg = ERR_PTR(-EAGAIN);
 		current->state = TASK_INTERRUPTIBLE;
 		msg_unlock(msq);
@@ -894,29 +909,16 @@ out_unlock:
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
 
-	msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
-	*pmtype = msg->m_type;
-	if (store_msg(mtext, msg, msgsz))
-		msgsz = -EFAULT;
-
+	bufsz = msg_handler(buf, msg, bufsz);
 	free_msg(msg);
 
-	return msgsz;
+	return bufsz;
 }
 
 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 		long, msgtyp, int, msgflg)
 {
-	long err, mtype;
-
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
-	if (err < 0)
-		goto out;
-
-	if (put_user(mtype, &msgp->mtype))
-		err = -EFAULT;
-out:
-	return err;
+	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
 }
 
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3-55-g7522


From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky
Date: Fri, 4 Jan 2013 15:34:56 -0800
Subject: selftests: IPC message queue copy feature test

This test can be used to check wheither kernel supports IPC message queue
copy and restore features (required by CRIU project).

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h                  |   3 +-
 ipc/compat.c                         |   3 +-
 tools/testing/selftests/ipc/Makefile |  25 ++++
 tools/testing/selftests/ipc/msgque.c | 246 +++++++++++++++++++++++++++++++++++
 4 files changed, 275 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/ipc/Makefile
 create mode 100644 tools/testing/selftests/ipc/msgque.c

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index fc5743a554e6..391af8d11cce 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
 extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
 		      int msgflg,
-		      long (*msg_fill)(void __user *, struct msg_msg *, size_t));
+		      long (*msg_fill)(void __user *, struct msg_msg *,
+				       size_t));
 
 #endif /* _LINUX_MSG_H */
diff --git a/ipc/compat.c b/ipc/compat.c
index eb3ea16d2d1d..2547f29dcd1b 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -365,7 +365,8 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 		uptr = compat_ptr(ipck.msgp);
 		msgtyp = ipck.msgtyp;
 	}
-	return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill);
+	return do_msgrcv(first, uptr, second, msgtyp, third,
+			 compat_do_msg_fill);
 }
 #else
 long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
new file mode 100644
index 000000000000..5386fd7c43ae
--- /dev/null
+++ b/tools/testing/selftests/ipc/Makefile
@@ -0,0 +1,25 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := X86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := X86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../usr/include/
+
+all:
+ifeq ($(ARCH),X86)
+	gcc $(CFLAGS) msgque.c -o msgque_test
+else
+	echo "Not an x86 target, can't build msgque selftest"
+endif
+
+run_tests: all
+	./msgque_test
+
+clean:
+	rm -fr ./msgque_test
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
new file mode 100644
index 000000000000..d66418237d21
--- /dev/null
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -0,0 +1,246 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/msg.h>
+#include <fcntl.h>
+
+#define MAX_MSG_SIZE		32
+
+struct msg1 {
+	int msize;
+	long mtype;
+	char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+struct msgque_data {
+	key_t key;
+	int msq_id;
+	int qbytes;
+	int qnum;
+	int mode;
+	struct msg1 *messages;
+};
+
+int restore_queue(struct msgque_data *msgque)
+{
+	int fd, ret, id, i;
+	char buf[32];
+
+	fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
+	if (fd == -1) {
+		printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+		return -errno;
+	}
+	sprintf(buf, "%d", msgque->msq_id);
+
+	ret = write(fd, buf, strlen(buf));
+	if (ret != strlen(buf)) {
+		printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+		return -errno;
+	}
+
+	id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
+	if (id == -1) {
+		printf("Failed to create queue\n");
+		return -errno;
+	}
+
+	if (id != msgque->msq_id) {
+		printf("Restored queue has wrong id (%d instead of %d)\n",
+							id, msgque->msq_id);
+		ret = -EFAULT;
+		goto destroy;
+	}
+
+	for (i = 0; i < msgque->qnum; i++) {
+		if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
+			   msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+			printf("msgsnd failed (%m)\n");
+			ret = -errno;
+			goto destroy;
+		};
+	}
+	return 0;
+
+destroy:
+	if (msgctl(id, IPC_RMID, 0))
+		printf("Failed to destroy queue: %d\n", -errno);
+	return ret;
+}
+
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+	struct msg1 message;
+	int cnt = 0, ret;
+
+	while (1) {
+		ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
+				0, IPC_NOWAIT);
+		if (ret < 0) {
+			if (errno == ENOMSG)
+				break;
+			printf("Failed to read IPC message: %m\n");
+			ret = -errno;
+			goto err;
+		}
+		if (ret != msgque->messages[cnt].msize) {
+			printf("Wrong message size: %d (expected %d)\n", ret,
+						msgque->messages[cnt].msize);
+			ret = -EINVAL;
+			goto err;
+		}
+		if (message.mtype != msgque->messages[cnt].mtype) {
+			printf("Wrong message type\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+			printf("Wrong message content\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		cnt++;
+	}
+
+	if (cnt != msgque->qnum) {
+		printf("Wrong message number\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = 0;
+err:
+	if (msgctl(msgque->msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+	return ret;
+}
+
+int dump_queue(struct msgque_data *msgque)
+{
+	struct msqid64_ds ds;
+	int kern_id;
+	int i, ret;
+
+	for (kern_id = 0; kern_id < 256; kern_id++) {
+		ret = msgctl(kern_id, MSG_STAT, &ds);
+		if (ret < 0) {
+			if (errno == -EINVAL)
+				continue;
+			printf("Failed to get stats for IPC queue with id %d\n",
+					kern_id);
+			return -errno;
+		}
+
+		if (ret == msgque->msq_id)
+			break;
+	}
+
+	msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+	if (msgque->messages == NULL) {
+		printf("Failed to get stats for IPC queue\n");
+		return -ENOMEM;
+	}
+
+	msgque->qnum = ds.msg_qnum;
+	msgque->mode = ds.msg_perm.mode;
+	msgque->qbytes = ds.msg_qbytes;
+
+	for (i = 0; i < msgque->qnum; i++) {
+		ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
+				MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+		if (ret < 0) {
+			printf("Failed to copy IPC message: %m (%d)\n", errno);
+			return -errno;
+		}
+		msgque->messages[i].msize = ret;
+	}
+	return 0;
+}
+
+int fill_msgque(struct msgque_data *msgque)
+{
+	struct msg1 msgbuf;
+
+	msgbuf.mtype = MSG_TYPE;
+	memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
+				IPC_NOWAIT) != 0) {
+		printf("First message send failed (%m)\n");
+		return -errno;
+	};
+
+	msgbuf.mtype = ANOTHER_MSG_TYPE;
+	memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
+				IPC_NOWAIT) != 0) {
+		printf("Second message send failed (%m)\n");
+		return -errno;
+	};
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int msg, pid, err;
+	struct msgque_data msgque;
+
+	msgque.key = ftok(argv[0], 822155650);
+	if (msgque.key == -1) {
+		printf("Can't make key\n");
+		return -errno;
+	}
+
+	msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
+	if (msgque.msq_id == -1) {
+		printf("Can't create queue\n");
+		goto err_out;
+	}
+
+	err = fill_msgque(&msgque);
+	if (err) {
+		printf("Failed to fill queue\n");
+		goto err_destroy;
+	}
+
+	err = dump_queue(&msgque);
+	if (err) {
+		printf("Failed to dump queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to check and destroy queue\n");
+		goto err_out;
+	}
+
+	err = restore_queue(&msgque);
+	if (err) {
+		printf("Failed to restore queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to test queue\n");
+		goto err_out;
+	}
+	return 0;
+
+err_destroy:
+	if (msgctl(msgque.msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+err_out:
+	return err;
+}
-- 
cgit v1.2.3-55-g7522


From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 4 Jan 2013 15:35:08 -0800
Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages

Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever
problem") added an isolated pageblocks counter (nr_pageblock_isolate in
struct zone) and used it to adjust free pages counter in
zone_watermark_ok_safe() to prevent kswapd looping forever problem.

Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages")
fixed accounting of isolated pages in global free pages counter.  It
made the previous zone_watermark_ok_safe() fix unnecessary and
potentially harmful (cause now isolated pages may be accounted twice
making free pages counter incorrect).

This patch removes the special isolated pageblocks counter altogether
which fixes zone_watermark_ok_safe() free pages check.

Reported-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  8 --------
 mm/page_alloc.c        | 27 ---------------------------
 mm/page_isolation.c    | 26 ++------------------------
 3 files changed, 2 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4bec5be82cab..73b64a38b984 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -503,14 +503,6 @@ struct zone {
 	 * rarely used fields:
 	 */
 	const char		*name;
-#ifdef CONFIG_MEMORY_ISOLATION
-	/*
-	 * the number of MIGRATE_ISOLATE *pageblock*.
-	 * We need this for free page counting. Look at zone_watermark_ok_safe.
-	 * It's protected by zone->lock
-	 */
-	int		nr_pageblock_isolate;
-#endif
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ba5e37127fc..bc6cc0e913bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -221,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-/*
- * NOTE:
- * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly.
- * Instead, use {un}set_pageblock_isolate.
- */
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -1655,20 +1650,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return true;
 }
 
-#ifdef CONFIG_MEMORY_ISOLATION
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	if (unlikely(zone->nr_pageblock_isolate))
-		return zone->nr_pageblock_isolate * pageblock_nr_pages;
-	return 0;
-}
-#else
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	return 0;
-}
-#endif
-
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		      int classzone_idx, int alloc_flags)
 {
@@ -1684,14 +1665,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
 	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
 		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
 
-	/*
-	 * If the zone has MIGRATE_ISOLATE type free pages, we should consider
-	 * it.  nr_zone_isolate_freepages is never accurate so kswapd might not
-	 * sleep although it could do so.  But this is more desirable for memory
-	 * hotplug than sleeping which can cause a livelock in the direct
-	 * reclaim path.
-	 */
-	free_pages -= nr_zone_isolate_freepages(z);
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
 								free_pages);
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 9d2264ea4606..383bdbb98b04 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -8,28 +8,6 @@
 #include <linux/memory.h>
 #include "internal.h"
 
-/* called while holding zone->lock */
-static void set_pageblock_isolate(struct page *page)
-{
-	if (get_pageblock_migratetype(page) == MIGRATE_ISOLATE)
-		return;
-
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	page_zone(page)->nr_pageblock_isolate++;
-}
-
-/* called while holding zone->lock */
-static void restore_pageblock_isolate(struct page *page, int migratetype)
-{
-	struct zone *zone = page_zone(page);
-	if (WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE))
-		return;
-
-	BUG_ON(zone->nr_pageblock_isolate <= 0);
-	set_pageblock_migratetype(page, migratetype);
-	zone->nr_pageblock_isolate--;
-}
-
 int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
 {
 	struct zone *zone;
@@ -80,7 +58,7 @@ out:
 		unsigned long nr_pages;
 		int migratetype = get_pageblock_migratetype(page);
 
-		set_pageblock_isolate(page);
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
 
 		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -103,7 +81,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 		goto out;
 	nr_pages = move_freepages_block(zone, page, migratetype);
 	__mod_zone_freepage_state(zone, nr_pages, migratetype);
-	restore_pageblock_isolate(page, migratetype);
+	set_pageblock_migratetype(page, migratetype);
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
-- 
cgit v1.2.3-55-g7522


From e909c682d04e55e77a3c9d158e7dc36027195493 Mon Sep 17 00:00:00 2001
From: Sascha Hauer
Date: Mon, 7 Jan 2013 10:57:14 +0100
Subject: [media] coda: Fix build due to iram.h rename

commit c045e3f13 (ARM: imx: include iram.h rather than mach/iram.h) changed the
location of iram.h, which causes the following build error when building the coda
driver:

drivers/media/platform/coda.c:27:23: error: mach/iram.h: No such file or directory
drivers/media/platform/coda.c: In function 'coda_probe':
drivers/media/platform/coda.c:2000: error: implicit declaration of function 'iram_alloc'
drivers/media/platform/coda.c:2001: warning: assignment makes pointer from integer without a cast
drivers/media/platform/coda.c: In function 'coda_remove':
drivers/media/platform/coda.c:2024: error: implicit declaration of function 'iram_free'

Since the content of iram.h is not imx specific, move it to
include/linux/platform_data/imx-iram.h instead. This is an intermediate solution
until the i.MX iram allocator is converted to the generic SRAM allocator.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 arch/arm/mach-imx/iram.h               | 41 ----------------------------------
 arch/arm/mach-imx/iram_alloc.c         |  3 +--
 drivers/media/platform/coda.c          |  2 +-
 include/linux/platform_data/imx-iram.h | 41 ++++++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 44 deletions(-)
 delete mode 100644 arch/arm/mach-imx/iram.h
 create mode 100644 include/linux/platform_data/imx-iram.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/iram.h b/arch/arm/mach-imx/iram.h
deleted file mode 100644
index 022690c33702..000000000000
--- a/arch/arm/mach-imx/iram.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-#include <linux/errno.h>
-
-#ifdef CONFIG_IRAM_ALLOC
-
-int __init iram_init(unsigned long base, unsigned long size);
-void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr);
-void iram_free(unsigned long dma_addr, unsigned int size);
-
-#else
-
-static inline int __init iram_init(unsigned long base, unsigned long size)
-{
-	return -ENOMEM;
-}
-
-static inline void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr)
-{
-	return NULL;
-}
-
-static inline void iram_free(unsigned long base, unsigned long size) {}
-
-#endif
diff --git a/arch/arm/mach-imx/iram_alloc.c b/arch/arm/mach-imx/iram_alloc.c
index 6c80424f678e..e05cf407db65 100644
--- a/arch/arm/mach-imx/iram_alloc.c
+++ b/arch/arm/mach-imx/iram_alloc.c
@@ -22,8 +22,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/genalloc.h>
-
-#include "iram.h"
+#include "linux/platform_data/imx-iram.h"
 
 static unsigned long iram_phys_base;
 static void __iomem *iram_virt_base;
diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 7b8b547f2d51..afadd3aba3e0 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -23,8 +23,8 @@
 #include <linux/slab.h>
 #include <linux/videodev2.h>
 #include <linux/of.h>
+#include <linux/platform_data/imx-iram.h>
 
-#include <mach/iram.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-ioctl.h>
diff --git a/include/linux/platform_data/imx-iram.h b/include/linux/platform_data/imx-iram.h
new file mode 100644
index 000000000000..022690c33702
--- /dev/null
+++ b/include/linux/platform_data/imx-iram.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+#include <linux/errno.h>
+
+#ifdef CONFIG_IRAM_ALLOC
+
+int __init iram_init(unsigned long base, unsigned long size);
+void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr);
+void iram_free(unsigned long dma_addr, unsigned int size);
+
+#else
+
+static inline int __init iram_init(unsigned long base, unsigned long size)
+{
+	return -ENOMEM;
+}
+
+static inline void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr)
+{
+	return NULL;
+}
+
+static inline void iram_free(unsigned long base, unsigned long size) {}
+
+#endif
-- 
cgit v1.2.3-55-g7522


From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001
From: Marc Dionne
Date: Wed, 9 Jan 2013 14:16:30 +0000
Subject: cred: Remove tgcred pointer from struct cred

Commit 3a50597de863 ("KEYS: Make the session and process keyrings
per-thread") removed the definition of the thread_group_cred structure,
but left a now unused pointer in struct cred.

Signed-off-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cred.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index abb2cd50f6b2..04421e825365 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -128,7 +128,6 @@ struct cred {
 	struct key	*process_keyring; /* keyring private to this process */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
-	struct thread_group_cred *tgcred; /* thread-group shared credentials */
 #endif
 #ifdef CONFIG_SECURITY
 	void		*security;	/* subjective LSM security */
-- 
cgit v1.2.3-55-g7522


From 54b956b903607f8f8878754dd4352da6a54a1da2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Thu, 10 Jan 2013 10:57:01 -0800
Subject: Remove __dev* markings from init.h

Now that all in-kernel users of __dev* are gone, let's remove them from
init.h to keep them from popping up again and again.

Thanks to Bill Pemberton for doing all of the hard work to make removal
of this possible.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/init.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index a799273714ac..10ed4f436458 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -93,14 +93,6 @@
 
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
-/* Used for HOTPLUG, but that is always enabled now, so just make them noops */
-#define __devinit
-#define __devinitdata
-#define __devinitconst
-#define __devexit
-#define __devexitdata
-#define __devexitconst
-
 /* Used for HOTPLUG_CPU */
 #define __cpuinit        __section(.cpuinit.text) __cold notrace
 #define __cpuinitdata    __section(.cpuinit.data)
@@ -337,18 +329,6 @@ void __init parse_early_options(char *cmdline);
 #define __INITRODATA_OR_MODULE __INITRODATA
 #endif /*CONFIG_MODULES*/
 
-/* Functions marked as __devexit may be discarded at kernel link time, depending
-   on config options.  Newer versions of binutils detect references from
-   retained sections to discarded sections and flag an error.  Pointers to
-   __devexit functions must use __devexit_p(function_name), the wrapper will
-   insert either the function_name or NULL, depending on the config options.
- */
-#if defined(MODULE) || defined(CONFIG_HOTPLUG)
-#define __devexit_p(x) x
-#else
-#define __devexit_p(x) NULL
-#endif
-
 #ifdef MODULE
 #define __exit_p(x) x
 #else
-- 
cgit v1.2.3-55-g7522


From 896f97ea95c1d29c0520ee0766b66b7f64cb967c Mon Sep 17 00:00:00 2001
From: David Decotigny
Date: Fri, 11 Jan 2013 14:31:36 -0800
Subject: lib: cpu_rmap: avoid flushing all workqueues

In some cases, free_irq_cpu_rmap() is called while holding a lock (eg
rtnl).  This can lead to deadlocks, because it invokes
flush_scheduled_work() which ends up waiting for whole system workqueue
to flush, but some pending works might try to acquire the lock we are
already holding.

This commit uses reference-counting to replace
irq_run_affinity_notifiers().  It also removes
irq_run_affinity_notifiers() altogether.

[akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()]
Signed-off-by: David Decotigny <decot@googlers.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu_rmap.h  | 13 ++++--------
 include/linux/interrupt.h |  5 -----
 lib/cpu_rmap.c            | 54 ++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index ac3bbb5b9502..1739510d8994 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -13,9 +13,11 @@
 #include <linux/cpumask.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/kref.h>
 
 /**
  * struct cpu_rmap - CPU affinity reverse-map
+ * @refcount: kref for object
  * @size: Number of objects to be reverse-mapped
  * @used: Number of objects added
  * @obj: Pointer to array of object pointers
@@ -23,6 +25,7 @@
  *      based on affinity masks
  */
 struct cpu_rmap {
+	struct kref	refcount;
 	u16		size, used;
 	void		**obj;
 	struct {
@@ -33,15 +36,7 @@ struct cpu_rmap {
 #define CPU_RMAP_DIST_INF 0xffff
 
 extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
-
-/**
- * free_cpu_rmap - free CPU affinity reverse-map
- * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
- */
-static inline void free_cpu_rmap(struct cpu_rmap *rmap)
-{
-	kfree(rmap);
-}
+extern int cpu_rmap_put(struct cpu_rmap *rmap);
 
 extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
 extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5e4e6170f43a..5fa5afeeb759 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -268,11 +268,6 @@ struct irq_affinity_notify {
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-static inline void irq_run_affinity_notifiers(void)
-{
-	flush_scheduled_work();
-}
-
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 145dec5267c9..5fbed5caba6e 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 	if (!rmap)
 		return NULL;
 
+	kref_init(&rmap->refcount);
 	rmap->obj = (void **)((char *)rmap + obj_offset);
 
 	/* Initially assign CPUs to objects on a rota, since we have
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 }
 EXPORT_SYMBOL(alloc_cpu_rmap);
 
+/**
+ * cpu_rmap_release - internal reclaiming helper called from kref_put
+ * @ref: kref to struct cpu_rmap
+ */
+static void cpu_rmap_release(struct kref *ref)
+{
+	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
+	kfree(rmap);
+}
+
+/**
+ * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+static inline void cpu_rmap_get(struct cpu_rmap *rmap)
+{
+	kref_get(&rmap->refcount);
+}
+
+/**
+ * cpu_rmap_put - release ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+int cpu_rmap_put(struct cpu_rmap *rmap)
+{
+	return kref_put(&rmap->refcount, cpu_rmap_release);
+}
+EXPORT_SYMBOL(cpu_rmap_put);
+
 /* Reevaluate nearest object for given CPU, comparing with the given
  * neighbours at the given distance.
  */
@@ -197,8 +227,7 @@ struct irq_glue {
  * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
  * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
  *
- * Must be called in process context, before freeing the IRQs, and
- * without holding any locks required by global workqueue items.
+ * Must be called in process context, before freeing the IRQs.
  */
 void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 {
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 		glue = rmap->obj[index];
 		irq_set_affinity_notifier(glue->notify.irq, NULL);
 	}
-	irq_run_affinity_notifiers();
 
-	kfree(rmap);
+	cpu_rmap_put(rmap);
 }
 EXPORT_SYMBOL(free_irq_cpu_rmap);
 
+/**
+ * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
+ * @notify: struct irq_affinity_notify passed by irq/manage.c
+ * @mask: cpu mask for new SMP affinity
+ *
+ * This is executed in workqueue context.
+ */
 static void
 irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 {
@@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
 }
 
+/**
+ * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
+ * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
+ */
 static void irq_cpu_rmap_release(struct kref *ref)
 {
 	struct irq_glue *glue =
 		container_of(ref, struct irq_glue, notify.kref);
+
+	cpu_rmap_put(glue->rmap);
 	kfree(glue);
 }
 
@@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
 	glue->notify.notify = irq_cpu_rmap_notify;
 	glue->notify.release = irq_cpu_rmap_release;
 	glue->rmap = rmap;
+	cpu_rmap_get(rmap);
 	glue->index = cpu_rmap_add(rmap, glue);
 	rc = irq_set_affinity_notifier(irq, &glue->notify);
-	if (rc)
+	if (rc) {
+		cpu_rmap_put(glue->rmap);
 		kfree(glue);
+	}
 	return rc;
 }
 EXPORT_SYMBOL(irq_cpu_rmap_add);
-- 
cgit v1.2.3-55-g7522


From 1b963c81b14509e330e0fe3218b645ece2738dc5 Mon Sep 17 00:00:00 2001
From: Jiri Kosina
Date: Fri, 11 Jan 2013 14:31:56 -0800
Subject: lockdep, rwsem: provide down_write_nest_lock()

down_write_nest_lock() provides a means to annotate locking scenario
where an outer lock is guaranteed to serialize the order nested locks
are being acquired.

This is analogoue to already existing mutex_lock_nest_lock() and
spin_lock_nest_lock().

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mel Gorman <mel@csn.ul.ie>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h |  3 +++
 include/linux/rwsem.h   |  9 +++++++++
 kernel/rwsem.c          | 10 ++++++++++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 00e46376e28f..2bca44b0893c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -524,14 +524,17 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 #  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, NULL, i)
+#  define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 2, n, i)
 #  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 2, NULL, i)
 # else
 #  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, NULL, i)
+#  define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 1, n, i)
 #  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 1, NULL, i)
 # endif
 # define rwsem_release(l, n, i)			lock_release(l, n, i)
 #else
 # define rwsem_acquire(l, s, t, i)		do { } while (0)
+# define rwsem_acquire_nest(l, s, t, n, i)	do { } while (0)
 # define rwsem_acquire_read(l, s, t, i)		do { } while (0)
 # define rwsem_release(l, n, i)			do { } while (0)
 #endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 54bd7cd7ecbd..413cc11e414a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -125,8 +125,17 @@ extern void downgrade_write(struct rw_semaphore *sem);
  */
 extern void down_read_nested(struct rw_semaphore *sem, int subclass);
 extern void down_write_nested(struct rw_semaphore *sem, int subclass);
+extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock);
+
+# define down_write_nest_lock(sem, nest_lock)			\
+do {								\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
+	_down_write_nest_lock(sem, &(nest_lock)->dep_map);	\
+} while (0);
+
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
+# define down_write_nest_lock(sem, nest_lock)	down_read(sem)
 # define down_write_nested(sem, subclass)	down_write(sem)
 #endif
 
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 6850f53e02d8..b3c6c3fcd847 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
 
 EXPORT_SYMBOL(down_read_nested);
 
+void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
+{
+	might_sleep();
+	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
+
+	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+}
+
+EXPORT_SYMBOL(_down_write_nest_lock);
+
 void down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	might_sleep();
-- 
cgit v1.2.3-55-g7522


From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Fri, 11 Jan 2013 14:32:05 -0800
Subject: audit: create explicit AUDIT_SECCOMP event type

The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1
could only kill a process.  While we still want to make sure an audit
record is forced on a kill, this should use a separate record type since
seccomp mode 2 introduces other behaviors.

In the case of "handled" behaviors (process wasn't killed), only emit a
record if the process is under inspection.  This change also fixes
userspace examination of seccomp audit events, since it was considered
malformed due to missing fields of the AUDIT_ANOM_ABEND event type.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Paris <eparis@redhat.com>
Cc: Jeff Layton <jlayton@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Julien Tinnes <jln@google.com>
Acked-by: Will Drewry <wad@chromium.org>
Acked-by: Steve Grubb <sgrubb@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h      |  3 ++-
 include/uapi/linux/audit.h |  1 +
 kernel/auditsc.c           | 14 +++++++++++---
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index bce729afbcf9..9d5104d7aba9 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -157,7 +157,8 @@ void audit_core_dumps(long signr);
 
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 {
-	if (unlikely(!audit_dummy_context()))
+	/* Force a record to be reported if a signal was delivered. */
+	if (signr || unlikely(!audit_dummy_context()))
 		__audit_seccomp(syscall, signr, code);
 }
 
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 76352ac45f24..09a2d94ab113 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -106,6 +106,7 @@
 #define AUDIT_MMAP		1323	/* Record showing descriptor and flags in mmap */
 #define AUDIT_NETFILTER_PKT	1324	/* Packets traversing netfilter chains */
 #define AUDIT_NETFILTER_CFG	1325	/* Netfilter chain modifications */
+#define AUDIT_SECCOMP		1326	/* Secure Computing event */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e37e6a12c5e3..3e46d1dec613 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
-static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+static void audit_log_task(struct audit_buffer *ab)
 {
 	kuid_t auid, uid;
 	kgid_t gid;
@@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
 	audit_log_task_context(ab);
 	audit_log_format(ab, " pid=%d comm=", current->pid);
 	audit_log_untrustedstring(ab, current->comm);
+}
+
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	audit_log_task(ab);
 	audit_log_format(ab, " reason=");
 	audit_log_string(ab, reason);
 	audit_log_format(ab, " sig=%ld", signr);
@@ -2723,8 +2728,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
 {
 	struct audit_buffer *ab;
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	audit_log_abend(ab, "seccomp", signr);
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+	if (unlikely(!ab))
+		return;
+	audit_log_task(ab);
+	audit_log_format(ab, " sig=%ld", signr);
 	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_format(ab, " compat=%d", is_compat_task());
 	audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));
-- 
cgit v1.2.3-55-g7522


From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Fri, 11 Jan 2013 14:32:13 -0800
Subject: linux/audit.h: move ptrace.h include to kernel header

While the kernel internals want pt_regs (and so it includes
linux/ptrace.h), the user version of audit.h does not need it.  So move
the include out of the uapi version.

This avoids issues where people want the audit defines and userland
ptrace api.  Including both the kernel ptrace and the userland ptrace
headers can easily lead to failure.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h      | 1 +
 include/uapi/linux/audit.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 9d5104d7aba9..5a6d718adf34 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -24,6 +24,7 @@
 #define _LINUX_AUDIT_H_
 
 #include <linux/sched.h>
+#include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
 
 struct audit_sig_info {
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 09a2d94ab113..9f096f1c0907 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -26,7 +26,6 @@
 
 #include <linux/types.h>
 #include <linux/elf-em.h>
-#include <linux/ptrace.h>
 
 /* The netlink messages for the audit system is divided into blocks:
  * 1000 - 1099 are for commanding the audit system
-- 
cgit v1.2.3-55-g7522


From 8fb74b9fb2b182d54beee592350d9ea1f325917a Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 11 Jan 2013 14:32:16 -0800
Subject: mm: compaction: partially revert capture of suitable high-order page

Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when
waiting for POLLIN on a local TCP socket.  It was easier to trigger if
there was disk IO and dirty pages at the same time and he bisected it to
commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page
immediately when it is made available").

The intention of that patch was to improve high-order allocations under
memory pressure after changes made to reclaim in 3.6 drastically hurt
THP allocations but the approach was flawed.  For Eric, the problem was
that page->pfmemalloc was not being cleared for captured pages leading
to a poor interaction with swap-over-NFS support causing the packets to
be dropped.  However, I identified a few more problems with the patch
including the fact that it can increase contention on zone->lock in some
cases which could result in async direct compaction being aborted early.

In retrospect the capture patch took the wrong approach.  What it should
have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it
was allocating for THP and avoided races that way.  While the patch was
showing to improve allocation success rates at the time, the benefit is
marginal given the relative complexity and it should be revisited from
scratch in the context of the other reclaim-related changes that have
taken place since the patch was first written and tested.  This patch
partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a
suitable high-order page immediately when it is made available").

Reported-and-tested-by: Eric Wong <normalperson@yhbt.net>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  4 +-
 include/linux/mm.h         |  1 -
 mm/compaction.c            | 92 +++++++---------------------------------------
 mm/internal.h              |  1 -
 mm/page_alloc.c            | 35 ++++--------------
 5 files changed, 23 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 6ecb6dc2f303..cc7bddeaf553 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended, struct page **page);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63204078f72b..66e2f7c61e5c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
-int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function.  Provide a
diff --git a/mm/compaction.c b/mm/compaction.c
index f8f5c111b7d7..c62bd063d766 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
+	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	if (cc->page) {
-		/* Was a suitable page captured? */
-		if (*cc->page)
+	for (order = cc->order; order < MAX_ORDER; order++) {
+		struct free_area *area = &zone->free_area[order];
+
+		/* Job done if page is free of the right migratetype */
+		if (!list_empty(&area->free_list[cc->migratetype]))
+			return COMPACT_PARTIAL;
+
+		/* Job done if allocation would set block type */
+		if (cc->order >= pageblock_order && area->nr_free)
 			return COMPACT_PARTIAL;
-	} else {
-		unsigned int order;
-		for (order = cc->order; order < MAX_ORDER; order++) {
-			struct free_area *area = &zone->free_area[cc->order];
-			/* Job done if page is free of the right migratetype */
-			if (!list_empty(&area->free_list[cc->migratetype]))
-				return COMPACT_PARTIAL;
-
-			/* Job done if allocation would set block type */
-			if (cc->order >= pageblock_order && area->nr_free)
-				return COMPACT_PARTIAL;
-		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 	return COMPACT_CONTINUE;
 }
 
-static void compact_capture_page(struct compact_control *cc)
-{
-	unsigned long flags;
-	int mtype, mtype_low, mtype_high;
-
-	if (!cc->page || *cc->page)
-		return;
-
-	/*
-	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
-	 * regardless of the migratetype of the freelist is is captured from.
-	 * This is fine because the order for a high-order MIGRATE_MOVABLE
-	 * allocation is typically at least a pageblock size and overall
-	 * fragmentation is not impaired. Other allocation types must
-	 * capture pages from their own migratelist because otherwise they
-	 * could pollute other pageblocks like MIGRATE_MOVABLE with
-	 * difficult to move pages and making fragmentation worse overall.
-	 */
-	if (cc->migratetype == MIGRATE_MOVABLE) {
-		mtype_low = 0;
-		mtype_high = MIGRATE_PCPTYPES;
-	} else {
-		mtype_low = cc->migratetype;
-		mtype_high = cc->migratetype + 1;
-	}
-
-	/* Speculatively examine the free lists without zone lock */
-	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
-		int order;
-		for (order = cc->order; order < MAX_ORDER; order++) {
-			struct page *page;
-			struct free_area *area;
-			area = &(cc->zone->free_area[order]);
-			if (list_empty(&area->free_list[mtype]))
-				continue;
-
-			/* Take the lock and attempt capture of the page */
-			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
-				return;
-			if (!list_empty(&area->free_list[mtype])) {
-				page = list_entry(area->free_list[mtype].next,
-							struct page, lru);
-				if (capture_free_page(page, cc->order, mtype)) {
-					spin_unlock_irqrestore(&cc->zone->lock,
-									flags);
-					*cc->page = page;
-					return;
-				}
-			}
-			spin_unlock_irqrestore(&cc->zone->lock, flags);
-		}
-	}
-}
-
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
 	int ret;
@@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
-
-		/* Capture a page now if it is a suitable size */
-		compact_capture_page(cc);
 	}
 
 out:
@@ -1069,8 +1007,7 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended,
-				 struct page **page)
+				 bool sync, bool *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
-		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended, page);
+						contended);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
-		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -1203,7 +1138,6 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
-		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index d597f94cc205..9ba21100ebf3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -135,7 +135,6 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool contended;			/* True if a lock was contended */
-	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c957805a7f0e..df2022ff0c8a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1384,14 +1384,8 @@ void split_page(struct page *page, unsigned int order)
 		set_page_refcounted(page + i);
 }
 
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
 {
-	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
 	int mt;
@@ -1399,7 +1393,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	BUG_ON(!PageBuddy(page));
 
 	zone = page_zone(page);
-	order = page_order(page);
 	mt = get_pageblock_migratetype(page);
 
 	if (mt != MIGRATE_ISOLATE) {
@@ -1408,7 +1401,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
-		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
 
 	/* Remove page from free list */
@@ -1416,11 +1409,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
-	if (alloc_order != order)
-		expand(zone, page, alloc_order, order,
-			&zone->free_area[order], migratetype);
-
-	/* Set the pageblock if the captured page is at least a pageblock */
+	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1431,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		}
 	}
 
-	return 1UL << alloc_order;
+	return 1UL << order;
 }
 
 /*
@@ -1449,10 +1438,9 @@ int split_free_page(struct page *page)
 	unsigned int order;
 	int nr_pages;
 
-	BUG_ON(!PageBuddy(page));
 	order = page_order(page);
 
-	nr_pages = capture_free_page(page, order, 0);
+	nr_pages = __isolate_free_page(page, order);
 	if (!nr_pages)
 		return 0;
 
@@ -2136,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page = NULL;
-
 	if (!order)
 		return NULL;
 
@@ -2149,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction, &page);
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
-	/* If compaction captured a page, prep and use it */
-	if (page) {
-		prep_new_page(page, order, gfp_mask);
-		goto got_page;
-	}
-
 	if (*did_some_progress != COMPACT_SKIPPED) {
+		struct page *page;
+
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2168,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
-got_page:
 			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
-- 
cgit v1.2.3-55-g7522


From 3cb7a56344ca45ee56d71c5f8fe9f922306bff1f Mon Sep 17 00:00:00 2001
From: Michel Lespinasse
Date: Fri, 11 Jan 2013 14:32:20 -0800
Subject: lib/rbtree.c: avoid the use of non-static __always_inline

lib/rbtree.c declared __rb_erase_color() as __always_inline void, and
then exported it with EXPORT_SYMBOL.

This was because __rb_erase_color() must be exported for augmented
rbtree users, but it must also be inlined into rb_erase() so that the
dummy callback can get optimized out of that call site.

(Actually with a modern compiler, none of the dummy callback functions
should even be generated as separate text functions).

The above usage is legal C, but it was unusual enough for some compilers
to warn about it.  This change makes things more explicit, with a static
__always_inline ____rb_erase_color function for use in rb_erase(), and a
separate non-inline __rb_erase_color function for use in
rb_erase_augmented call sites.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 14 +++++++++++---
 lib/rbtree.c                     | 20 +++++++++++++++++---
 2 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 2ac60c9cf644..fea49b5da12a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,9 +123,9 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
 extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
 
-static __always_inline void
-rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-		   const struct rb_augment_callbacks *augment)
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		     const struct rb_augment_callbacks *augment)
 {
 	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
 	struct rb_node *parent, *rebalance;
@@ -217,6 +217,14 @@ rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 	}
 
 	augment->propagate(tmp, NULL);
+	return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
 	if (rebalance)
 		__rb_erase_color(rebalance, root, augment->rotate);
 }
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 4f56a11d67fa..c0e31fe2fabf 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
 	}
 }
 
-__always_inline void
-__rb_erase_color(struct rb_node *parent, struct rb_root *root,
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
 	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
@@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 		}
 	}
 }
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	____rb_erase_color(parent, root, augment_rotate);
+}
 EXPORT_SYMBOL(__rb_erase_color);
 
 /*
@@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color);
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
-	rb_erase_augmented(node, root, &dummy_callbacks);
+	struct rb_node *rebalance;
+	rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+	if (rebalance)
+		____rb_erase_color(rebalance, root, dummy_rotate);
 }
 EXPORT_SYMBOL(rb_erase);
 
-- 
cgit v1.2.3-55-g7522


From d07d7507bfb4e23735c9b83e397c43e1e8a173e8 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Thu, 10 Jan 2013 23:19:10 +0000
Subject: net, wireless: overwrite default_ethtool_ops

Since:

commit 2c60db037034d27f8c636403355d52872da92f81
Author: Eric Dumazet <edumazet@google.com>
Date:   Sun Sep 16 09:17:26 2012 +0000

    net: provide a default dev->ethtool_ops

wireless core does not correctly assign ethtool_ops.

After alloc_netdev*() call, some cfg80211 drivers provide they own
ethtool_ops, but some do not. For them, wireless core provide generic
cfg80211_ethtool_ops, which is assigned in NETDEV_REGISTER notify call:

        if (!dev->ethtool_ops)
                dev->ethtool_ops = &cfg80211_ethtool_ops;

But after Eric's commit, dev->ethtool_ops is no longer NULL (on cfg80211
drivers without custom ethtool_ops), but points to &default_ethtool_ops.

In order to fix the problem, provide function which will overwrite
default_ethtool_ops and use it by wireless core.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 8 ++++++++
 net/wireless/core.c       | 3 +--
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c599e4782d45..9ef07d0868b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -60,6 +60,9 @@ struct wireless_dev;
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
 
+extern void netdev_set_default_ethtool_ops(struct net_device *dev,
+					   const struct ethtool_ops *ops);
+
 /* hardware address assignment types */
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
 #define NET_ADDR_RANDOM		1	/* address is generated randomly */
diff --git a/net/core/dev.c b/net/core/dev.c
index 515473ee52cb..f64e439b4a00 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6121,6 +6121,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 
 static const struct ethtool_ops default_ethtool_ops;
 
+void netdev_set_default_ethtool_ops(struct net_device *dev,
+				    const struct ethtool_ops *ops)
+{
+	if (dev->ethtool_ops == &default_ethtool_ops)
+		dev->ethtool_ops = ops;
+}
+EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+
 /**
  *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 14d990400354..b677eab55b68 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -866,8 +866,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		/* allow mac80211 to determine the timeout */
 		wdev->ps_timeout = -1;
 
-		if (!dev->ethtool_ops)
-			dev->ethtool_ops = &cfg80211_ethtool_ops;
+		netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops);
 
 		if ((wdev->iftype == NL80211_IFTYPE_STATION ||
 		     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
-- 
cgit v1.2.3-55-g7522


From 0d21b0e3477395e7ff2acc269f15df6e6a8d356d Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Sat, 12 Jan 2013 11:38:44 +1030
Subject: module: add new state MODULE_STATE_UNFORMED.

You should never look at such a module, so it's excised from all paths
which traverse the modules list.

We add the state at the end, to avoid gratuitous ABI break (ksplice).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h      | 10 ++++----
 kernel/debug/kdb/kdb_main.c |  2 ++
 kernel/module.c             | 57 +++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 59 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 7760c6d344a3..1375ee3f03aa 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -199,11 +199,11 @@ struct module_use {
 	struct module *source, *target;
 };
 
-enum module_state
-{
-	MODULE_STATE_LIVE,
-	MODULE_STATE_COMING,
-	MODULE_STATE_GOING,
+enum module_state {
+	MODULE_STATE_LIVE,	/* Normal state. */
+	MODULE_STATE_COMING,	/* Full formed, running module_init. */
+	MODULE_STATE_GOING,	/* Going away. */
+	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
 /**
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4d5f8d5612f3..8875254120b6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)
 
 	kdb_printf("Module                  Size  modstruct     Used by\n");
 	list_for_each_entry(mod, kdb_modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 
 		kdb_printf("%-20s%8u  0x%p ", mod->name,
 			   mod->core_size, (void *)mod);
diff --git a/kernel/module.c b/kernel/module.c
index 41bc1189b061..c3a2ee8e3679 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -188,6 +188,7 @@ struct load_info {
    ongoing or failed initialization etc. */
 static inline int strong_try_module_get(struct module *mod)
 {
+	BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
 	if (mod && mod->state == MODULE_STATE_COMING)
 		return -EBUSY;
 	if (try_module_get(mod))
@@ -343,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
 #endif
 		};
 
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
+
 		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
 			return true;
 	}
@@ -450,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,
 EXPORT_SYMBOL_GPL(find_symbol);
 
 /* Search for module by name: must hold module_mutex. */
-struct module *find_module(const char *name)
+static struct module *find_module_all(const char *name,
+				      bool even_unformed)
 {
 	struct module *mod;
 
 	list_for_each_entry(mod, &modules, list) {
+		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (strcmp(mod->name, name) == 0)
 			return mod;
 	}
 	return NULL;
 }
+
+struct module *find_module(const char *name)
+{
+	return find_module_all(name, false);
+}
 EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
@@ -525,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)
 	preempt_disable();
 
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (!mod->percpu_size)
 			continue;
 		for_each_possible_cpu(cpu) {
@@ -1048,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
 	case MODULE_STATE_GOING:
 		state = "going";
 		break;
+	default:
+		BUG();
 	}
 	return sprintf(buffer, "%s\n", state);
 }
@@ -1786,6 +1802,8 @@ void set_all_modules_text_rw(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -1807,6 +1825,8 @@ void set_all_modules_text_ro(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -2998,7 +3018,8 @@ static bool finished_loading(const char *name)
 
 	mutex_lock(&module_mutex);
 	mod = find_module(name);
-	ret = !mod || mod->state != MODULE_STATE_COMING;
+	ret = !mod || mod->state == MODULE_STATE_LIVE
+		|| mod->state == MODULE_STATE_GOING;
 	mutex_unlock(&module_mutex);
 
 	return ret;
@@ -3361,6 +3382,8 @@ const char *module_address_lookup(unsigned long addr,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_init(addr, mod) ||
 		    within_module_core(addr, mod)) {
 			if (modname)
@@ -3384,6 +3407,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_init(addr, mod) ||
 		    within_module_core(addr, mod)) {
 			const char *sym;
@@ -3408,6 +3433,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_init(addr, mod) ||
 		    within_module_core(addr, mod)) {
 			const char *sym;
@@ -3435,6 +3462,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (symnum < mod->num_symtab) {
 			*value = mod->symtab[symnum].st_value;
 			*type = mod->symtab[symnum].st_info;
@@ -3477,9 +3506,12 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 			ret = mod_find_symname(mod, colon+1);
 		*colon = ':';
 	} else {
-		list_for_each_entry_rcu(mod, &modules, list)
+		list_for_each_entry_rcu(mod, &modules, list) {
+			if (mod->state == MODULE_STATE_UNFORMED)
+				continue;
 			if ((ret = mod_find_symname(mod, name)) != 0)
 				break;
+		}
 	}
 	preempt_enable();
 	return ret;
@@ -3494,6 +3526,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
 	int ret;
 
 	list_for_each_entry(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		for (i = 0; i < mod->num_symtab; i++) {
 			ret = fn(data, mod->strtab + mod->symtab[i].st_name,
 				 mod, mod->symtab[i].st_value);
@@ -3509,6 +3543,7 @@ static char *module_flags(struct module *mod, char *buf)
 {
 	int bx = 0;
 
+	BUG_ON(mod->state == MODULE_STATE_UNFORMED);
 	if (mod->taints ||
 	    mod->state == MODULE_STATE_GOING ||
 	    mod->state == MODULE_STATE_COMING) {
@@ -3550,6 +3585,10 @@ static int m_show(struct seq_file *m, void *p)
 	struct module *mod = list_entry(p, struct module, list);
 	char buf[8];
 
+	/* We always ignore unformed modules. */
+	if (mod->state == MODULE_STATE_UNFORMED)
+		return 0;
+
 	seq_printf(m, "%s %u",
 		   mod->name, mod->init_size + mod->core_size);
 	print_unload_info(m, mod);
@@ -3610,6 +3649,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (mod->num_exentries == 0)
 			continue;
 
@@ -3658,10 +3699,13 @@ struct module *__module_address(unsigned long addr)
 	if (addr < module_addr_min || addr > module_addr_max)
 		return NULL;
 
-	list_for_each_entry_rcu(mod, &modules, list)
+	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_core(addr, mod)
 		    || within_module_init(addr, mod))
 			return mod;
+	}
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(__module_address);
@@ -3714,8 +3758,11 @@ void print_modules(void)
 	printk(KERN_DEFAULT "Modules linked in:");
 	/* Most callers should already have preempt disabled, but make sure */
 	preempt_disable();
-	list_for_each_entry_rcu(mod, &modules, list)
+	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		printk(" %s%s", mod->name, module_flags(mod, buf));
+	}
 	preempt_enable();
 	if (last_unloaded_module[0])
 		printk(" [last unloaded: %s]", last_unloaded_module);
-- 
cgit v1.2.3-55-g7522


From 803739d25c2343da6d2f95eebdcbc08bf67097d4 Mon Sep 17 00:00:00 2001
From: Shane Huang
Date: Mon, 17 Dec 2012 23:18:59 +0800
Subject: [libata] replace sata_settings with devslp_timing

NCQ capability was used to check availability of SATA Settings page
from Identify Device Data Log, which contains DevSlp timing variables.
It does not work on some HDDs and leads to error messages.

IDENTIFY word 78 bit 5(Hardware Feature Control) can't work either
because it is only the sufficient condition of Identify Device data
log, not the necessary condition.

This patch replaced ata_device->sata_settings with ->devslp_timing
to only save DevSlp timing variables(8 bytes), instead of the whole
SATA Settings page(512 bytes).

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=51881

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Shane Huang <shane.huang@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libahci.c     |  6 +++---
 drivers/ata/libata-core.c | 22 +++++++++++++---------
 include/linux/ata.h       |  8 +++++---
 include/linux/libata.h    |  4 ++--
 4 files changed, 23 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 320712a7b9ea..6cd7805e47ca 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1951,13 +1951,13 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep)
 	/* Use the nominal value 10 ms if the read MDAT is zero,
 	 * the nominal value of DETO is 20 ms.
 	 */
-	if (dev->sata_settings[ATA_LOG_DEVSLP_VALID] &
+	if (dev->devslp_timing[ATA_LOG_DEVSLP_VALID] &
 	    ATA_LOG_DEVSLP_VALID_MASK) {
-		mdat = dev->sata_settings[ATA_LOG_DEVSLP_MDAT] &
+		mdat = dev->devslp_timing[ATA_LOG_DEVSLP_MDAT] &
 		       ATA_LOG_DEVSLP_MDAT_MASK;
 		if (!mdat)
 			mdat = 10;
-		deto = dev->sata_settings[ATA_LOG_DEVSLP_DETO];
+		deto = dev->devslp_timing[ATA_LOG_DEVSLP_DETO];
 		if (!deto)
 			deto = 20;
 	} else {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 9e8b99af400d..46cd3f4c6aaa 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2325,24 +2325,28 @@ int ata_dev_configure(struct ata_device *dev)
 			}
 		}
 
-		/* check and mark DevSlp capability */
-		if (ata_id_has_devslp(dev->id))
-			dev->flags |= ATA_DFLAG_DEVSLP;
-
-		/* Obtain SATA Settings page from Identify Device Data Log,
-		 * which contains DevSlp timing variables etc.
-		 * Exclude old devices with ata_id_has_ncq()
+		/* Check and mark DevSlp capability. Get DevSlp timing variables
+		 * from SATA Settings page of Identify Device Data Log.
 		 */
-		if (ata_id_has_ncq(dev->id)) {
+		if (ata_id_has_devslp(dev->id)) {
+			u8 sata_setting[ATA_SECT_SIZE];
+			int i, j;
+
+			dev->flags |= ATA_DFLAG_DEVSLP;
 			err_mask = ata_read_log_page(dev,
 						     ATA_LOG_SATA_ID_DEV_DATA,
 						     ATA_LOG_SATA_SETTINGS,
-						     dev->sata_settings,
+						     sata_setting,
 						     1);
 			if (err_mask)
 				ata_dev_dbg(dev,
 					    "failed to get Identify Device Data, Emask 0x%x\n",
 					    err_mask);
+			else
+				for (i = 0; i < ATA_LOG_DEVSLP_SIZE; i++) {
+					j = ATA_LOG_DEVSLP_OFFSET + i;
+					dev->devslp_timing[i] = sata_setting[j];
+				}
 		}
 
 		dev->cdb_len = 16;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 408da9502177..8f7a3d68371a 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -297,10 +297,12 @@ enum {
 	ATA_LOG_SATA_NCQ	= 0x10,
 	ATA_LOG_SATA_ID_DEV_DATA  = 0x30,
 	ATA_LOG_SATA_SETTINGS	  = 0x08,
-	ATA_LOG_DEVSLP_MDAT	  = 0x30,
+	ATA_LOG_DEVSLP_OFFSET	  = 0x30,
+	ATA_LOG_DEVSLP_SIZE	  = 0x08,
+	ATA_LOG_DEVSLP_MDAT	  = 0x00,
 	ATA_LOG_DEVSLP_MDAT_MASK  = 0x1F,
-	ATA_LOG_DEVSLP_DETO	  = 0x31,
-	ATA_LOG_DEVSLP_VALID	  = 0x37,
+	ATA_LOG_DEVSLP_DETO	  = 0x01,
+	ATA_LOG_DEVSLP_VALID	  = 0x07,
 	ATA_LOG_DEVSLP_VALID_MASK = 0x80,
 
 	/* READ/WRITE LONG (obsolete) */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83ba0ab2c915..649e5f86b5f0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -652,8 +652,8 @@ struct ata_device {
 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
 	};
 
-	/* Identify Device Data Log (30h), SATA Settings (page 08h) */
-	u8			sata_settings[ATA_SECT_SIZE];
+	/* DEVSLP Timing Variables from Identify Device Data Log */
+	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];
 
 	/* error history */
 	int			spdn_cnt;
-- 
cgit v1.2.3-55-g7522


From 5dbbaf2de89613d19a9286d4db0a535ca2735d26 Mon Sep 17 00:00:00 2001
From: Paul Moore
Date: Mon, 14 Jan 2013 07:12:19 +0000
Subject: tun: fix LSM/SELinux labeling of tun/tap devices

This patch corrects some problems with LSM/SELinux that were introduced
with the multiqueue patchset.  The problem stems from the fact that the
multiqueue work changed the relationship between the tun device and its
associated socket; before the socket persisted for the life of the
device, however after the multiqueue changes the socket only persisted
for the life of the userspace connection (fd open).  For non-persistent
devices this is not an issue, but for persistent devices this can cause
the tun device to lose its SELinux label.

We correct this problem by adding an opaque LSM security blob to the
tun device struct which allows us to have the LSM security state, e.g.
SELinux labeling information, persist for the lifetime of the tun
device.  In the process we tweak the LSM hooks to work with this new
approach to TUN device/socket labeling and introduce a new LSM hook,
security_tun_dev_attach_queue(), to approve requests to attach to a
TUN queue via TUNSETQUEUE.

The SELinux code has been adjusted to match the new LSM hooks, the
other LSMs do not make use of the LSM TUN controls.  This patch makes
use of the recently added "tun_socket:attach_queue" permission to
restrict access to the TUNSETQUEUE operation.  On older SELinux
policies which do not define the "tun_socket:attach_queue" permission
the access control decision for TUNSETQUEUE will be handled according
to the SELinux policy's unknown permission setting.

Signed-off-by: Paul Moore <pmoore@redhat.com>
Acked-by: Eric Paris <eparis@parisplace.org>
Tested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c                 | 23 +++++++++++----
 include/linux/security.h          | 59 ++++++++++++++++++++++++++++++---------
 security/capability.c             | 24 ++++++++++++++--
 security/security.c               | 28 +++++++++++++++----
 security/selinux/hooks.c          | 50 +++++++++++++++++++++++++--------
 security/selinux/include/objsec.h |  4 +++
 6 files changed, 151 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index af372d0957fe..c81680dc10eb 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -185,6 +185,7 @@ struct tun_struct {
 	unsigned long ageing_time;
 	unsigned int numdisabled;
 	struct list_head disabled;
+	void *security;
 };
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -490,6 +491,10 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	struct tun_file *tfile = file->private_data;
 	int err;
 
+	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
+	if (err < 0)
+		goto out;
+
 	err = -EINVAL;
 	if (rtnl_dereference(tfile->tun))
 		goto out;
@@ -1373,6 +1378,7 @@ static void tun_free_netdev(struct net_device *dev)
 
 	BUG_ON(!(list_empty(&tun->disabled)));
 	tun_flow_uninit(tun);
+	security_tun_dev_free_security(tun->security);
 	free_netdev(dev);
 }
 
@@ -1562,7 +1568,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		if (tun_not_capable(tun))
 			return -EPERM;
-		err = security_tun_dev_attach(tfile->socket.sk);
+		err = security_tun_dev_open(tun->security);
 		if (err < 0)
 			return err;
 
@@ -1619,7 +1625,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		spin_lock_init(&tun->lock);
 
-		security_tun_dev_post_create(&tfile->sk);
+		err = security_tun_dev_alloc_security(&tun->security);
+		if (err < 0)
+			goto err_free_dev;
 
 		tun_net_init(dev);
 
@@ -1789,10 +1797,14 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 
 	if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
 		tun = tfile->detached;
-		if (!tun)
+		if (!tun) {
 			ret = -EINVAL;
-		else
-			ret = tun_attach(tun, file);
+			goto unlock;
+		}
+		ret = security_tun_dev_attach_queue(tun->security);
+		if (ret < 0)
+			goto unlock;
+		ret = tun_attach(tun, file);
 	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
 		tun = rtnl_dereference(tfile->tun);
 		if (!tun || !(tun->flags & TUN_TAP_MQ))
@@ -1802,6 +1814,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 	} else
 		ret = -EINVAL;
 
+unlock:
 	rtnl_unlock();
 	return ret;
 }
diff --git a/include/linux/security.h b/include/linux/security.h
index 0f6afc657f77..eee7478cda70 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -989,17 +989,29 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	tells the LSM to decrement the number of secmark labeling rules loaded
  * @req_classify_flow:
  *	Sets the flow's sid to the openreq sid.
+ * @tun_dev_alloc_security:
+ *	This hook allows a module to allocate a security structure for a TUN
+ *	device.
+ *	@security pointer to a security structure pointer.
+ *	Returns a zero on success, negative values on failure.
+ * @tun_dev_free_security:
+ *	This hook allows a module to free the security structure for a TUN
+ *	device.
+ *	@security pointer to the TUN device's security structure
  * @tun_dev_create:
  *	Check permissions prior to creating a new TUN device.
- * @tun_dev_post_create:
- *	This hook allows a module to update or allocate a per-socket security
- *	structure.
- *	@sk contains the newly created sock structure.
+ * @tun_dev_attach_queue:
+ *	Check permissions prior to attaching to a TUN device queue.
+ *	@security pointer to the TUN device's security structure.
  * @tun_dev_attach:
- *	Check permissions prior to attaching to a persistent TUN device.  This
- *	hook can also be used by the module to update any security state
+ *	This hook can be used by the module to update any security state
  *	associated with the TUN device's sock structure.
  *	@sk contains the existing sock structure.
+ *	@security pointer to the TUN device's security structure.
+ * @tun_dev_open:
+ *	This hook can be used by the module to update any security state
+ *	associated with the TUN device's security structure.
+ *	@security pointer to the TUN devices's security structure.
  *
  * Security hooks for XFRM operations.
  *
@@ -1620,9 +1632,12 @@ struct security_operations {
 	void (*secmark_refcount_inc) (void);
 	void (*secmark_refcount_dec) (void);
 	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
-	int (*tun_dev_create)(void);
-	void (*tun_dev_post_create)(struct sock *sk);
-	int (*tun_dev_attach)(struct sock *sk);
+	int (*tun_dev_alloc_security) (void **security);
+	void (*tun_dev_free_security) (void *security);
+	int (*tun_dev_create) (void);
+	int (*tun_dev_attach_queue) (void *security);
+	int (*tun_dev_attach) (struct sock *sk, void *security);
+	int (*tun_dev_open) (void *security);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2566,9 +2581,12 @@ void security_inet_conn_established(struct sock *sk,
 int security_secmark_relabel_packet(u32 secid);
 void security_secmark_refcount_inc(void);
 void security_secmark_refcount_dec(void);
+int security_tun_dev_alloc_security(void **security);
+void security_tun_dev_free_security(void *security);
 int security_tun_dev_create(void);
-void security_tun_dev_post_create(struct sock *sk);
-int security_tun_dev_attach(struct sock *sk);
+int security_tun_dev_attach_queue(void *security);
+int security_tun_dev_attach(struct sock *sk, void *security);
+int security_tun_dev_open(void *security);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -2733,16 +2751,31 @@ static inline void security_secmark_refcount_dec(void)
 {
 }
 
+static inline int security_tun_dev_alloc_security(void **security)
+{
+	return 0;
+}
+
+static inline void security_tun_dev_free_security(void *security)
+{
+}
+
 static inline int security_tun_dev_create(void)
 {
 	return 0;
 }
 
-static inline void security_tun_dev_post_create(struct sock *sk)
+static inline int security_tun_dev_attach_queue(void *security)
+{
+	return 0;
+}
+
+static inline int security_tun_dev_attach(struct sock *sk, void *security)
 {
+	return 0;
 }
 
-static inline int security_tun_dev_attach(struct sock *sk)
+static inline int security_tun_dev_open(void *security)
 {
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index 0fe5a026aef8..579775088967 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -709,16 +709,31 @@ static void cap_req_classify_flow(const struct request_sock *req,
 {
 }
 
+static int cap_tun_dev_alloc_security(void **security)
+{
+	return 0;
+}
+
+static void cap_tun_dev_free_security(void *security)
+{
+}
+
 static int cap_tun_dev_create(void)
 {
 	return 0;
 }
 
-static void cap_tun_dev_post_create(struct sock *sk)
+static int cap_tun_dev_attach_queue(void *security)
+{
+	return 0;
+}
+
+static int cap_tun_dev_attach(struct sock *sk, void *security)
 {
+	return 0;
 }
 
-static int cap_tun_dev_attach(struct sock *sk)
+static int cap_tun_dev_open(void *security)
 {
 	return 0;
 }
@@ -1050,8 +1065,11 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, secmark_refcount_inc);
 	set_to_cap_if_null(ops, secmark_refcount_dec);
 	set_to_cap_if_null(ops, req_classify_flow);
+	set_to_cap_if_null(ops, tun_dev_alloc_security);
+	set_to_cap_if_null(ops, tun_dev_free_security);
 	set_to_cap_if_null(ops, tun_dev_create);
-	set_to_cap_if_null(ops, tun_dev_post_create);
+	set_to_cap_if_null(ops, tun_dev_open);
+	set_to_cap_if_null(ops, tun_dev_attach_queue);
 	set_to_cap_if_null(ops, tun_dev_attach);
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/security.c b/security/security.c
index daa97f4ac9d1..7b88c6aeaed4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1254,24 +1254,42 @@ void security_secmark_refcount_dec(void)
 }
 EXPORT_SYMBOL(security_secmark_refcount_dec);
 
+int security_tun_dev_alloc_security(void **security)
+{
+	return security_ops->tun_dev_alloc_security(security);
+}
+EXPORT_SYMBOL(security_tun_dev_alloc_security);
+
+void security_tun_dev_free_security(void *security)
+{
+	security_ops->tun_dev_free_security(security);
+}
+EXPORT_SYMBOL(security_tun_dev_free_security);
+
 int security_tun_dev_create(void)
 {
 	return security_ops->tun_dev_create();
 }
 EXPORT_SYMBOL(security_tun_dev_create);
 
-void security_tun_dev_post_create(struct sock *sk)
+int security_tun_dev_attach_queue(void *security)
 {
-	return security_ops->tun_dev_post_create(sk);
+	return security_ops->tun_dev_attach_queue(security);
 }
-EXPORT_SYMBOL(security_tun_dev_post_create);
+EXPORT_SYMBOL(security_tun_dev_attach_queue);
 
-int security_tun_dev_attach(struct sock *sk)
+int security_tun_dev_attach(struct sock *sk, void *security)
 {
-	return security_ops->tun_dev_attach(sk);
+	return security_ops->tun_dev_attach(sk, security);
 }
 EXPORT_SYMBOL(security_tun_dev_attach);
 
+int security_tun_dev_open(void *security)
+{
+	return security_ops->tun_dev_open(security);
+}
+EXPORT_SYMBOL(security_tun_dev_open);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 61a53367d029..ef26e9611ffb 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct request_sock *req,
 	fl->flowi_secid = req->secid;
 }
 
+static int selinux_tun_dev_alloc_security(void **security)
+{
+	struct tun_security_struct *tunsec;
+
+	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
+	if (!tunsec)
+		return -ENOMEM;
+	tunsec->sid = current_sid();
+
+	*security = tunsec;
+	return 0;
+}
+
+static void selinux_tun_dev_free_security(void *security)
+{
+	kfree(security);
+}
+
 static int selinux_tun_dev_create(void)
 {
 	u32 sid = current_sid();
@@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
 			    NULL);
 }
 
-static void selinux_tun_dev_post_create(struct sock *sk)
+static int selinux_tun_dev_attach_queue(void *security)
 {
+	struct tun_security_struct *tunsec = security;
+
+	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
+			    TUN_SOCKET__ATTACH_QUEUE, NULL);
+}
+
+static int selinux_tun_dev_attach(struct sock *sk, void *security)
+{
+	struct tun_security_struct *tunsec = security;
 	struct sk_security_struct *sksec = sk->sk_security;
 
 	/* we don't currently perform any NetLabel based labeling here and it
@@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct sock *sk)
 	 * cause confusion to the TUN user that had no idea network labeling
 	 * protocols were being used */
 
-	/* see the comments in selinux_tun_dev_create() about why we don't use
-	 * the sockcreate SID here */
-
-	sksec->sid = current_sid();
+	sksec->sid = tunsec->sid;
 	sksec->sclass = SECCLASS_TUN_SOCKET;
+
+	return 0;
 }
 
-static int selinux_tun_dev_attach(struct sock *sk)
+static int selinux_tun_dev_open(void *security)
 {
-	struct sk_security_struct *sksec = sk->sk_security;
+	struct tun_security_struct *tunsec = security;
 	u32 sid = current_sid();
 	int err;
 
-	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
+	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
 			   TUN_SOCKET__RELABELFROM, NULL);
 	if (err)
 		return err;
@@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
 			   TUN_SOCKET__RELABELTO, NULL);
 	if (err)
 		return err;
-
-	sksec->sid = sid;
+	tunsec->sid = sid;
 
 	return 0;
 }
@@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
 	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
 	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
 	.req_classify_flow =		selinux_req_classify_flow,
+	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
+	.tun_dev_free_security =	selinux_tun_dev_free_security,
 	.tun_dev_create =		selinux_tun_dev_create,
-	.tun_dev_post_create = 		selinux_tun_dev_post_create,
+	.tun_dev_attach_queue =		selinux_tun_dev_attach_queue,
 	.tun_dev_attach =		selinux_tun_dev_attach,
+	.tun_dev_open =			selinux_tun_dev_open,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 26c7eee1c309..aa47bcabb5f6 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -110,6 +110,10 @@ struct sk_security_struct {
 	u16 sclass;			/* sock security class */
 };
 
+struct tun_security_struct {
+	u32 sid;			/* SID for the tun device sockets */
+};
+
 struct key_security_struct {
 	u32 sid;	/* SID of key */
 };
-- 
cgit v1.2.3-55-g7522


From 8aef33a7cf40ca9da188e8578b2abe7267a38c52 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Tue, 15 Jan 2013 14:18:04 +0100
Subject: cpuidle: remove the power_specified field in the driver

We realized that the power usage field is never filled and when it
is filled for tegra, the power_specified flag is not set causing all
of these values to be reset when the driver is initialized with
set_power_state().

However, the power_specified flag can be simply removed under the
assumption that the states are always backward sorted, which is the
case with the current code.

This change allows the menu governor select function and the
cpuidle_play_dead() to be simplified.  Moreover, the
set_power_states() function can removed as it does not make sense
any more.

Drop the power_specified flag from struct cpuidle_driver and make
the related changes as described above.

As a consequence, this also fixes the bug where on the dynamic
C-states system, the power fields are not initialized.

[rjw: Changelog]
References: https://bugzilla.kernel.org/show_bug.cgi?id=42870
References: https://bugzilla.kernel.org/show_bug.cgi?id=43349
References: https://lkml.org/lkml/2012/10/16/518
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c        | 17 ++++-------------
 drivers/cpuidle/driver.c         | 25 -------------------------
 drivers/cpuidle/governors/menu.c |  8 ++------
 include/linux/cpuidle.h          |  2 +-
 4 files changed, 7 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index fb4a7dd57f94..e1f6860e069c 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -69,24 +69,15 @@ int cpuidle_play_dead(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
-	int i, dead_state = -1;
-	int power_usage = INT_MAX;
+	int i;
 
 	if (!drv)
 		return -ENODEV;
 
 	/* Find lowest-power state that supports long-term idle */
-	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
-		struct cpuidle_state *s = &drv->states[i];
-
-		if (s->power_usage < power_usage && s->enter_dead) {
-			power_usage = s->power_usage;
-			dead_state = i;
-		}
-	}
-
-	if (dead_state != -1)
-		return drv->states[dead_state].enter_dead(dev, dead_state);
+	for (i = drv->state_count - 1; i >= CPUIDLE_DRIVER_STATE_START; i--)
+		if (drv->states[i].enter_dead)
+			return drv->states[i].enter_dead(dev, i);
 
 	return -ENODEV;
 }
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index c2b281afe0ed..422c7b69ba7c 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -19,34 +19,9 @@ DEFINE_SPINLOCK(cpuidle_driver_lock);
 static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu);
 static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu);
 
-static void set_power_states(struct cpuidle_driver *drv)
-{
-	int i;
-
-	/*
-	 * cpuidle driver should set the drv->power_specified bit
-	 * before registering if the driver provides
-	 * power_usage numbers.
-	 *
-	 * If power_specified is not set,
-	 * we fill in power_usage with decreasing values as the
-	 * cpuidle code has an implicit assumption that state Cn
-	 * uses less power than C(n-1).
-	 *
-	 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
-	 * an power value of -1.  So we use -2, -3, etc, for other
-	 * c-states.
-	 */
-	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++)
-		drv->states[i].power_usage = -1 - i;
-}
-
 static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 {
 	drv->refcnt = 0;
-
-	if (!drv->power_specified)
-		set_power_states(drv);
 }
 
 static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 20ea33afdda1..fe343a06b7da 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -312,7 +312,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
-	int power_usage = INT_MAX;
 	int i;
 	int multiplier;
 	struct timespec t;
@@ -383,11 +382,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		if (s->exit_latency * multiplier > data->predicted_us)
 			continue;
 
-		if (s->power_usage < power_usage) {
-			power_usage = s->power_usage;
-			data->last_state_idx = i;
-			data->exit_us = s->exit_latency;
-		}
+		data->last_state_idx = i;
+		data->exit_us = s->exit_latency;
 	}
 
 	/* not deepest C-state chosen for low predicted residency */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3711b34dc4f9..24cd1037b6d6 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -126,9 +126,9 @@ struct cpuidle_driver {
 	struct module 		*owner;
 	int                     refcnt;
 
-	unsigned int		power_specified:1;
 	/* set to 1 to use the core cpuidle time keeping (for all states). */
 	unsigned int		en_core_tk_irqen:1;
+	/* states array must be ordered in decreasing power consumption */
 	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
 	int			state_count;
 	int			safe_state_index;
-- 
cgit v1.2.3-55-g7522


From 774a1221e862b343388347bac9b318767336b20b Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 15 Jan 2013 18:52:51 -0800
Subject: module, async: async_synchronize_full() on module init iff async is
 used

If the default iosched is built as module, the kernel may deadlock
while trying to load the iosched module on device probe if the probing
was running off async.  This is because async_synchronize_full() at
the end of module init ends up waiting for the async job which
initiated the module loading.

 async A				modprobe

 1. finds a device
 2. registers the block device
 3. request_module(default iosched)
					4. modprobe in userland
					5. load and init module
					6. async_synchronize_full()

Async A waits for modprobe to finish in request_module() and modprobe
waits for async A to finish in async_synchronize_full().

Because there's no easy to track dependency once control goes out to
userland, implementing properly nested flushing is difficult.  For
now, make module init perform async_synchronize_full() iff module init
has queued async jobs as suggested by Linus.

This avoids the described deadlock because iosched module doesn't use
async and thus wouldn't invoke async_synchronize_full().  This is
hacky and incomplete.  It will deadlock if async module loading nests;
however, this works around the known problem case and seems to be the
best of bad options.

For more details, please refer to the following thread.

  http://thread.gmane.org/gmane.linux.kernel/1420814

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Alex Riesen <raa.lkml@gmail.com>
Tested-by: Ming Lei <ming.lei@canonical.com>
Tested-by: Alex Riesen <raa.lkml@gmail.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 +
 kernel/async.c        |  3 +++
 kernel/module.c       | 27 +++++++++++++++++++++++++--
 3 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..6fc8f45de4e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
+#define PF_USED_ASYNC	0x00004000	/* used async_schedule*(), used by module init */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
diff --git a/kernel/async.c b/kernel/async.c
index 9d3118384858..a1d585c351d6 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
+	/* mark that this task has queued an async job, used by module init */
+	current->flags |= PF_USED_ASYNC;
+
 	/* schedule for execution */
 	queue_work(system_unbound_wq, &entry->work);
 
diff --git a/kernel/module.c b/kernel/module.c
index 250092c1d57d..b10b048367e1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod)
 {
 	int ret = 0;
 
+	/*
+	 * We want to find out whether @mod uses async during init.  Clear
+	 * PF_USED_ASYNC.  async_schedule*() will set it.
+	 */
+	current->flags &= ~PF_USED_ASYNC;
+
 	blocking_notifier_call_chain(&module_notify_list,
 			MODULE_STATE_COMING, mod);
 
@@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod)
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_LIVE, mod);
 
-	/* We need to finish all async code before the module init sequence is done */
-	async_synchronize_full();
+	/*
+	 * We need to finish all async code before the module init sequence
+	 * is done.  This has potential to deadlock.  For example, a newly
+	 * detected block device can trigger request_module() of the
+	 * default iosched from async probing task.  Once userland helper
+	 * reaches here, async_synchronize_full() will wait on the async
+	 * task waiting on request_module() and deadlock.
+	 *
+	 * This deadlock is avoided by perfomring async_synchronize_full()
+	 * iff module init queued any async jobs.  This isn't a full
+	 * solution as it will deadlock the same if module loading from
+	 * async jobs nests more than once; however, due to the various
+	 * constraints, this hack seems to be the best option for now.
+	 * Please refer to the following thread for details.
+	 *
+	 * http://thread.gmane.org/gmane.linux.kernel/1420814
+	 */
+	if (current->flags & PF_USED_ASYNC)
+		async_synchronize_full();
 
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
-- 
cgit v1.2.3-55-g7522


From e65b9ad222c280c031bc8d3642cc38dd3026fe06 Mon Sep 17 00:00:00 2001
From: Jiri Kosina
Date: Tue, 15 Jan 2013 20:12:37 +0100
Subject: lockdep, rwsem: fix down_write_nest_lock() if
 !CONFIG_DEBUG_LOCK_ALLOC

Commit 1b963c81b145 ("lockdep, rwsem: provide down_write_nest_lock()")
contains a bug in a codepath when CONFIG_DEBUG_LOCK_ALLOC is disabled,
which causes down_read() to be called instead of down_write() by mistake
on such configurations.  Fix that.

Reported-and-tested-by: Andrew Clayton <andrew@digital-domain.net>
Reported-and-tested-by: Zlatko Calusic <zlatko.calusic@iskon.hr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rwsem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 413cc11e414a..8da67d625e13 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -135,7 +135,7 @@ do {								\
 
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
-# define down_write_nest_lock(sem, nest_lock)	down_read(sem)
+# define down_write_nest_lock(sem, nest_lock)	down_write(sem)
 # define down_write_nested(sem, subclass)	down_write(sem)
 #endif
 
-- 
cgit v1.2.3-55-g7522


From edea0d03ee5f0ae0051b6adb6681ebdf976b1ca4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 20 Jan 2013 20:25:47 +0100
Subject: ia64: kill thread_matches(), unexport ptrace_check_attach()

The ia64 function "thread_matches()" has no users since commit
e868a55c2a8c ("[IA64] remove find_thread_for_addr()").  Remove it.

This allows us to make ptrace_check_attach() static to kernel/ptrace.c,
which is good since we'll need to change the semantics of it and fix up
all the callers.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/ptrace.c | 27 ---------------------------
 include/linux/ptrace.h    |  1 -
 kernel/ptrace.c           |  2 +-
 3 files changed, 1 insertion(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 4265ff64219b..b7a5fffe0924 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -672,33 +672,6 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
 	read_unlock(&tasklist_lock);
 }
 
-static inline int
-thread_matches (struct task_struct *thread, unsigned long addr)
-{
-	unsigned long thread_rbs_end;
-	struct pt_regs *thread_regs;
-
-	if (ptrace_check_attach(thread, 0) < 0)
-		/*
-		 * If the thread is not in an attachable state, we'll
-		 * ignore it.  The net effect is that if ADDR happens
-		 * to overlap with the portion of the thread's
-		 * register backing store that is currently residing
-		 * on the thread's kernel stack, then ptrace() may end
-		 * up accessing a stale value.  But if the thread
-		 * isn't stopped, that's a problem anyhow, so we're
-		 * doing as well as we can...
-		 */
-		return 0;
-
-	thread_regs = task_pt_regs(thread);
-	thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL);
-	if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end))
-		return 0;
-
-	return 1;	/* looks like we've got a winner */
-}
-
 /*
  * Write f32-f127 back to task->thread.fph if it has been modified.
  */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 1693775ecfe8..89573a33ab3c 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -45,7 +45,6 @@ extern long arch_ptrace(struct task_struct *child, long request,
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern void ptrace_disable(struct task_struct *);
-extern int ptrace_check_attach(struct task_struct *task, bool ignore_state);
 extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1599157336a6..612a56126851 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -139,7 +139,7 @@ void __ptrace_unlink(struct task_struct *child)
  * RETURNS:
  * 0 on success, -ESRCH if %child is not ready.
  */
-int ptrace_check_attach(struct task_struct *child, bool ignore_state)
+static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 {
 	int ret = -ESRCH;
 
-- 
cgit v1.2.3-55-g7522


From 6509141f9c2ba74df6cc72ec35cd1865276ae3a4 Mon Sep 17 00:00:00 2001
From: Wei Shuai
Date: Mon, 21 Jan 2013 06:00:31 +0000
Subject: usbnet: add new flag FLAG_NOARP for usb net devices

We do have some USB net devices, which cannot do ARP.
so we can introduce a new flag FLAG_NOARP, then client drivers
can easily handle this kind of devices

Signed-off-by: Wei Shuai <cpuwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 4 ++++
 include/linux/usb/usbnet.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 3d4bf01641b4..f34b2ebee815 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1448,6 +1448,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 		if ((dev->driver_info->flags & FLAG_WWAN) != 0)
 			strcpy(net->name, "wwan%d");
 
+		/* devices that cannot do ARP */
+		if ((dev->driver_info->flags & FLAG_NOARP) != 0)
+			net->flags |= IFF_NOARP;
+
 		/* maybe the remote can't receive an Ethernet MTU */
 		if (net->mtu > (dev->hard_mtu - net->hard_header_len))
 			net->mtu = dev->hard_mtu - net->hard_header_len;
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index bd45eb7bedc8..5de7a220e986 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -100,6 +100,7 @@ struct driver_info {
 #define FLAG_LINK_INTR	0x0800		/* updates link (carrier) status */
 
 #define FLAG_POINTTOPOINT 0x1000	/* possibly use "usb%d" names */
+#define FLAG_NOARP	0x2000		/* device can't do ARP */
 
 /*
  * Indicates to usbnet, that USB driver accumulates multiple IP packets.
-- 
cgit v1.2.3-55-g7522


From 00441b5e6b98ad6a50b5cb7f88d473e3ea1e0d75 Mon Sep 17 00:00:00 2001
From: Lee Jones
Date: Wed, 9 Jan 2013 10:06:03 +0000
Subject: mfd: Fix compile errors and warnings when !CONFIG_AB8500_BM

drivers/mfd/ab8500-core.c:1015:21: error: ‘ab8500_bm_data’ undeclared here

include/linux/mfd/abx500/ab8500-bm.h:445:13: warning: ‘ab8500_fg_reinit’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:448:13: warning: ‘ab8500_charger_usb_state_changed’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:451:29: warning: ‘ab8500_btemp_get’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:455:12: warning: ‘ab8500_btemp_get_batctrl_temp’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:463:12: warning: ‘ab8500_fg_inst_curr_blocking’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:442:12: warning: ‘ab8500_fg_inst_curr_done’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:447:26: warning: ‘ab8500_fg_get’ defined but not used

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ab8500-core.c            |  1 +
 include/linux/mfd/abx500.h           |  2 --
 include/linux/mfd/abx500/ab8500-bm.h | 29 ++++-------------------------
 3 files changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index e1650badd106..4778bb124efe 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -19,6 +19,7 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
 #include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
 #include <linux/mfd/dbx500-prcmu.h>
 #include <linux/regulator/ab8500.h>
 #include <linux/of.h>
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 2138bd33021a..e53dcfeaee69 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -272,8 +272,6 @@ struct abx500_bm_data {
 	const struct abx500_fg_parameters *fg_params;
 };
 
-extern struct abx500_bm_data ab8500_bm_data;
-
 enum {
 	NTC_EXTERNAL = 0,
 	NTC_INTERNAL,
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 44310c98ee6e..9bd037df97d9 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -422,7 +422,10 @@ struct ab8500_chargalg_platform_data {
 struct ab8500_btemp;
 struct ab8500_gpadc;
 struct ab8500_fg;
+
 #ifdef CONFIG_AB8500_BM
+extern struct abx500_bm_data ab8500_bm_data;
+
 void ab8500_fg_reinit(void);
 void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA);
 struct ab8500_btemp *ab8500_btemp_get(void);
@@ -434,31 +437,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res);
 int ab8500_fg_inst_curr_done(struct ab8500_fg *di);
 
 #else
-int ab8500_fg_inst_curr_done(struct ab8500_fg *di)
-{
-}
-static void ab8500_fg_reinit(void)
-{
-}
-static void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA)
-{
-}
-static struct ab8500_btemp *ab8500_btemp_get(void)
-{
-	return NULL;
-}
-static int ab8500_btemp_get_batctrl_temp(struct ab8500_btemp *btemp)
-{
-	return 0;
-}
-struct ab8500_fg *ab8500_fg_get(void)
-{
-	return NULL;
-}
-static int ab8500_fg_inst_curr_blocking(struct ab8500_fg *dev)
-{
-	return -ENODEV;
-}
+static struct abx500_bm_data ab8500_bm_data;
 
 static inline int ab8500_fg_inst_curr_start(struct ab8500_fg *di)
 {
-- 
cgit v1.2.3-55-g7522


From 910ffdb18a6408e14febbb6e4b6840fd2c928c82 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 21 Jan 2013 20:47:41 +0100
Subject: ptrace: introduce signal_wake_up_state() and ptrace_signal_wake_up()

Cleanup and preparation for the next change.

signal_wake_up(resume => true) is overused. None of ptrace/jctl callers
actually want to wakeup a TASK_WAKEKILL task, but they can't specify the
necessary mask.

Turn signal_wake_up() into signal_wake_up_state(state), reintroduce
signal_wake_up() as a trivial helper, and add ptrace_signal_wake_up()
which adds __TASK_TRACED.

This way ptrace_signal_wake_up() can work "inside" ptrace_request()
even if the tracee doesn't have the TASK_WAKEKILL bit set.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 11 ++++++++++-
 kernel/ptrace.c       |  8 ++++----
 kernel/signal.c       | 14 ++++----------
 3 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fc8f45de4e9..d2112477ff5e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2714,7 +2714,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig)
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
 
-extern void signal_wake_up(struct task_struct *t, int resume_stopped);
+extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
+
+static inline void signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
+}
+static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
+}
 
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 612a56126851..62f7c2774b16 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -117,7 +117,7 @@ void __ptrace_unlink(struct task_struct *child)
 	 * TASK_KILLABLE sleeps.
 	 */
 	if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
-		signal_wake_up(child, task_is_traced(child));
+		ptrace_signal_wake_up(child, true);
 
 	spin_unlock(&child->sighand->siglock);
 }
@@ -317,7 +317,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	 */
 	if (task_is_stopped(task) &&
 	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
-		signal_wake_up(task, 1);
+		signal_wake_up_state(task, __TASK_STOPPED);
 
 	spin_unlock(&task->sighand->siglock);
 
@@ -737,7 +737,7 @@ int ptrace_request(struct task_struct *child, long request,
 		 * tracee into STOP.
 		 */
 		if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
-			signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
+			ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
 
 		unlock_task_sighand(child, &flags);
 		ret = 0;
@@ -763,7 +763,7 @@ int ptrace_request(struct task_struct *child, long request,
 			 * start of this trap and now.  Trigger re-trap.
 			 */
 			if (child->jobctl & JOBCTL_TRAP_NOTIFY)
-				signal_wake_up(child, true);
+				ptrace_signal_wake_up(child, true);
 			ret = 0;
 		}
 		unlock_task_sighand(child, &flags);
diff --git a/kernel/signal.c b/kernel/signal.c
index 53cd5c4d1172..6e97aa6fa32c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -680,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  * No need to set need_resched since signal event passing
  * goes through ->blocked
  */
-void signal_wake_up(struct task_struct *t, int resume)
+void signal_wake_up_state(struct task_struct *t, unsigned int state)
 {
-	unsigned int mask;
-
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
-
 	/*
-	 * For SIGKILL, we want to wake it up in the stopped/traced/killable
+	 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
 	 * case. We don't check t->state here because there is a race with it
 	 * executing another processor and just now entering stopped state.
 	 * By using wake_up_state, we ensure the process will wake up and
 	 * handle its death signal.
 	 */
-	mask = TASK_INTERRUPTIBLE;
-	if (resume)
-		mask |= TASK_WAKEKILL;
-	if (!wake_up_state(t, mask))
+	if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
 		kick_process(t);
 }
 
@@ -844,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t)
 	assert_spin_locked(&t->sighand->siglock);
 
 	task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
-	signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
+	ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From da0aa7169b97d90f4af39a9dc84d58bbe19d7e78 Mon Sep 17 00:00:00 2001
From: Alan Stern
Date: Fri, 25 Jan 2013 17:09:42 -0500
Subject: USB: add usb_hcd_{start,end}_port_resume

This patch (as1649) adds a mechanism for host controller drivers to
inform usbcore when they have begun or ended resume signalling on a
particular root-hub port.  The core will then make sure that the root
hub does not get runtime-suspended while the port resume is going on.

Since commit 596d789a211d134dc5f94d1e5957248c204ef850 (USB: set hub's
default autosuspend delay as 0), the system tries to suspend hubs
whenever they aren't in use.  While a root-hub port is being resumed,
the root hub does not appear to be in use.  Attempted runtime suspends
fail because of the ongoing port resume, but the PM core just keeps on
trying over and over again.  We want to prevent this wasteful effort.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb.h     |  2 ++
 include/linux/usb/hcd.h |  3 +++
 3 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 4225d5e72131..8e64adf8e4d5 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -39,6 +39,7 @@
 #include <asm/unaligned.h>
 #include <linux/platform_device.h>
 #include <linux/workqueue.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
@@ -1025,6 +1026,49 @@ static int register_root_hub(struct usb_hcd *hcd)
 	return retval;
 }
 
+/*
+ * usb_hcd_start_port_resume - a root-hub port is sending a resume signal
+ * @bus: the bus which the root hub belongs to
+ * @portnum: the port which is being resumed
+ *
+ * HCDs should call this function when they know that a resume signal is
+ * being sent to a root-hub port.  The root hub will be prevented from
+ * going into autosuspend until usb_hcd_end_port_resume() is called.
+ *
+ * The bus's private lock must be held by the caller.
+ */
+void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum)
+{
+	unsigned bit = 1 << portnum;
+
+	if (!(bus->resuming_ports & bit)) {
+		bus->resuming_ports |= bit;
+		pm_runtime_get_noresume(&bus->root_hub->dev);
+	}
+}
+EXPORT_SYMBOL_GPL(usb_hcd_start_port_resume);
+
+/*
+ * usb_hcd_end_port_resume - a root-hub port has stopped sending a resume signal
+ * @bus: the bus which the root hub belongs to
+ * @portnum: the port which is being resumed
+ *
+ * HCDs should call this function when they know that a resume signal has
+ * stopped being sent to a root-hub port.  The root hub will be allowed to
+ * autosuspend again.
+ *
+ * The bus's private lock must be held by the caller.
+ */
+void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum)
+{
+	unsigned bit = 1 << portnum;
+
+	if (bus->resuming_ports & bit) {
+		bus->resuming_ports &= ~bit;
+		pm_runtime_put_noidle(&bus->root_hub->dev);
+	}
+}
+EXPORT_SYMBOL_GPL(usb_hcd_end_port_resume);
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 689b14b26c8d..4d22d0f6167a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -357,6 +357,8 @@ struct usb_bus {
 	int bandwidth_int_reqs;		/* number of Interrupt requests */
 	int bandwidth_isoc_reqs;	/* number of Isoc. requests */
 
+	unsigned resuming_ports;	/* bit array: resuming root-hub ports */
+
 #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 608050b2545f..0a78df5f6cfd 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -430,6 +430,9 @@ extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd);
 extern void usb_wakeup_notification(struct usb_device *hdev,
 		unsigned int portnum);
 
+extern void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum);
+extern void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum);
+
 /* The D0/D1 toggle bits ... USE WITH CAUTION (they're almost hcd-internal) */
 #define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1)
 #define	usb_dotoggle(dev, ep, out)  ((dev)->toggle[out] ^= (1 << (ep)))
-- 
cgit v1.2.3-55-g7522


From d817ac4e181710cd02b582b759d3123ad2cfa8d8 Mon Sep 17 00:00:00 2001
From: Wei WANG
Date: Wed, 23 Jan 2013 09:51:04 +0800
Subject: mfd: rtsx: Add output voltage switch hook

Different card reader has different method to switch output voltage,
add this callback to let the card reader implement its individual switch
function.
This is needed as rtl8411 has a specific switch output voltage procedure.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/rtl8411.c        | 16 ++++++++++++++++
 drivers/mfd/rts5209.c        | 20 ++++++++++++++++++++
 drivers/mfd/rts5229.c        | 20 ++++++++++++++++++++
 drivers/mfd/rtsx_pcr.c       |  9 +++++++++
 include/linux/mfd/rtsx_pci.h | 24 ++++++++++++++++++++----
 5 files changed, 85 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
index 89f046ca9e41..5058ba8dec3b 100644
--- a/drivers/mfd/rtl8411.c
+++ b/drivers/mfd/rtl8411.c
@@ -112,6 +112,21 @@ static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card)
 			BPP_LDO_POWB, BPP_LDO_SUSPEND);
 }
 
+static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	u8 mask, val;
+
+	mask = (BPP_REG_TUNED18 << BPP_TUNED18_SHIFT_8411) | BPP_PAD_MASK;
+	if (voltage == OUTPUT_3V3)
+		val = (BPP_ASIC_3V3 << BPP_TUNED18_SHIFT_8411) | BPP_PAD_3V3;
+	else if (voltage == OUTPUT_1V8)
+		val = (BPP_ASIC_1V8 << BPP_TUNED18_SHIFT_8411) | BPP_PAD_1V8;
+	else
+		return -EINVAL;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, mask, val);
+}
+
 static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
 {
 	unsigned int card_exist;
@@ -172,6 +187,7 @@ static const struct pcr_ops rtl8411_pcr_ops = {
 	.disable_auto_blink = rtl8411_disable_auto_blink,
 	.card_power_on = rtl8411_card_power_on,
 	.card_power_off = rtl8411_card_power_off,
+	.switch_output_voltage = rtl8411_switch_output_voltage,
 	.cd_deglitch = rtl8411_cd_deglitch,
 };
 
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
index 283a4f148084..ba74de8a7c24 100644
--- a/drivers/mfd/rts5209.c
+++ b/drivers/mfd/rts5209.c
@@ -144,6 +144,25 @@ static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card)
 	return rtsx_pci_send_cmd(pcr, 100);
 }
 
+static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct pcr_ops rts5209_pcr_ops = {
 	.extra_init_hw = rts5209_extra_init_hw,
 	.optimize_phy = rts5209_optimize_phy,
@@ -153,6 +172,7 @@ static const struct pcr_ops rts5209_pcr_ops = {
 	.disable_auto_blink = rts5209_disable_auto_blink,
 	.card_power_on = rts5209_card_power_on,
 	.card_power_off = rts5209_card_power_off,
+	.switch_output_voltage = rts5209_switch_output_voltage,
 	.cd_deglitch = NULL,
 };
 
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
index b9dbab266fda..ec1747adf5d3 100644
--- a/drivers/mfd/rts5229.c
+++ b/drivers/mfd/rts5229.c
@@ -114,6 +114,25 @@ static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card)
 	return rtsx_pci_send_cmd(pcr, 100);
 }
 
+static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct pcr_ops rts5229_pcr_ops = {
 	.extra_init_hw = rts5229_extra_init_hw,
 	.optimize_phy = rts5229_optimize_phy,
@@ -123,6 +142,7 @@ static const struct pcr_ops rts5229_pcr_ops = {
 	.disable_auto_blink = rts5229_disable_auto_blink,
 	.card_power_on = rts5229_card_power_on,
 	.card_power_off = rts5229_card_power_off,
+	.switch_output_voltage = rts5229_switch_output_voltage,
 	.cd_deglitch = NULL,
 };
 
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 7a7b0bda4618..f4e02d089271 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -703,6 +703,15 @@ int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
 }
 EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
 
+int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	if (pcr->ops->switch_output_voltage)
+		return pcr->ops->switch_output_voltage(pcr, voltage);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage);
+
 unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr)
 {
 	unsigned int val;
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 060b721fcbfb..271b6e5654af 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -158,10 +158,9 @@
 #define SG_TRANS_DATA		(0x02 << 4)
 #define SG_LINK_DESC		(0x03 << 4)
 
-/* SD bank voltage */
-#define SD_IO_3V3		0
-#define SD_IO_1V8		1
-
+/* Output voltage */
+#define OUTPUT_3V3		0
+#define OUTPUT_1V8		1
 
 /* Card Clock Enable Register */
 #define SD_CLK_EN			0x04
@@ -201,6 +200,20 @@
 #define CHANGE_CLK			0x01
 
 /* LDO_CTL */
+#define BPP_ASIC_1V7			0x00
+#define BPP_ASIC_1V8			0x01
+#define BPP_ASIC_1V9			0x02
+#define BPP_ASIC_2V0			0x03
+#define BPP_ASIC_2V7			0x04
+#define BPP_ASIC_2V8			0x05
+#define BPP_ASIC_3V2			0x06
+#define BPP_ASIC_3V3			0x07
+#define BPP_REG_TUNED18			0x07
+#define BPP_TUNED18_SHIFT_8402		5
+#define BPP_TUNED18_SHIFT_8411		4
+#define BPP_PAD_MASK			0x04
+#define BPP_PAD_3V3			0x04
+#define BPP_PAD_1V8			0x00
 #define BPP_LDO_POWB			0x03
 #define BPP_LDO_ON			0x00
 #define BPP_LDO_SUSPEND			0x02
@@ -688,6 +701,8 @@ struct pcr_ops {
 	int		(*disable_auto_blink)(struct rtsx_pcr *pcr);
 	int		(*card_power_on)(struct rtsx_pcr *pcr, int card);
 	int		(*card_power_off)(struct rtsx_pcr *pcr, int card);
+	int		(*switch_output_voltage)(struct rtsx_pcr *pcr,
+						u8 voltage);
 	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
 };
 
@@ -783,6 +798,7 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
 int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card);
 int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage);
 unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr);
 void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr);
 
-- 
cgit v1.2.3-55-g7522


From ab4e8f8b7bdfeff0c961fdbbdacb262d68f094c0 Mon Sep 17 00:00:00 2001
From: Wei WANG
Date: Wed, 23 Jan 2013 09:51:06 +0800
Subject: mfd: rtsx: Add clock divider hook

Add callback function conv_clk_and_div_n to convert between SSC clock
and its divider N.
For rtl8411, the formula to calculate SSC clock divider N is different
with the other card reader models.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/rtl8411.c           | 13 +++++++++++++
 drivers/mfd/rts5209.c           |  1 +
 drivers/mfd/rts5229.c           |  1 +
 drivers/mfd/rtsx_pcr.c          | 14 ++++++++++++--
 include/linux/mfd/rtsx_common.h |  3 +++
 include/linux/mfd/rtsx_pci.h    |  1 +
 6 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
index 5058ba8dec3b..3d3b4addf81a 100644
--- a/drivers/mfd/rtl8411.c
+++ b/drivers/mfd/rtl8411.c
@@ -178,6 +178,18 @@ static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
 	return card_exist;
 }
 
+static int rtl8411_conv_clk_and_div_n(int input, int dir)
+{
+	int output;
+
+	if (dir == CLK_TO_DIV_N)
+		output = input * 4 / 5 - 2;
+	else
+		output = (input + 2) * 5 / 4;
+
+	return output;
+}
+
 static const struct pcr_ops rtl8411_pcr_ops = {
 	.extra_init_hw = rtl8411_extra_init_hw,
 	.optimize_phy = NULL,
@@ -189,6 +201,7 @@ static const struct pcr_ops rtl8411_pcr_ops = {
 	.card_power_off = rtl8411_card_power_off,
 	.switch_output_voltage = rtl8411_switch_output_voltage,
 	.cd_deglitch = rtl8411_cd_deglitch,
+	.conv_clk_and_div_n = rtl8411_conv_clk_and_div_n,
 };
 
 /* SD Pull Control Enable:
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
index ba74de8a7c24..98fe0f39463e 100644
--- a/drivers/mfd/rts5209.c
+++ b/drivers/mfd/rts5209.c
@@ -174,6 +174,7 @@ static const struct pcr_ops rts5209_pcr_ops = {
 	.card_power_off = rts5209_card_power_off,
 	.switch_output_voltage = rts5209_switch_output_voltage,
 	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
 };
 
 /* SD Pull Control Enable:
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
index ec1747adf5d3..29d889cbb9c5 100644
--- a/drivers/mfd/rts5229.c
+++ b/drivers/mfd/rts5229.c
@@ -144,6 +144,7 @@ static const struct pcr_ops rts5229_pcr_ops = {
 	.card_power_off = rts5229_card_power_off,
 	.switch_output_voltage = rts5229_switch_output_voltage,
 	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
 };
 
 /* SD Pull Control Enable:
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index f4e02d089271..910200e10fa0 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -630,7 +630,10 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 	if (clk == pcr->cur_clock)
 		return 0;
 
-	N = (u8)(clk - 2);
+	if (pcr->ops->conv_clk_and_div_n)
+		N = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N);
+	else
+		N = (u8)(clk - 2);
 	if ((clk <= 2) || (N > max_N))
 		return -EINVAL;
 
@@ -641,7 +644,14 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 	/* Make sure that the SSC clock div_n is equal or greater than min_N */
 	div = CLK_DIV_1;
 	while ((N < min_N) && (div < max_div)) {
-		N = (N + 2) * 2 - 2;
+		if (pcr->ops->conv_clk_and_div_n) {
+			int dbl_clk = pcr->ops->conv_clk_and_div_n(N,
+					DIV_N_TO_CLK) * 2;
+			N = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk,
+					CLK_TO_DIV_N);
+		} else {
+			N = (N + 2) * 2 - 2;
+		}
 		div++;
 	}
 	dev_dbg(&(pcr->pci->dev), "N = %d, div = %d\n", N, div);
diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h
index a8d393e3066b..2b13970596f5 100644
--- a/include/linux/mfd/rtsx_common.h
+++ b/include/linux/mfd/rtsx_common.h
@@ -38,6 +38,9 @@
 #define RTSX_SD_CARD			0
 #define RTSX_MS_CARD			1
 
+#define CLK_TO_DIV_N			0
+#define DIV_N_TO_CLK			1
+
 struct platform_device;
 
 struct rtsx_slot {
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 271b6e5654af..4b117a3f54d4 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -704,6 +704,7 @@ struct pcr_ops {
 	int		(*switch_output_voltage)(struct rtsx_pcr *pcr,
 						u8 voltage);
 	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
+	int		(*conv_clk_and_div_n)(int clk, int dir);
 };
 
 enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
-- 
cgit v1.2.3-55-g7522


From 0a8c290ac58a86d5e1f2193abcd4d74ec075e20c Mon Sep 17 00:00:00 2001
From: Ashish Jangam
Date: Fri, 25 Jan 2013 14:03:49 +0530
Subject: mfd: da9052/53 lockup fix

An issue has been reported where the PMIC either locks up or fails to
respond following a system Reset. This could result in a second write
in which the bus writes the current content of the write buffer to address
of the last I2C access.

The failure case is where this unwanted write transfers incorrect data to
a critical register.

This patch fixes this issue to by following any read or write with a dummy read
to a safe register address. A safe register address is one where the contents
will not affect the operation of the system.

Signed-off-by: Ashish Jangam <ashish.jangam@kpitcummins.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/da9052-i2c.c          | 61 ++++++++++++++++++++++++++++++++++++
 include/linux/mfd/da9052/da9052.h | 66 ++++++++++++++++++++++++++++++++++++---
 include/linux/mfd/da9052/reg.h    |  3 ++
 3 files changed, 126 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
index ac74a4d1daea..885e56780358 100644
--- a/drivers/mfd/da9052-i2c.c
+++ b/drivers/mfd/da9052-i2c.c
@@ -27,6 +27,66 @@
 #include <linux/of_device.h>
 #endif
 
+/* I2C safe register check */
+static inline bool i2c_safe_reg(unsigned char reg)
+{
+	switch (reg) {
+	case DA9052_STATUS_A_REG:
+	case DA9052_STATUS_B_REG:
+	case DA9052_STATUS_C_REG:
+	case DA9052_STATUS_D_REG:
+	case DA9052_ADC_RES_L_REG:
+	case DA9052_ADC_RES_H_REG:
+	case DA9052_VDD_RES_REG:
+	case DA9052_ICHG_AV_REG:
+	case DA9052_TBAT_RES_REG:
+	case DA9052_ADCIN4_RES_REG:
+	case DA9052_ADCIN5_RES_REG:
+	case DA9052_ADCIN6_RES_REG:
+	case DA9052_TJUNC_RES_REG:
+	case DA9052_TSI_X_MSB_REG:
+	case DA9052_TSI_Y_MSB_REG:
+	case DA9052_TSI_LSB_REG:
+	case DA9052_TSI_Z_MSB_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * There is an issue with DA9052 and DA9053_AA/BA/BB PMIC where the PMIC
+ * gets lockup up or fails to respond following a system reset.
+ * This fix is to follow any read or write with a dummy read to a safe
+ * register.
+ */
+int da9052_i2c_fix(struct da9052 *da9052, unsigned char reg)
+{
+	int val;
+
+	switch (da9052->chip_id) {
+	case DA9052:
+	case DA9053_AA:
+	case DA9053_BA:
+	case DA9053_BB:
+		/* A dummy read to a safe register address. */
+	if (!i2c_safe_reg(reg))
+			return regmap_read(da9052->regmap,
+					   DA9052_PARK_REGISTER,
+					   &val);
+		break;
+	default:
+		/*
+		 * For other chips parking of I2C register
+		 * to a safe place is not required.
+		 */
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(da9052_i2c_fix);
+
 static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
 {
 	int reg_val, ret;
@@ -83,6 +143,7 @@ static int da9052_i2c_probe(struct i2c_client *client,
 
 	da9052->dev = &client->dev;
 	da9052->chip_irq = client->irq;
+	da9052->fix_io = da9052_i2c_fix;
 
 	i2c_set_clientdata(client, da9052);
 
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 86dd93de6ff2..786d02eb79d2 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -99,6 +99,9 @@ struct da9052 {
 	u8 chip_id;
 
 	int chip_irq;
+
+	/* SOC I/O transfer related fixes for DA9052/53 */
+	int (*fix_io) (struct da9052 *da9052, unsigned char reg);
 };
 
 /* ADC API */
@@ -113,32 +116,87 @@ static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg)
 	ret = regmap_read(da9052->regmap, reg, &val);
 	if (ret < 0)
 		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
 	return val;
 }
 
 static inline int da9052_reg_write(struct da9052 *da9052, unsigned char reg,
 				    unsigned char val)
 {
-	return regmap_write(da9052->regmap, reg, val);
+	int ret;
+
+	ret = regmap_write(da9052->regmap, reg, val);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg,
 				     unsigned reg_cnt, unsigned char *val)
 {
-	return regmap_bulk_read(da9052->regmap, reg, val, reg_cnt);
+	int ret;
+
+	ret = regmap_bulk_read(da9052->regmap, reg, val, reg_cnt);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg,
 				      unsigned reg_cnt, unsigned char *val)
 {
-	return regmap_raw_write(da9052->regmap, reg, val, reg_cnt);
+	int ret;
+
+	ret = regmap_raw_write(da9052->regmap, reg, val, reg_cnt);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg,
 				     unsigned char bit_mask,
 				     unsigned char reg_val)
 {
-	return regmap_update_bits(da9052->regmap, reg, bit_mask, reg_val);
+	int ret;
+
+	ret = regmap_update_bits(da9052->regmap, reg, bit_mask, reg_val);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 int da9052_device_init(struct da9052 *da9052, u8 chip_id);
diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h
index b97f7309d7f6..c4dd3a8add21 100644
--- a/include/linux/mfd/da9052/reg.h
+++ b/include/linux/mfd/da9052/reg.h
@@ -34,6 +34,9 @@
 #define DA9052_STATUS_C_REG		3
 #define DA9052_STATUS_D_REG		4
 
+/* PARK REGISTER */
+#define DA9052_PARK_REGISTER		DA9052_STATUS_D_REG
+
 /* EVENT REGISTERS */
 #define DA9052_EVENT_A_REG		5
 #define DA9052_EVENT_B_REG		6
-- 
cgit v1.2.3-55-g7522


From 83e68189745ad931c2afd45d8ee3303929233e7f Mon Sep 17 00:00:00 2001
From: Matt Fleming
Date: Wed, 14 Nov 2012 09:42:35 +0000
Subject: efi: Make 'efi_enabled' a function to query EFI facilities

Originally 'efi_enabled' indicated whether a kernel was booted from
EFI firmware. Over time its semantics have changed, and it now
indicates whether or not we are booted on an EFI machine with
bit-native firmware, e.g. 64-bit kernel with 64-bit firmware.

The immediate motivation for this patch is the bug report at,

    https://bugs.launchpad.net/ubuntu-cdimage/+bug/1040557

which details how running a platform driver on an EFI machine that is
designed to run under BIOS can cause the machine to become
bricked. Also, the following report,

    https://bugzilla.kernel.org/show_bug.cgi?id=47121

details how running said driver can also cause Machine Check
Exceptions. Drivers need a new means of detecting whether they're
running on an EFI machine, as sadly the expression,

    if (!efi_enabled)

hasn't been a sufficient condition for quite some time.

Users actually want to query 'efi_enabled' for different reasons -
what they really want access to is the list of available EFI
facilities.

For instance, the x86 reboot code needs to know whether it can invoke
the ResetSystem() function provided by the EFI runtime services, while
the ACPI OSL code wants to know whether the EFI config tables were
mapped successfully. There are also checks in some of the platform
driver code to simply see if they're running on an EFI machine (which
would make it a bad idea to do BIOS-y things).

This patch is a prereq for the samsung-laptop fix patch.

Cc: David Airlie <airlied@linux.ie>
Cc: Corentin Chary <corentincj@iksaif.net>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Peter Jones <pjones@redhat.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Steve Langasek <steve.langasek@canonical.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Konrad Rzeszutek Wilk <konrad@kernel.org>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/efi.h             |  1 +
 arch/x86/kernel/reboot.c               |  2 +-
 arch/x86/kernel/setup.c                | 28 ++++++++---------
 arch/x86/platform/efi/efi.c            | 57 ++++++++++++++++++++--------------
 drivers/acpi/osl.c                     |  2 +-
 drivers/firmware/dmi_scan.c            |  2 +-
 drivers/firmware/efivars.c             |  4 +--
 drivers/firmware/iscsi_ibft_find.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_device.c |  3 +-
 drivers/platform/x86/ibm_rtl.c         |  2 +-
 drivers/scsi/isci/init.c               |  2 +-
 include/linux/efi.h                    | 24 ++++++++++----
 init/main.c                            |  4 +--
 13 files changed, 79 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 6e8fdf5ad113..28677c55113f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,6 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
+extern unsigned long x86_efi_facility;
 extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4e8ba39eaf0f..76fa1e9a2b39 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void)
 			break;
 
 		case BOOT_EFI:
-			if (efi_enabled)
+			if (efi_enabled(EFI_RUNTIME_SERVICES))
 				efi.reset_system(reboot_mode ?
 						 EFI_RESET_WARM :
 						 EFI_RESET_COLD,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 00f6c1472b85..8b24289cc10c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -807,15 +807,15 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
 		     "EL32", 4)) {
-		efi_enabled = 1;
-		efi_64bit = false;
+		set_bit(EFI_BOOT, &x86_efi_facility);
 	} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
 		     "EL64", 4)) {
-		efi_enabled = 1;
-		efi_64bit = true;
+		set_bit(EFI_BOOT, &x86_efi_facility);
+		set_bit(EFI_64BIT, &x86_efi_facility);
 	}
-	if (efi_enabled && efi_memblock_x86_reserve_range())
-		efi_enabled = 0;
+
+	if (efi_enabled(EFI_BOOT))
+		efi_memblock_x86_reserve_range();
 #endif
 
 	x86_init.oem.arch_setup();
@@ -888,7 +888,7 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
-	if (efi_enabled)
+	if (efi_enabled(EFI_BOOT))
 		efi_init();
 
 	dmi_scan_machine();
@@ -971,7 +971,7 @@ void __init setup_arch(char **cmdline_p)
 	 * The EFI specification says that boot service code won't be called
 	 * after ExitBootServices(). This is, in fact, a lie.
 	 */
-	if (efi_enabled)
+	if (efi_enabled(EFI_MEMMAP))
 		efi_reserve_boot_services();
 
 	/* preallocate 4k for mptable mpc */
@@ -1114,7 +1114,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
-	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+	if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
 		conswitchp = &vga_con;
 #elif defined(CONFIG_DUMMY_CONSOLE)
 	conswitchp = &dummy_con;
@@ -1131,14 +1131,14 @@ void __init setup_arch(char **cmdline_p)
 	register_refined_jiffies(CLOCK_TICK_RATE);
 
 #ifdef CONFIG_EFI
-	/* Once setup is done above, disable efi_enabled on mismatched
-	 * firmware/kernel archtectures since there is no support for
-	 * runtime services.
+	/* Once setup is done above, unmap the EFI memory map on
+	 * mismatched firmware/kernel archtectures since there is no
+	 * support for runtime services.
 	 */
-	if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+	if (efi_enabled(EFI_BOOT) &&
+	    IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
 		pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
 		efi_unmap_memmap();
-		efi_enabled = 0;
 	}
 #endif
 }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad4439145f85..5426e482db6e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,9 +51,6 @@
 
 #define EFI_DEBUG	1
 
-int efi_enabled;
-EXPORT_SYMBOL(efi_enabled);
-
 struct efi __read_mostly efi = {
 	.mps        = EFI_INVALID_TABLE_ADDR,
 	.acpi       = EFI_INVALID_TABLE_ADDR,
@@ -69,19 +66,28 @@ EXPORT_SYMBOL(efi);
 
 struct efi_memory_map memmap;
 
-bool efi_64bit;
-
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
 static inline bool efi_is_native(void)
 {
-	return IS_ENABLED(CONFIG_X86_64) == efi_64bit;
+	return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+}
+
+unsigned long x86_efi_facility;
+
+/*
+ * Returns 1 if 'facility' is enabled, 0 otherwise.
+ */
+int efi_enabled(int facility)
+{
+	return test_bit(facility, &x86_efi_facility) != 0;
 }
+EXPORT_SYMBOL(efi_enabled);
 
 static int __init setup_noefi(char *arg)
 {
-	efi_enabled = 0;
+	clear_bit(EFI_BOOT, &x86_efi_facility);
 	return 0;
 }
 early_param("noefi", setup_noefi);
@@ -426,6 +432,7 @@ void __init efi_reserve_boot_services(void)
 
 void __init efi_unmap_memmap(void)
 {
+	clear_bit(EFI_MEMMAP, &x86_efi_facility);
 	if (memmap.map) {
 		early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 		memmap.map = NULL;
@@ -460,7 +467,7 @@ void __init efi_free_boot_services(void)
 
 static int __init efi_systab_init(void *phys)
 {
-	if (efi_64bit) {
+	if (efi_enabled(EFI_64BIT)) {
 		efi_system_table_64_t *systab64;
 		u64 tmp = 0;
 
@@ -552,7 +559,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
 	void *config_tables, *tablep;
 	int i, sz;
 
-	if (efi_64bit)
+	if (efi_enabled(EFI_64BIT))
 		sz = sizeof(efi_config_table_64_t);
 	else
 		sz = sizeof(efi_config_table_32_t);
@@ -572,7 +579,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
 		efi_guid_t guid;
 		unsigned long table;
 
-		if (efi_64bit) {
+		if (efi_enabled(EFI_64BIT)) {
 			u64 table64;
 			guid = ((efi_config_table_64_t *)tablep)->guid;
 			table64 = ((efi_config_table_64_t *)tablep)->table;
@@ -684,7 +691,6 @@ void __init efi_init(void)
 	if (boot_params.efi_info.efi_systab_hi ||
 	    boot_params.efi_info.efi_memmap_hi) {
 		pr_info("Table located above 4GB, disabling EFI.\n");
-		efi_enabled = 0;
 		return;
 	}
 	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
@@ -694,10 +700,10 @@ void __init efi_init(void)
 			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
 #endif
 
-	if (efi_systab_init(efi_phys.systab)) {
-		efi_enabled = 0;
+	if (efi_systab_init(efi_phys.systab))
 		return;
-	}
+
+	set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
 
 	/*
 	 * Show what we know for posterity
@@ -715,10 +721,10 @@ void __init efi_init(void)
 		efi.systab->hdr.revision >> 16,
 		efi.systab->hdr.revision & 0xffff, vendor);
 
-	if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
-		efi_enabled = 0;
+	if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
 		return;
-	}
+
+	set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
 
 	/*
 	 * Note: We currently don't support runtime services on an EFI
@@ -727,15 +733,17 @@ void __init efi_init(void)
 
 	if (!efi_is_native())
 		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
-	else if (efi_runtime_init()) {
-		efi_enabled = 0;
-		return;
+	else {
+		if (efi_runtime_init())
+			return;
+		set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
 	}
 
-	if (efi_memmap_init()) {
-		efi_enabled = 0;
+	if (efi_memmap_init())
 		return;
-	}
+
+	set_bit(EFI_MEMMAP, &x86_efi_facility);
+
 #ifdef CONFIG_X86_32
 	if (efi_is_native()) {
 		x86_platform.get_wallclock = efi_get_time;
@@ -969,6 +977,9 @@ u32 efi_mem_type(unsigned long phys_addr)
 	efi_memory_desc_t *md;
 	void *p;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return 0;
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if ((md->phys_addr <= phys_addr) &&
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 3ff267861541..bd22f8667eed 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -250,7 +250,7 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
 		return acpi_rsdp;
 #endif
 
-	if (efi_enabled) {
+	if (efi_enabled(EFI_CONFIG_TABLES)) {
 		if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
 			return efi.acpi20;
 		else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index fd3ae6290d71..982f1f5f5742 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -471,7 +471,7 @@ void __init dmi_scan_machine(void)
 	char __iomem *p, *q;
 	int rc;
 
-	if (efi_enabled) {
+	if (efi_enabled(EFI_CONFIG_TABLES)) {
 		if (efi.smbios == EFI_INVALID_TABLE_ADDR)
 			goto error;
 
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 7b1c37497c9a..1065119dff92 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -1782,7 +1782,7 @@ efivars_init(void)
 	printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION,
 	       EFIVARS_DATE);
 
-	if (!efi_enabled)
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
 		return 0;
 
 	/* For now we'll register the efi directory at /sys/firmware/efi */
@@ -1822,7 +1822,7 @@ err_put:
 static void __exit
 efivars_exit(void)
 {
-	if (efi_enabled) {
+	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
 		unregister_efivars(&__efivars);
 		kobject_put(efi_kobj);
 	}
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 4da4eb9ae926..2224f1dc074b 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -99,7 +99,7 @@ unsigned long __init find_ibft_region(unsigned long *sizep)
 	/* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
 	 * only use ACPI for this */
 
-	if (!efi_enabled)
+	if (!efi_enabled(EFI_BOOT))
 		find_ibft_in_mem();
 
 	if (ibft_addr) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index edfc54e41842..0d6562bb0c93 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -429,7 +429,8 @@ bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
 
-	if (efi_enabled && rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE)
+	if (efi_enabled(EFI_BOOT) &&
+	    rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE)
 		return false;
 
 	/* first check CRTCs */
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index 7481146a5b47..97c2be195efc 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -244,7 +244,7 @@ static int __init ibm_rtl_init(void) {
 	if (force)
 		pr_warn("module loaded by force\n");
 	/* first ensure that we are running on IBM HW */
-	else if (efi_enabled || !dmi_check_system(ibm_rtl_dmi_table))
+	else if (efi_enabled(EFI_BOOT) || !dmi_check_system(ibm_rtl_dmi_table))
 		return -ENODEV;
 
 	/* Get the address for the Extended BIOS Data Area */
diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
index d73fdcfeb45a..2839baa82a5a 100644
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c
@@ -633,7 +633,7 @@ static int isci_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 	pci_set_drvdata(pdev, pci_info);
 
-	if (efi_enabled)
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		orom = isci_get_efi_var(pdev);
 
 	if (!orom)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8b84916dc671..7a9498ab3c2d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -618,18 +618,30 @@ extern int __init efi_setup_pcdp_console(char *);
 #endif
 
 /*
- * We play games with efi_enabled so that the compiler will, if possible, remove
- * EFI-related code altogether.
+ * We play games with efi_enabled so that the compiler will, if
+ * possible, remove EFI-related code altogether.
  */
+#define EFI_BOOT		0	/* Were we booted from EFI? */
+#define EFI_SYSTEM_TABLES	1	/* Can we use EFI system tables? */
+#define EFI_CONFIG_TABLES	2	/* Can we use EFI config tables? */
+#define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
+#define EFI_MEMMAP		4	/* Can we use EFI memory map? */
+#define EFI_64BIT		5	/* Is the firmware 64-bit? */
+
 #ifdef CONFIG_EFI
 # ifdef CONFIG_X86
-   extern int efi_enabled;
-   extern bool efi_64bit;
+extern int efi_enabled(int facility);
 # else
-#  define efi_enabled 1
+static inline int efi_enabled(int facility)
+{
+	return 1;
+}
 # endif
 #else
-# define efi_enabled 0
+static inline int efi_enabled(int facility)
+{
+	return 0;
+}
 #endif
 
 /*
diff --git a/init/main.c b/init/main.c
index 85d69dffe864..cd30179c86bd 100644
--- a/init/main.c
+++ b/init/main.c
@@ -604,7 +604,7 @@ asmlinkage void __init start_kernel(void)
 	pidmap_init();
 	anon_vma_init();
 #ifdef CONFIG_X86
-	if (efi_enabled)
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
 #endif
 	thread_info_cache_init();
@@ -632,7 +632,7 @@ asmlinkage void __init start_kernel(void)
 	acpi_early_init(); /* before LAPIC and SMP init */
 	sfi_init_late();
 
-	if (efi_enabled) {
+	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
 		efi_late_init();
 		efi_free_boot_services();
 	}
-- 
cgit v1.2.3-55-g7522


From 70c37bf97f2a91accba76080db69144f3b69f736 Mon Sep 17 00:00:00 2001
From: Bjørn Mork
Date: Mon, 28 Jan 2013 23:51:28 +0000
Subject: net: usbnet: prevent buggy devices from killing us

A device sending 0 length frames as fast as it can has been
observed killing the host system due to the resulting memory
pressure.

Temporarily disable RX skb allocation and URB submission when
the current error ratio is high, preventing us from trying to
allocate an infinite number of skbs.  Reenable as soon as we
are finished processing the done queue, allowing the device
to continue working after short error bursts.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 25 +++++++++++++++++++++++++
 include/linux/usb/usbnet.h |  2 ++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index f34b2ebee815..977837725726 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -380,6 +380,12 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 	unsigned long		lockflags;
 	size_t			size = dev->rx_urb_size;
 
+	/* prevent rx skb allocation when error ratio is high */
+	if (test_bit(EVENT_RX_KILL, &dev->flags)) {
+		usb_free_urb(urb);
+		return -ENOLINK;
+	}
+
 	skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
 	if (!skb) {
 		netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
@@ -539,6 +545,17 @@ block:
 		break;
 	}
 
+	/* stop rx if packet error rate is high */
+	if (++dev->pkt_cnt > 30) {
+		dev->pkt_cnt = 0;
+		dev->pkt_err = 0;
+	} else {
+		if (state == rx_cleanup)
+			dev->pkt_err++;
+		if (dev->pkt_err > 20)
+			set_bit(EVENT_RX_KILL, &dev->flags);
+	}
+
 	state = defer_bh(dev, skb, &dev->rxq, state);
 
 	if (urb) {
@@ -791,6 +808,11 @@ int usbnet_open (struct net_device *net)
 		   (dev->driver_info->flags & FLAG_FRAMING_AX) ? "ASIX" :
 		   "simple");
 
+	/* reset rx error state */
+	dev->pkt_cnt = 0;
+	dev->pkt_err = 0;
+	clear_bit(EVENT_RX_KILL, &dev->flags);
+
 	// delay posting reads until we're fully open
 	tasklet_schedule (&dev->bh);
 	if (info->manage_power) {
@@ -1254,6 +1276,9 @@ static void usbnet_bh (unsigned long param)
 		}
 	}
 
+	/* restart RX again after disabling due to high error rate */
+	clear_bit(EVENT_RX_KILL, &dev->flags);
+
 	// waiting for all pending urbs to complete?
 	if (dev->wait) {
 		if ((dev->txq.qlen + dev->rxq.qlen + dev->done.qlen) == 0) {
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 5de7a220e986..0de078d4cdb9 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -33,6 +33,7 @@ struct usbnet {
 	wait_queue_head_t	*wait;
 	struct mutex		phy_mutex;
 	unsigned char		suspend_count;
+	unsigned char		pkt_cnt, pkt_err;
 
 	/* i/o info: pipes etc */
 	unsigned		in, out;
@@ -70,6 +71,7 @@ struct usbnet {
 #		define EVENT_DEV_OPEN	7
 #		define EVENT_DEVICE_REPORT_IDLE	8
 #		define EVENT_NO_RUNTIME_PM	9
+#		define EVENT_RX_KILL	10
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
cgit v1.2.3-55-g7522


From 631b0cfdbd801ceae8762e8d287f15da26792ebe Mon Sep 17 00:00:00 2001
From: Yuanhan Liu
Date: Mon, 4 Feb 2013 14:28:48 -0800
Subject: mm: fix wrong comments about anon_vma lock

We use rwsem since commit 5a505085f043 ("mm/rmap: Convert the struct
anon_vma::mutex to an rwsem").  And most of comments are converted to
the new rwsem lock; while just 2 more missed from:

	 $ git grep 'anon_vma->mutex'

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 2 +-
 mm/mmap.c                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index bc823c4c028b..deca87452528 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -151,7 +151,7 @@ struct mmu_notifier_ops {
  * Therefore notifier chains can only be traversed when either
  *
  * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex).
+ * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
  * 3. No other concurrent thread can access the list (release)
  */
 struct mmu_notifier {
diff --git a/mm/mmap.c b/mm/mmap.c
index 35730ee9d515..d1e4124f3d0e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2943,7 +2943,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
-- 
cgit v1.2.3-55-g7522


From 91c777d86752b00bb3a1d8efa3d8f7e1264f38a9 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Mon, 4 Feb 2013 14:28:49 -0800
Subject: memcg: fix typo in kmemcg cache walk macro

The macro for_each_memcg_cache_index contains a silly yet potentially
deadly mistake.  Although the macro parameter is _idx, the loop tests
are done over i, not _idx.

This hasn't generated any problems so far, because all users use i as a
loop index.  However, while playing with an extension of the code I
ended using another loop index and the compiler was quick to complain.

Unfortunately, this is not the kind of thing that testing reveals =(

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0108a56f814e..28bd5fa2ff2e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -429,7 +429,7 @@ extern int memcg_limited_groups_array_size;
  * the slab_mutex must be held when looping through those caches
  */
 #define for_each_memcg_cache_index(_idx)	\
-	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
+	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)
 
 static inline bool memcg_kmem_enabled(void)
 {
-- 
cgit v1.2.3-55-g7522


From 9c79330d930b5774aed8eb323daebecedce2e245 Mon Sep 17 00:00:00 2001
From: Lucas Stach
Date: Thu, 7 Feb 2013 16:18:39 +0000
Subject: net: usb: fix regression from FLAG_NOARP code

In commit 6509141f9c2ba74df6cc72ec35cd1865276ae3a4 ("usbnet: add new
flag FLAG_NOARP for usb net devices"), the newly added flag NOARP was
using an already defined value, which broke drivers using flag
MULTI_PACKET.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/usbnet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 0de078d4cdb9..0e5ac93bab10 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -102,7 +102,6 @@ struct driver_info {
 #define FLAG_LINK_INTR	0x0800		/* updates link (carrier) status */
 
 #define FLAG_POINTTOPOINT 0x1000	/* possibly use "usb%d" names */
-#define FLAG_NOARP	0x2000		/* device can't do ARP */
 
 /*
  * Indicates to usbnet, that USB driver accumulates multiple IP packets.
@@ -110,6 +109,7 @@ struct driver_info {
  */
 #define FLAG_MULTI_PACKET	0x2000
 #define FLAG_RX_ASSEMBLE	0x4000	/* rx packets may span >1 frames */
+#define FLAG_NOARP		0x8000	/* device can't do ARP */
 
 	/* init device ... can sleep, or cause probe() failure */
 	int	(*bind)(struct usbnet *, struct usb_interface *);
-- 
cgit v1.2.3-55-g7522