From 331818f1c468a24e581aedcbe52af799366a9dfe Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 3 Feb 2012 18:30:53 -0500
Subject: NFSv4: Fix an Oops in the NFSv4 getacl code

Commit bf118a342f10dafe44b14451a1392c3254629a1f (NFSv4: include bitmap
in nfsv4 get acl data) introduces the 'acl_scratch' page for the case
where we may need to decode multi-page data. However it fails to take
into account the fact that the variable may be NULL (for the case where
we're not doing multi-page decode), and it also attaches it to the
encoding xdr_stream rather than the decoding one.

The immediate result is an Oops in nfs4_xdr_enc_getacl due to the
call to page_address() with a NULL page pointer.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Andy Adamson <andros@netapp.com>
Cc: stable@vger.kernel.org
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a764cef06b73..d6ba9a12591e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -614,7 +614,6 @@ struct nfs_getaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct page *			acl_scratch;
 	struct nfs4_sequence_args 	seq_args;
 };
 
@@ -624,6 +623,7 @@ struct nfs_getaclres {
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
+	struct page *			acl_scratch;
 	struct nfs4_sequence_res	seq_res;
 };
 
-- 
cgit v1.2.3-55-g7522


From d9f5343e35d9138432657202afa8e3ddb2ade360 Mon Sep 17 00:00:00 2001
From: Sarah Sharp
Date: Thu, 5 Jan 2012 16:28:54 -0800
Subject: USB: Remove duplicate USB 3.0 hub feature #defines.

Somehow we ended up with duplicate hub feature #defines in ch11.h.
Tatyana Brokhman first created the USB 3.0 hub feature macros in 2.6.38
with commit 0eadcc09203349b11ca477ec367079b23d32ab91 "usb: USB3.0 ch11
definitions".  In 2.6.39, I modified a patch from John Youn that added
similar macros in a different place in the same file, and committed
dbe79bbe9dcb22cb3651c46f18943477141ca452 "USB 3.0 Hub Changes".

Some of the #defines used different names for the same values.  Others
used exactly the same names with the same values, like these gems:

 #define USB_PORT_FEAT_BH_PORT_RESET     28
...
 #define USB_PORT_FEAT_BH_PORT_RESET            28

According to my very geeky husband (who looked it up in the C99 spec),
it is allowed to have object-like macros with duplicate names as long as
the replacement list is exactly the same.  However, he recalled that
some compilers will give warnings when they find duplicate macros.  It's
probably best to remove the duplicates in the stable tree, so that the
code compiles for everyone.

The macros are now fixed to move the feature requests that are specific
to USB 3.0 hubs into a new section (out of the USB 2.0 hub feature
section), and use the most common macro name.

This patch should be backported to 2.6.39.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Tatyana Brokhman <tlinder@codeaurora.org>
Cc: John Youn <johnyoun@synopsys.com>
Cc: Jamey Sharp <jamey@minilop.net>
Cc: stable@vger.kernel.org
---
 include/linux/usb/ch11.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 31fdb4c6ee3d..0b83acd3360a 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -61,12 +61,6 @@
 #define USB_PORT_FEAT_TEST              21
 #define USB_PORT_FEAT_INDICATOR         22
 #define USB_PORT_FEAT_C_PORT_L1         23
-#define USB_PORT_FEAT_C_PORT_LINK_STATE	25
-#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR 26
-#define USB_PORT_FEAT_PORT_REMOTE_WAKE_MASK 27
-#define USB_PORT_FEAT_BH_PORT_RESET     28
-#define USB_PORT_FEAT_C_BH_PORT_RESET   29
-#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT 30
 
 /*
  * Port feature selectors added by USB 3.0 spec.
@@ -75,8 +69,8 @@
 #define USB_PORT_FEAT_LINK_STATE		5
 #define USB_PORT_FEAT_U1_TIMEOUT		23
 #define USB_PORT_FEAT_U2_TIMEOUT		24
-#define USB_PORT_FEAT_C_LINK_STATE		25
-#define USB_PORT_FEAT_C_CONFIG_ERR		26
+#define USB_PORT_FEAT_C_PORT_LINK_STATE		25
+#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR	26
 #define USB_PORT_FEAT_REMOTE_WAKE_MASK		27
 #define USB_PORT_FEAT_BH_PORT_RESET		28
 #define USB_PORT_FEAT_C_BH_PORT_RESET		29
-- 
cgit v1.2.3-55-g7522


From 2c4967f741e87cdd63de7271b97807041dccbf3b Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma
Date: Sat, 4 Feb 2012 16:14:50 -0500
Subject: mmc: core: Ensure clocks are always enabled before host interaction

Ensure clocks are always enabled before any interaction with the
host controller driver. This makes sure that there is no race
between host execution and the core layer turning off clocks
in different context with clock gating framework.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Per Forlin <per.forlin@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c     | 19 ++++++++++++++++---
 drivers/mmc/core/host.h     | 21 ---------------------
 drivers/mmc/core/sd.c       | 22 ++++++++++++++++++----
 drivers/mmc/core/sdio_irq.c | 10 ++++++++--
 include/linux/mmc/host.h    | 19 +++++++++++++++++++
 5 files changed, 61 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f545a3e6eb80..b3063b741df3 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -290,8 +290,11 @@ static void mmc_wait_for_req_done(struct mmc_host *host,
 static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
 		 bool is_first_req)
 {
-	if (host->ops->pre_req)
+	if (host->ops->pre_req) {
+		mmc_host_clk_hold(host);
 		host->ops->pre_req(host, mrq, is_first_req);
+		mmc_host_clk_release(host);
+	}
 }
 
 /**
@@ -306,8 +309,11 @@ static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
 static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
 			 int err)
 {
-	if (host->ops->post_req)
+	if (host->ops->post_req) {
+		mmc_host_clk_hold(host);
 		host->ops->post_req(host, mrq, err);
+		mmc_host_clk_release(host);
+	}
 }
 
 /**
@@ -620,7 +626,9 @@ int mmc_host_enable(struct mmc_host *host)
 		int err;
 
 		host->en_dis_recurs = 1;
+		mmc_host_clk_hold(host);
 		err = host->ops->enable(host);
+		mmc_host_clk_release(host);
 		host->en_dis_recurs = 0;
 
 		if (err) {
@@ -640,7 +648,9 @@ static int mmc_host_do_disable(struct mmc_host *host, int lazy)
 		int err;
 
 		host->en_dis_recurs = 1;
+		mmc_host_clk_hold(host);
 		err = host->ops->disable(host, lazy);
+		mmc_host_clk_release(host);
 		host->en_dis_recurs = 0;
 
 		if (err < 0) {
@@ -1203,8 +1213,11 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11
 
 	host->ios.signal_voltage = signal_voltage;
 
-	if (host->ops->start_signal_voltage_switch)
+	if (host->ops->start_signal_voltage_switch) {
+		mmc_host_clk_hold(host);
 		err = host->ops->start_signal_voltage_switch(host, &host->ios);
+		mmc_host_clk_release(host);
+	}
 
 	return err;
 }
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index fb8a5cd2e4a1..08a7852ade44 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -14,27 +14,6 @@
 
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
-
-#ifdef CONFIG_MMC_CLKGATE
-void mmc_host_clk_hold(struct mmc_host *host);
-void mmc_host_clk_release(struct mmc_host *host);
-unsigned int mmc_host_clk_rate(struct mmc_host *host);
-
-#else
-static inline void mmc_host_clk_hold(struct mmc_host *host)
-{
-}
-
-static inline void mmc_host_clk_release(struct mmc_host *host)
-{
-}
-
-static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
-{
-	return host->ios.clock;
-}
-#endif
-
 void mmc_host_deeper_disable(struct work_struct *work);
 
 #endif
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index c63ad03c29c7..5017f9354ce2 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -451,9 +451,11 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 	 * information and let the hardware specific code
 	 * return what is possible given the options
 	 */
+	mmc_host_clk_hold(card->host);
 	drive_strength = card->host->ops->select_drive_strength(
 		card->sw_caps.uhs_max_dtr,
 		host_drv_type, card_drv_type);
+	mmc_host_clk_release(card->host);
 
 	err = mmc_sd_switch(card, 1, 2, drive_strength, status);
 	if (err)
@@ -660,9 +662,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 		goto out;
 
 	/* SPI mode doesn't define CMD19 */
-	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) {
+		mmc_host_clk_hold(card->host);
 		err = card->host->ops->execute_tuning(card->host,
 						      MMC_SEND_TUNING_BLOCK);
+		mmc_host_clk_release(card->host);
+	}
 
 out:
 	kfree(status);
@@ -850,8 +855,11 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
 	if (!reinit) {
 		int ro = -1;
 
-		if (host->ops->get_ro)
+		if (host->ops->get_ro) {
+			mmc_host_clk_hold(card->host);
 			ro = host->ops->get_ro(host);
+			mmc_host_clk_release(card->host);
+		}
 
 		if (ro < 0) {
 			pr_warning("%s: host does not "
@@ -967,8 +975,11 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		 * Since initialization is now complete, enable preset
 		 * value registers for UHS-I cards.
 		 */
-		if (host->ops->enable_preset_value)
+		if (host->ops->enable_preset_value) {
+			mmc_host_clk_hold(card->host);
 			host->ops->enable_preset_value(host, true);
+			mmc_host_clk_release(card->host);
+		}
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
@@ -1151,8 +1162,11 @@ int mmc_attach_sd(struct mmc_host *host)
 		return err;
 
 	/* Disable preset value enable if already set since last time */
-	if (host->ops->enable_preset_value)
+	if (host->ops->enable_preset_value) {
+		mmc_host_clk_hold(host);
 		host->ops->enable_preset_value(host, false);
+		mmc_host_clk_release(host);
+	}
 
 	err = mmc_send_app_op_cond(host, 0, &ocr);
 	if (err)
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index 68f81b9ee0fb..f573e7f9f740 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -146,15 +146,21 @@ static int sdio_irq_thread(void *_host)
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (host->caps & MMC_CAP_SDIO_IRQ)
+		if (host->caps & MMC_CAP_SDIO_IRQ) {
+			mmc_host_clk_hold(host);
 			host->ops->enable_sdio_irq(host, 1);
+			mmc_host_clk_release(host);
+		}
 		if (!kthread_should_stop())
 			schedule_timeout(period);
 		set_current_state(TASK_RUNNING);
 	} while (!kthread_should_stop());
 
-	if (host->caps & MMC_CAP_SDIO_IRQ)
+	if (host->caps & MMC_CAP_SDIO_IRQ) {
+		mmc_host_clk_hold(host);
 		host->ops->enable_sdio_irq(host, 0);
+		mmc_host_clk_release(host);
+	}
 
 	pr_debug("%s: IRQ thread exiting with code %d\n",
 		 mmc_hostname(host), ret);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0beba1e5e1ed..73e5ee8e9ca2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -444,4 +444,23 @@ static inline int mmc_boot_partition_access(struct mmc_host *host)
 	return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
 }
 
+#ifdef CONFIG_MMC_CLKGATE
+void mmc_host_clk_hold(struct mmc_host *host);
+void mmc_host_clk_release(struct mmc_host *host);
+unsigned int mmc_host_clk_rate(struct mmc_host *host);
+
+#else
+static inline void mmc_host_clk_hold(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_release(struct mmc_host *host)
+{
+}
+
+static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+	return host->ios.clock;
+}
+#endif
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3-55-g7522


From 6e8201f57c9359c9c5dc8f9805c15a4392492a10 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung
Date: Mon, 16 Jan 2012 17:49:01 +0900
Subject: mmc: core: add the capability for broken voltage

There is an understood mismatch between the voltage the host controller is
set to and the voltage supplied to the card by a fixed voltage regulator.
Teaching the driver to accept the mismatch is overly complicated.  Instead
just accept the regulator's voltage.

This patch adds MMC_CAP2_BROKEN_VOLTAGE.

If the voltage didn't satisfy between min_uV and max_uV, try to change
the voltage in core.c.  When changing the voltage, maybe use
regulator_set_voltage().

In regulator_set_voltage(), check the below condition.

	/* sanity check */
	if (!rdev->desc->ops->set_voltage &&
	    !rdev->desc->ops->set_voltage_sel) {
		ret = -EINVAL;
		goto out;
	}

If some board should use the fixed-regulator, always return -EINVAL.
Then, eMMC didn't initialize always.

So if use a fixed-regulator, we need to add the MMC_CAP2_BROKEN_VOLTAGE.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 4 ++++
 include/linux/mmc/host.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b3063b741df3..18661554e79c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1131,6 +1131,10 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
 		 * might not allow this operation
 		 */
 		voltage = regulator_get_voltage(supply);
+
+		if (mmc->caps2 & MMC_CAP2_BROKEN_VOLTAGE)
+			min_uV = max_uV = voltage;
+
 		if (voltage < 0)
 			result = voltage;
 		else if (voltage < min_uV || voltage > max_uV)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 73e5ee8e9ca2..ee2b0363c040 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -257,6 +257,7 @@ struct mmc_host {
 #define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
 #define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
 				 MMC_CAP2_HS200_1_2V_SDR)
+#define MMC_CAP2_BROKEN_VOLTAGE	(1 << 7)	/* Use the broken voltage */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
-- 
cgit v1.2.3-55-g7522


From 3e73c36b4dc224529d0b0c0d5d69c0dacd793c42 Mon Sep 17 00:00:00 2001
From: Girish K S
Date: Tue, 31 Jan 2012 15:44:03 +0530
Subject: mmc: core: Fix PowerOff Notify suspend/resume

Modified the mmc_poweroff to resume before sending the poweroff
notification command. In sleep mode only AWAKE and RESET commands are
allowed, so before sending the poweroff notification command resume from
sleep mode and then send the notification command.

PowerOff Notify is tested on a Synopsis Designware Host Controller
(eMMC 4.5). The suspend to RAM and resume works fine.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Tested-by: Girish K S <girish.shivananjappa@linaro.org>
Reviewed-by: Saugata Das <saugata.das@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 26 +++++++++++++++++++-------
 drivers/mmc/core/mmc.c   | 17 ++++++++++++-----
 include/linux/mmc/card.h |  4 ++++
 3 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 18661554e79c..690255c7d4dc 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1256,6 +1256,7 @@ static void mmc_poweroff_notify(struct mmc_host *host)
 	int err = 0;
 
 	card = host->card;
+	mmc_claim_host(host);
 
 	/*
 	 * Send power notify command only if card
@@ -1286,6 +1287,7 @@ static void mmc_poweroff_notify(struct mmc_host *host)
 		/* Set the card state to no notification after the poweroff */
 		card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
 	}
+	mmc_release_host(host);
 }
 
 /*
@@ -1344,12 +1346,28 @@ static void mmc_power_up(struct mmc_host *host)
 
 void mmc_power_off(struct mmc_host *host)
 {
+	int err = 0;
 	mmc_host_clk_hold(host);
 
 	host->ios.clock = 0;
 	host->ios.vdd = 0;
 
-	mmc_poweroff_notify(host);
+	/*
+	 * For eMMC 4.5 device send AWAKE command before
+	 * POWER_OFF_NOTIFY command, because in sleep state
+	 * eMMC 4.5 devices respond to only RESET and AWAKE cmd
+	 */
+	if (host->card && mmc_card_is_sleep(host->card) &&
+	    host->bus_ops->resume) {
+		err = host->bus_ops->resume(host);
+
+		if (!err)
+			mmc_poweroff_notify(host);
+		else
+			pr_warning("%s: error %d during resume "
+				   "(continue with poweroff sequence)\n",
+				   mmc_hostname(host), err);
+	}
 
 	/*
 	 * Reset ocr mask to be the highest possible voltage supported for
@@ -2403,12 +2421,6 @@ int mmc_suspend_host(struct mmc_host *host)
 		 */
 		if (mmc_try_claim_host(host)) {
 			if (host->bus_ops->suspend) {
-				/*
-				 * For eMMC 4.5 device send notify command
-				 * before sleep, because in sleep state eMMC 4.5
-				 * devices respond to only RESET and AWAKE cmd
-				 */
-				mmc_poweroff_notify(host);
 				err = host->bus_ops->suspend(host);
 			}
 			mmc_do_release_host(host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index dd3fcac684a3..9be031934e33 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1316,11 +1316,13 @@ static int mmc_suspend(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	if (mmc_card_can_sleep(host))
+	if (mmc_card_can_sleep(host)) {
 		err = mmc_card_sleep(host);
-	else if (!mmc_host_is_spi(host))
+		if (!err)
+			mmc_card_set_sleep(host->card);
+	} else if (!mmc_host_is_spi(host))
 		mmc_deselect_cards(host);
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_release_host(host);
 
 	return err;
@@ -1340,7 +1342,11 @@ static int mmc_resume(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	err = mmc_init_card(host, host->ocr, host->card);
+	if (mmc_card_is_sleep(host->card)) {
+		err = mmc_card_awake(host);
+		mmc_card_clr_sleep(host->card);
+	} else
+		err = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
 
 	return err;
@@ -1350,7 +1356,8 @@ static int mmc_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
+	mmc_card_clr_sleep(host->card);
 	mmc_claim_host(host);
 	ret = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 9f22ba572de0..19a41d1737af 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -217,6 +217,7 @@ struct mmc_card {
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
 #define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
 #define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
+#define MMC_STATE_SLEEP		(1<<9)		/* card is in sleep state */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -382,6 +383,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
+#define mmc_card_is_sleep(c)	((c)->state & MMC_STATE_SLEEP)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -393,7 +395,9 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
+#define mmc_card_set_sleep(c)	((c)->state |= MMC_STATE_SLEEP)
 
+#define mmc_card_clr_sleep(c)	((c)->state &= ~MMC_STATE_SLEEP)
 /*
  * Quirk add/remove for MMC products.
  */
-- 
cgit v1.2.3-55-g7522


From f9c2a0dc42a6938ff2a80e55ca2bbd1d5581c72e Mon Sep 17 00:00:00 2001
From: Seungwon Jeon
Date: Thu, 9 Feb 2012 14:32:43 +0900
Subject: mmc: dw_mmc: Fix PIO mode with support of highmem

Current PIO mode makes a kernel crash with CONFIG_HIGHMEM.
Highmem pages have a NULL from sg_virt(sg).
This patch fixes the following problem.

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 817 [#1] PREEMPT SMP
Modules linked in:
CPU: 0    Not tainted  (3.0.15-01423-gdbf465f #589)
PC is at dw_mci_pull_data32+0x4c/0x9c
LR is at dw_mci_read_data_pio+0x54/0x1f0
pc : [<c0358824>]    lr : [<c035988c>]    psr: 20000193
sp : c0619d48  ip : c0619d70  fp : c0619d6c
r10: 00000000  r9 : 00000002  r8 : 00001000
r7 : 00000200  r6 : 00000000  r5 : e1dd3100  r4 : 00000000
r3 : 65622023  r2 : 0000007f  r1 : eeb96000  r0 : e1dd3100
Flags: nzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment
xkernel
Control: 10c5387d  Table: 61e2004a  DAC: 00000015
Process swapper (pid: 0, stack limit = 0xc06182f0)
Stack: (0xc0619d48 to 0xc061a000)
9d40:                   e1dd3100 e1a4f000 00000000 e1dd3100 e1a4f000 00000200
9d60: c0619da4 c0619d70 c035988c c03587e4 c0619d9c e18158f4 e1dd3100 e1dd3100
9d80: 00000020 00000000 00000000 00000020 c06e8a84 00000000 c0619e04 c0619da8
9da0: c0359b24 c0359844 e18158f4 e1dd3164 e1dd3168 e1dd3150 3d02fc79 e1dd3154
9dc0: e1dd3178 00000000 00000020 00000000 e1dd3150 00000000 c10dd7e8 e1a84900
9de0: c061e7cc 00000000 00000000 0000008d c06e8a84 c061e780 c0619e4c c0619e08
9e00: c00c4738 c0359a34 3d02fc79 00000000 c0619e4c c05a1698 c05a1670 c05a165c
9e20: c04de8b0 c061e780 c061e7cc e1a84900 ffffed68 0000008d c0618000 00000000
9e40: c0619e6c c0619e50 c00c48b4 c00c46c8 c061e780 c00423ac c061e7cc ffffed68
9e60: c0619e8c c0619e70 c00c7358 c00c487c 0000008d ffffee38 c0618000 ffffed68
9e80: c0619ea4 c0619e90 c00c4258 c00c72b0 c00423ac ffffee38 c0619ecc c0619ea8
9ea0: c004241c c00c4234 ffffffff f8810000 0000006d 00000002 00000001 7fffffff
9ec0: c0619f44 c0619ed0 c0048bc0 c00423c4 220ae7a9 00000000 386f0d30 0005d3a4
9ee0: c00423ac c10dd0b8 c06f2cd8 c0618000 c0594778 c003a674 7fffffff c0619f44
9f00: 386f0d30 c0619f18 c00a6f94 c005be3c 80000013 ffffffff 386f0d30 0005d3a4
9f20: 386f0d30 0005d2d1 c10dd0a8 c10dd0b8 c06f2cd8 c0618000 c0619f74 c0619f48
9f40: c0345858 c005be00 c00a2440 c0618000 c0618000 c00410d8 c06c1944 c00410fc
9f60: c0594778 c003a674 c0619f9c c0619f78 c004a7e8 c03457b4 c0618000 c06c18f8
9f80: 00000000 c0039c70 c06c18d4 c003a674 c0619fb4 c0619fa0 c04ceafc c004a714
9fa0: c06287b4 c06c18f8 c0619ff4 c0619fb8 c0008b68 c04cea68 c0008578 00000000
9fc0: 00000000 c003a674 00000000 10c5387d c0628658 c003aa78 c062f1c4 4000406a
9fe0: 413fc090 00000000 00000000 c0619ff8 40008044 c0008858 00000000 00000000
Backtrace:
[<c03587d8>] (dw_mci_pull_data32+0x0/0x9c) from [<c035988c>] (dw_mci_read_data_pio+0x54/0x1f0)
 r6:00000200 r5:e1a4f000 r4:e1dd3100
 [<c0359838>] (dw_mci_read_data_pio+0x0/0x1f0) from [<c0359b24>] (dw_mci_interrupt+0xfc/0x4a4)
[<c0359a28>] (dw_mci_interrupt+0x0/0x4a4) from [<c00c4738>] (handle_irq_event_percpu+0x7c/0x1b4)
[<c00c46bc>] (handle_irq_event_percpu+0x0/0x1b4) from [<c00c48b4>] (handle_irq_event+0x44/0x64)
[<c00c4870>] (handle_irq_event+0x0/0x64) from [<c00c7358>] (handle_fasteoi_irq+0xb4/0x124)
 r7:ffffed68 r6:c061e7cc r5:c00423ac r4:c061e780
 [<c00c72a4>] (handle_fasteoi_irq+0x0/0x124) from [<c00c4258>] (generic_handle_irq+0x30/0x38)
 r7:ffffed68 r6:c0618000 r5:ffffee38 r4:0000008d
 [<c00c4228>] (generic_handle_irq+0x0/0x38) from [<c004241c>] (asm_do_IRQ+0x64/0xe0)
 r5:ffffee38 r4:c00423ac
 [<c00423b8>] (asm_do_IRQ+0x0/0xe0) from [<c0048bc0>] (__irq_svc+0x80/0x14c)
Exception stack(0xc0619ed0 to 0xc0619f18)

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Acked-by: Will Newton <will.newton@imgtec.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 144 +++++++++++++++++++++++----------------------
 include/linux/mmc/dw_mmc.h |   6 +-
 2 files changed, 79 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 0e342793ff14..8bec1c36b159 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -22,7 +22,6 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/scatterlist.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
@@ -502,8 +501,14 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 		host->dir_status = DW_MCI_SEND_STATUS;
 
 	if (dw_mci_submit_data_dma(host, data)) {
+		int flags = SG_MITER_ATOMIC;
+		if (host->data->flags & MMC_DATA_READ)
+			flags |= SG_MITER_TO_SG;
+		else
+			flags |= SG_MITER_FROM_SG;
+
+		sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 		host->sg = data->sg;
-		host->pio_offset = 0;
 		host->part_buf_start = 0;
 		host->part_buf_count = 0;
 
@@ -972,6 +977,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 				 * generates a block interrupt, hence setting
 				 * the scatter-gather pointer to NULL.
 				 */
+				sg_miter_stop(&host->sg_miter);
 				host->sg = NULL;
 				ctrl = mci_readl(host, CTRL);
 				ctrl |= SDMMC_CTRL_FIFO_RESET;
@@ -1311,54 +1317,44 @@ static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
 
 static void dw_mci_read_data_pio(struct dw_mci *host)
 {
-	struct scatterlist *sg = host->sg;
-	void *buf = sg_virt(sg);
-	unsigned int offset = host->pio_offset;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+	void *buf;
+	unsigned int offset;
 	struct mmc_data	*data = host->data;
 	int shift = host->data_shift;
 	u32 status;
 	unsigned int nbytes = 0, len;
+	unsigned int remain, fcnt;
 
 	do {
-		len = host->part_buf_count +
-			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
-		if (offset + len <= sg->length) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+
+		host->sg = sg_miter->__sg;
+		buf = sg_miter->addr;
+		remain = sg_miter->length;
+		offset = 0;
+
+		do {
+			fcnt = (SDMMC_GET_FCNT(mci_readl(host, STATUS))
+					<< shift) + host->part_buf_count;
+			len = min(remain, fcnt);
+			if (!len)
+				break;
 			dw_mci_pull_data(host, (void *)(buf + offset), len);
-
 			offset += len;
 			nbytes += len;
-
-			if (offset == sg->length) {
-				flush_dcache_page(sg_page(sg));
-				host->sg = sg = sg_next(sg);
-				if (!sg)
-					goto done;
-
-				offset = 0;
-				buf = sg_virt(sg);
-			}
-		} else {
-			unsigned int remaining = sg->length - offset;
-			dw_mci_pull_data(host, (void *)(buf + offset),
-					 remaining);
-			nbytes += remaining;
-
-			flush_dcache_page(sg_page(sg));
-			host->sg = sg = sg_next(sg);
-			if (!sg)
-				goto done;
-
-			offset = len - remaining;
-			buf = sg_virt(sg);
-			dw_mci_pull_data(host, buf, offset);
-			nbytes += offset;
-		}
+			remain -= len;
+		} while (remain);
+		sg_miter->consumed = offset;
 
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
 		if (status & DW_MCI_DATA_ERROR_FLAGS) {
 			host->data_status = status;
 			data->bytes_xfered += nbytes;
+			sg_miter_stop(sg_miter);
+			host->sg = NULL;
 			smp_wmb();
 
 			set_bit(EVENT_DATA_ERROR, &host->pending_events);
@@ -1367,65 +1363,66 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
-	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
+	if (!remain) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+		sg_miter->consumed = 0;
+	}
+	sg_miter_stop(sg_miter);
 	return;
 
 done:
 	data->bytes_xfered += nbytes;
+	sg_miter_stop(sg_miter);
+	host->sg = NULL;
 	smp_wmb();
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
 
 static void dw_mci_write_data_pio(struct dw_mci *host)
 {
-	struct scatterlist *sg = host->sg;
-	void *buf = sg_virt(sg);
-	unsigned int offset = host->pio_offset;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+	void *buf;
+	unsigned int offset;
 	struct mmc_data	*data = host->data;
 	int shift = host->data_shift;
 	u32 status;
 	unsigned int nbytes = 0, len;
+	unsigned int fifo_depth = host->fifo_depth;
+	unsigned int remain, fcnt;
 
 	do {
-		len = ((host->fifo_depth -
-			SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift)
-			- host->part_buf_count;
-		if (offset + len <= sg->length) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+
+		host->sg = sg_miter->__sg;
+		buf = sg_miter->addr;
+		remain = sg_miter->length;
+		offset = 0;
+
+		do {
+			fcnt = ((fifo_depth -
+				 SDMMC_GET_FCNT(mci_readl(host, STATUS)))
+					<< shift) - host->part_buf_count;
+			len = min(remain, fcnt);
+			if (!len)
+				break;
 			host->push_data(host, (void *)(buf + offset), len);
-
 			offset += len;
 			nbytes += len;
-			if (offset == sg->length) {
-				host->sg = sg = sg_next(sg);
-				if (!sg)
-					goto done;
-
-				offset = 0;
-				buf = sg_virt(sg);
-			}
-		} else {
-			unsigned int remaining = sg->length - offset;
-
-			host->push_data(host, (void *)(buf + offset),
-					remaining);
-			nbytes += remaining;
-
-			host->sg = sg = sg_next(sg);
-			if (!sg)
-				goto done;
-
-			offset = len - remaining;
-			buf = sg_virt(sg);
-			host->push_data(host, (void *)buf, offset);
-			nbytes += offset;
-		}
+			remain -= len;
+		} while (remain);
+		sg_miter->consumed = offset;
 
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
 		if (status & DW_MCI_DATA_ERROR_FLAGS) {
 			host->data_status = status;
 			data->bytes_xfered += nbytes;
+			sg_miter_stop(sg_miter);
+			host->sg = NULL;
 
 			smp_wmb();
 
@@ -1435,12 +1432,20 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
-	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
+	if (!remain) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+		sg_miter->consumed = 0;
+	}
+	sg_miter_stop(sg_miter);
 	return;
 
 done:
 	data->bytes_xfered += nbytes;
+	sg_miter_stop(sg_miter);
+	host->sg = NULL;
 	smp_wmb();
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
@@ -1643,6 +1648,7 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 				 * block interrupt, hence setting the
 				 * scatter-gather pointer to NULL.
 				 */
+				sg_miter_stop(&host->sg_miter);
 				host->sg = NULL;
 
 				ctrl = mci_readl(host, CTRL);
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index e8779c6d1759..aae5d1f1bb39 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -14,6 +14,8 @@
 #ifndef LINUX_MMC_DW_MMC_H
 #define LINUX_MMC_DW_MMC_H
 
+#include <linux/scatterlist.h>
+
 #define MAX_MCI_SLOTS	2
 
 enum dw_mci_state {
@@ -40,7 +42,7 @@ struct mmc_data;
  * @lock: Spinlock protecting the queue and associated data.
  * @regs: Pointer to MMIO registers.
  * @sg: Scatterlist entry currently being processed by PIO code, if any.
- * @pio_offset: Offset into the current scatterlist entry.
+ * @sg_miter: PIO mapping scatterlist iterator.
  * @cur_slot: The slot which is currently using the controller.
  * @mrq: The request currently being processed on @cur_slot,
  *	or NULL if the controller is idle.
@@ -115,7 +117,7 @@ struct dw_mci {
 	void __iomem		*regs;
 
 	struct scatterlist	*sg;
-	unsigned int		pio_offset;
+	struct sg_mapping_iter	sg_miter;
 
 	struct dw_mci_slot	*cur_slot;
 	struct mmc_request	*mrq;
-- 
cgit v1.2.3-55-g7522


From 6b6dc836a195e077e76977b6c020a73de411b46d Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Fri, 10 Feb 2012 11:03:00 +0100
Subject: vfs: Provide function to get superblock and wait for it to thaw

In quota code we need to find a superblock corresponding to a device and wait
for superblock to be unfrozen. However this waiting has to happen without
s_umount semaphore because that is required for superblock to thaw. So provide
a function in VFS for this to keep dances with s_umount where they belong.

[AV: implementation switched to saner variant]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 22 ++++++++++++++++++++++
 include/linux/fs.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 6015c02296b7..6277ec6cb60a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -633,6 +633,28 @@ rescan:
 
 EXPORT_SYMBOL(get_super);
 
+/**
+ *	get_super_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen). %NULL is returned if no match
+ *	is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+	while (1) {
+		struct super_block *s = get_super(bdev);
+		if (!s || s->s_frozen == SB_UNFROZEN)
+			return s;
+		up_read(&s->s_umount);
+		vfs_check_frozen(s, SB_FREEZE_WRITE);
+		put_super(s);
+	}
+}
+EXPORT_SYMBOL(get_super_thawed);
+
 /**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 386da09f229d..69cd5bb640f5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2496,6 +2496,7 @@ extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_super_thawed(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
-- 
cgit v1.2.3-55-g7522


From 607c50d429371797f198ffc34afb239eadd1c655 Mon Sep 17 00:00:00 2001
From: Eun-Chul Kim
Date: Tue, 14 Feb 2012 15:59:46 +0900
Subject: drm/exynos: added panel physical size.

Signed-off-by: Eun-Chul Kim <chulspro.kim@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_connector.c | 16 +++++++++++-----
 drivers/gpu/drm/exynos/exynos_drm_drv.h       |  4 ++--
 drivers/gpu/drm/exynos/exynos_drm_fimd.c      | 27 ++++++++++++++-------------
 include/drm/exynos_drm.h                      | 17 +++++++++++++++--
 4 files changed, 42 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c
index d620b0784257..618bd4d87d28 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_connector.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.c
@@ -28,6 +28,7 @@
 #include "drmP.h"
 #include "drm_crtc_helper.h"
 
+#include <drm/exynos_drm.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_encoder.h"
 
@@ -44,8 +45,9 @@ struct exynos_drm_connector {
 /* convert exynos_video_timings to drm_display_mode */
 static inline void
 convert_to_display_mode(struct drm_display_mode *mode,
-			struct fb_videomode *timing)
+			struct exynos_drm_panel_info *panel)
 {
+	struct fb_videomode *timing = &panel->timing;
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
 	mode->clock = timing->pixclock / 1000;
@@ -60,6 +62,8 @@ convert_to_display_mode(struct drm_display_mode *mode,
 	mode->vsync_start = mode->vdisplay + timing->upper_margin;
 	mode->vsync_end = mode->vsync_start + timing->vsync_len;
 	mode->vtotal = mode->vsync_end + timing->lower_margin;
+	mode->width_mm = panel->width_mm;
+	mode->height_mm = panel->height_mm;
 
 	if (timing->vmode & FB_VMODE_INTERLACED)
 		mode->flags |= DRM_MODE_FLAG_INTERLACE;
@@ -148,16 +152,18 @@ static int exynos_drm_connector_get_modes(struct drm_connector *connector)
 		connector->display_info.raw_edid = edid;
 	} else {
 		struct drm_display_mode *mode = drm_mode_create(connector->dev);
-		struct fb_videomode *timing;
+		struct exynos_drm_panel_info *panel;
 
-		if (display_ops->get_timing)
-			timing = display_ops->get_timing(manager->dev);
+		if (display_ops->get_panel)
+			panel = display_ops->get_panel(manager->dev);
 		else {
 			drm_mode_destroy(connector->dev, mode);
 			return 0;
 		}
 
-		convert_to_display_mode(mode, timing);
+		convert_to_display_mode(mode, panel);
+		connector->display_info.width_mm = mode->width_mm;
+		connector->display_info.height_mm = mode->height_mm;
 
 		mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
 		drm_mode_set_name(mode);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index e685e1e33055..13540de90bfc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -136,7 +136,7 @@ struct exynos_drm_overlay {
  * @type: one of EXYNOS_DISPLAY_TYPE_LCD and HDMI.
  * @is_connected: check for that display is connected or not.
  * @get_edid: get edid modes from display driver.
- * @get_timing: get timing object from display driver.
+ * @get_panel: get panel object from display driver.
  * @check_timing: check if timing is valid or not.
  * @power_on: display device on or off.
  */
@@ -145,7 +145,7 @@ struct exynos_drm_display_ops {
 	bool (*is_connected)(struct device *dev);
 	int (*get_edid)(struct device *dev, struct drm_connector *connector,
 				u8 *edid, int len);
-	void *(*get_timing)(struct device *dev);
+	void *(*get_panel)(struct device *dev);
 	int (*check_timing)(struct device *dev, void *timing);
 	int (*power_on)(struct device *dev, int mode);
 };
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 0dbb32bb18a3..360adf2bba04 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -89,7 +89,7 @@ struct fimd_context {
 	bool				suspended;
 	struct mutex			lock;
 
-	struct fb_videomode		*timing;
+	struct exynos_drm_panel_info *panel;
 };
 
 static bool fimd_display_is_connected(struct device *dev)
@@ -101,13 +101,13 @@ static bool fimd_display_is_connected(struct device *dev)
 	return true;
 }
 
-static void *fimd_get_timing(struct device *dev)
+static void *fimd_get_panel(struct device *dev)
 {
 	struct fimd_context *ctx = get_fimd_context(dev);
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
-	return ctx->timing;
+	return ctx->panel;
 }
 
 static int fimd_check_timing(struct device *dev, void *timing)
@@ -131,7 +131,7 @@ static int fimd_display_power_on(struct device *dev, int mode)
 static struct exynos_drm_display_ops fimd_display_ops = {
 	.type = EXYNOS_DISPLAY_TYPE_LCD,
 	.is_connected = fimd_display_is_connected,
-	.get_timing = fimd_get_timing,
+	.get_panel = fimd_get_panel,
 	.check_timing = fimd_check_timing,
 	.power_on = fimd_display_power_on,
 };
@@ -193,7 +193,8 @@ static void fimd_apply(struct device *subdrv_dev)
 static void fimd_commit(struct device *dev)
 {
 	struct fimd_context *ctx = get_fimd_context(dev);
-	struct fb_videomode *timing = ctx->timing;
+	struct exynos_drm_panel_info *panel = ctx->panel;
+	struct fb_videomode *timing = &panel->timing;
 	u32 val;
 
 	if (ctx->suspended)
@@ -786,7 +787,7 @@ static int __devinit fimd_probe(struct platform_device *pdev)
 	struct fimd_context *ctx;
 	struct exynos_drm_subdrv *subdrv;
 	struct exynos_drm_fimd_pdata *pdata;
-	struct fb_videomode *timing;
+	struct exynos_drm_panel_info *panel;
 	struct resource *res;
 	int win;
 	int ret = -EINVAL;
@@ -799,9 +800,9 @@ static int __devinit fimd_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	timing = &pdata->timing;
-	if (!timing) {
-		dev_err(dev, "timing is null.\n");
+	panel = &pdata->panel;
+	if (!panel) {
+		dev_err(dev, "panel is null.\n");
 		return -EINVAL;
 	}
 
@@ -863,16 +864,16 @@ static int __devinit fimd_probe(struct platform_device *pdev)
 		goto err_req_irq;
 	}
 
-	ctx->clkdiv = fimd_calc_clkdiv(ctx, timing);
+	ctx->clkdiv = fimd_calc_clkdiv(ctx, &panel->timing);
 	ctx->vidcon0 = pdata->vidcon0;
 	ctx->vidcon1 = pdata->vidcon1;
 	ctx->default_win = pdata->default_win;
-	ctx->timing = timing;
+	ctx->panel = panel;
 
-	timing->pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
+	panel->timing.pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
 
 	DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n",
-			timing->pixclock, ctx->clkdiv);
+			panel->timing.pixclock, ctx->clkdiv);
 
 	subdrv = &ctx->subdrv;
 
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index 5e120f1c5cd9..308575ea7c00 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -98,14 +98,27 @@ struct drm_exynos_plane_set_zpos {
 		DRM_EXYNOS_PLANE_SET_ZPOS, struct drm_exynos_plane_set_zpos)
 
 /**
- * Platform Specific Structure for DRM based FIMD.
+ * A structure for lcd panel information.
  *
  * @timing: default video mode for initializing
+ * @width_mm: physical size of lcd width.
+ * @height_mm: physical size of lcd height.
+ */
+struct exynos_drm_panel_info {
+	struct fb_videomode timing;
+	u32 width_mm;
+	u32 height_mm;
+};
+
+/**
+ * Platform Specific Structure for DRM based FIMD.
+ *
+ * @panel: default panel info for initializing
  * @default_win: default window layer number to be used for UI.
  * @bpp: default bit per pixel.
  */
 struct exynos_drm_fimd_pdata {
-	struct fb_videomode		timing;
+	struct exynos_drm_panel_info panel;
 	u32				vidcon0;
 	u32				vidcon1;
 	unsigned int			default_win;
-- 
cgit v1.2.3-55-g7522


From 265da78afd52b9a01d76d99556e828a6c30f1ac9 Mon Sep 17 00:00:00 2001
From: Kamil Debski
Date: Wed, 15 Feb 2012 10:23:33 +0900
Subject: drm/exynos: exynos_drm.h header file fixes

First of all #ifdef __KERNEL__ was added to exynos_drm.h to
mark the part that should be left out of userspace.
Secondly exynos_drm.h was added to include/drm/Kbuild, so it
will be included when doing make headers_install.

Signed-off-by: Kamil Debski <k.debski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 include/drm/Kbuild       | 1 +
 include/drm/exynos_drm.h | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/Kbuild b/include/drm/Kbuild
index a5c0e10fd47d..1e38a19d68f6 100644
--- a/include/drm/Kbuild
+++ b/include/drm/Kbuild
@@ -2,6 +2,7 @@ header-y += drm.h
 header-y += drm_fourcc.h
 header-y += drm_mode.h
 header-y += drm_sarea.h
+header-y += exynos_drm.h
 header-y += i810_drm.h
 header-y += i915_drm.h
 header-y += mga_drm.h
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index 308575ea7c00..1ed3aae893a5 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -97,6 +97,8 @@ struct drm_exynos_plane_set_zpos {
 #define DRM_IOCTL_EXYNOS_PLANE_SET_ZPOS	DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_PLANE_SET_ZPOS, struct drm_exynos_plane_set_zpos)
 
+#ifdef __KERNEL__
+
 /**
  * A structure for lcd panel information.
  *
@@ -152,4 +154,5 @@ struct exynos_drm_hdmi_pdata {
 	unsigned int			bpp;
 };
 
-#endif
+#endif	/* __KERNEL__ */
+#endif	/* _EXYNOS_DRM_H_ */
-- 
cgit v1.2.3-55-g7522


From 4aa832c27edf902130f8bace1d42cf22468823fa Mon Sep 17 00:00:00 2001
From: Johan Hedberg
Date: Sun, 8 Jan 2012 22:51:16 +0200
Subject: Bluetooth: Remove bogus inline declaration from l2cap_chan_connect

As reported by Dan Carpenter this function causes a Sparse warning and
shouldn't be declared inline:

include/net/bluetooth/l2cap.h:837:30 error: marked inline, but without a
definition"

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 +-
 net/bluetooth/l2cap_core.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 68f589150692..124f7cf45bd7 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -834,7 +834,7 @@ int l2cap_add_scid(struct l2cap_chan *chan,  __u16 scid);
 struct l2cap_chan *l2cap_chan_create(struct sock *sk);
 void l2cap_chan_close(struct l2cap_chan *chan, int reason);
 void l2cap_chan_destroy(struct l2cap_chan *chan);
-inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
+int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 								bdaddr_t *dst);
 int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
 								u32 priority);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index faf0b11ac1d3..980abdb30671 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1120,7 +1120,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr
 	return c1;
 }
 
-inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst)
+int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst)
 {
 	struct sock *sk = chan->sk;
 	bdaddr_t *src = &bt_sk(sk)->src;
-- 
cgit v1.2.3-55-g7522


From 6e1da683f79a22fafaada62d547138daaa9e3456 Mon Sep 17 00:00:00 2001
From: Andrzej Kaczmarek
Date: Wed, 4 Jan 2012 12:10:42 +0100
Subject: Bluetooth: l2cap_set_timer needs jiffies as timeout value

After moving L2CAP timers to workqueues l2cap_set_timer expects timeout
value to be specified in jiffies but constants defined in miliseconds
are used. This makes timeouts unreliable when CONFIG_HZ is not set to
1000.

__set_chan_timer macro still uses jiffies as input to avoid multiple
conversions from/to jiffies for sk_sndtimeo value which is already
specified in jiffies.

Signed-off-by: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Ackec-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/l2cap.h |  6 +++---
 net/bluetooth/l2cap_core.c    | 12 ++++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 124f7cf45bd7..26486e182f55 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -626,13 +626,13 @@ static inline void l2cap_clear_timer(struct l2cap_chan *chan,
 #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t))
 #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer)
 #define __set_retrans_timer(c) l2cap_set_timer(c, &c->retrans_timer, \
-		L2CAP_DEFAULT_RETRANS_TO);
+		msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO));
 #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer)
 #define __set_monitor_timer(c) l2cap_set_timer(c, &c->monitor_timer, \
-		L2CAP_DEFAULT_MONITOR_TO);
+		msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO));
 #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer)
 #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \
-		L2CAP_DEFAULT_ACK_TO);
+		msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO));
 #define __clear_ack_timer(c) l2cap_clear_timer(c, &c->ack_timer)
 
 static inline int __seq_offset(struct l2cap_chan *chan, __u16 seq1, __u16 seq2)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 980abdb30671..dcccae04ae08 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2970,7 +2970,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 
 	default:
 		sk->sk_err = ECONNRESET;
-		__set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT);
+		__set_chan_timer(chan,
+				msecs_to_jiffies(L2CAP_DISC_REJ_TIMEOUT));
 		l2cap_send_disconn_req(conn, chan, ECONNRESET);
 		goto done;
 	}
@@ -4478,7 +4479,8 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
 	if (encrypt == 0x00) {
 		if (chan->sec_level == BT_SECURITY_MEDIUM) {
 			__clear_chan_timer(chan);
-			__set_chan_timer(chan, L2CAP_ENC_TIMEOUT);
+			__set_chan_timer(chan,
+					msecs_to_jiffies(L2CAP_ENC_TIMEOUT));
 		} else if (chan->sec_level == BT_SECURITY_HIGH)
 			l2cap_chan_close(chan, ECONNREFUSED);
 	} else {
@@ -4546,7 +4548,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 					L2CAP_CONN_REQ, sizeof(req), &req);
 			} else {
 				__clear_chan_timer(chan);
-				__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
+				__set_chan_timer(chan,
+					msecs_to_jiffies(L2CAP_DISC_TIMEOUT));
 			}
 		} else if (chan->state == BT_CONNECT2) {
 			struct l2cap_conn_rsp rsp;
@@ -4566,7 +4569,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 				}
 			} else {
 				l2cap_state_change(chan, BT_DISCONN);
-				__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
+				__set_chan_timer(chan,
+					msecs_to_jiffies(L2CAP_DISC_TIMEOUT));
 				res = L2CAP_CR_SEC_BLOCK;
 				stat = L2CAP_CS_NO_INFO;
 			}
-- 
cgit v1.2.3-55-g7522


From 331660637b4e5136602a98200a84f6b65ed8d5be Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes
Date: Wed, 4 Jan 2012 11:57:17 -0300
Subject: Bluetooth: Fix using an absolute timeout on hci_conn_put()

queue_delayed_work() expects a relative time for when that work
should be scheduled.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ea9231f4935f..92b8fea553d7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -561,7 +561,7 @@ static inline void hci_conn_put(struct hci_conn *conn)
 		}
 		cancel_delayed_work_sync(&conn->disc_work);
 		queue_delayed_work(conn->hdev->workqueue,
-					&conn->disc_work, jiffies + timeo);
+					&conn->disc_work, timeo);
 	}
 }
 
-- 
cgit v1.2.3-55-g7522


From b5a30dda6598af216c070165ece6068f9f00f33a Mon Sep 17 00:00:00 2001
From: Octavian Purdila
Date: Sun, 22 Jan 2012 00:28:34 +0200
Subject: Bluetooth: silence lockdep warning

Since bluetooth uses multiple protocols types, to avoid lockdep
warnings, we need to use different lockdep classes (one for each
protocol type).

This is already done in bt_sock_create but it misses a couple of cases
when new connections are created. This patch corrects that to fix the
following warning:

<4>[ 1864.732366] =======================================================
<4>[ 1864.733030] [ INFO: possible circular locking dependency detected ]
<4>[ 1864.733544] 3.0.16-mid3-00007-gc9a0f62 #3
<4>[ 1864.733883] -------------------------------------------------------
<4>[ 1864.734408] t.android.btclc/4204 is trying to acquire lock:
<4>[ 1864.734869]  (rfcomm_mutex){+.+.+.}, at: [<c14970ea>] rfcomm_dlc_close+0x15/0x30
<4>[ 1864.735541]
<4>[ 1864.735549] but task is already holding lock:
<4>[ 1864.736045]  (sk_lock-AF_BLUETOOTH){+.+.+.}, at: [<c1498bf7>] lock_sock+0xa/0xc
<4>[ 1864.736732]
<4>[ 1864.736740] which lock already depends on the new lock.
<4>[ 1864.736750]
<4>[ 1864.737428]
<4>[ 1864.737437] the existing dependency chain (in reverse order) is:
<4>[ 1864.738016]
<4>[ 1864.738023] -> #1 (sk_lock-AF_BLUETOOTH){+.+.+.}:
<4>[ 1864.738549]        [<c1062273>] lock_acquire+0x104/0x140
<4>[ 1864.738977]        [<c13d35c1>] lock_sock_nested+0x58/0x68
<4>[ 1864.739411]        [<c1493c33>] l2cap_sock_sendmsg+0x3e/0x76
<4>[ 1864.739858]        [<c13d06c3>] __sock_sendmsg+0x50/0x59
<4>[ 1864.740279]        [<c13d0ea2>] sock_sendmsg+0x94/0xa8
<4>[ 1864.740687]        [<c13d0ede>] kernel_sendmsg+0x28/0x37
<4>[ 1864.741106]        [<c14969ca>] rfcomm_send_frame+0x30/0x38
<4>[ 1864.741542]        [<c1496a2a>] rfcomm_send_ua+0x58/0x5a
<4>[ 1864.741959]        [<c1498447>] rfcomm_run+0x441/0xb52
<4>[ 1864.742365]        [<c104f095>] kthread+0x63/0x68
<4>[ 1864.742742]        [<c14d5182>] kernel_thread_helper+0x6/0xd
<4>[ 1864.743187]
<4>[ 1864.743193] -> #0 (rfcomm_mutex){+.+.+.}:
<4>[ 1864.743667]        [<c1061ada>] __lock_acquire+0x988/0xc00
<4>[ 1864.744100]        [<c1062273>] lock_acquire+0x104/0x140
<4>[ 1864.744519]        [<c14d2c70>] __mutex_lock_common+0x3b/0x33f
<4>[ 1864.744975]        [<c14d303e>] mutex_lock_nested+0x2d/0x36
<4>[ 1864.745412]        [<c14970ea>] rfcomm_dlc_close+0x15/0x30
<4>[ 1864.745842]        [<c14990d9>] __rfcomm_sock_close+0x5f/0x6b
<4>[ 1864.746288]        [<c1499114>] rfcomm_sock_shutdown+0x2f/0x62
<4>[ 1864.746737]        [<c13d275d>] sys_socketcall+0x1db/0x422
<4>[ 1864.747165]        [<c14d42f0>] syscall_call+0x7/0xb

Signed-off-by: Octavian Purdila <octavian.purdila@intel.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/bluetooth.h |  2 ++
 net/bluetooth/af_bluetooth.c      | 12 +++++-------
 net/bluetooth/l2cap_sock.c        |  2 ++
 net/bluetooth/rfcomm/sock.c       |  2 ++
 4 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index abaad6ed9b83..4a82ca0bb0b2 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -256,4 +256,6 @@ void l2cap_exit(void);
 int sco_init(void);
 void sco_exit(void);
 
+void bt_sock_reclassify_lock(struct sock *sk, int proto);
+
 #endif /* __BLUETOOTH_H */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ef92864ac625..72eb187a5f60 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -71,19 +71,16 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = {
 	"slock-AF_BLUETOOTH-BTPROTO_AVDTP",
 };
 
-static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
+void bt_sock_reclassify_lock(struct sock *sk, int proto)
 {
-	struct sock *sk = sock->sk;
-
-	if (!sk)
-		return;
-
+	BUG_ON(!sk);
 	BUG_ON(sock_owned_by_user(sk));
 
 	sock_lock_init_class_and_name(sk,
 			bt_slock_key_strings[proto], &bt_slock_key[proto],
 				bt_key_strings[proto], &bt_lock_key[proto]);
 }
+EXPORT_SYMBOL(bt_sock_reclassify_lock);
 
 int bt_sock_register(int proto, const struct net_proto_family *ops)
 {
@@ -145,7 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto,
 
 	if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) {
 		err = bt_proto[proto]->create(net, sock, proto, kern);
-		bt_sock_reclassify_lock(sock, proto);
+		if (!err)
+			bt_sock_reclassify_lock(sock->sk, proto);
 		module_put(bt_proto[proto]->owner);
 	}
 
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index c57027f7606f..401d9428ae4c 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -849,6 +849,8 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data)
 	if (!sk)
 		return NULL;
 
+	bt_sock_reclassify_lock(sk, BTPROTO_L2CAP);
+
 	l2cap_sock_init(sk, parent);
 
 	return l2cap_pi(sk)->chan;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index f066678faeee..22169c3f1482 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -956,6 +956,8 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 	if (!sk)
 		goto done;
 
+	bt_sock_reclassify_lock(sk, BTPROTO_RFCOMM);
+
 	rfcomm_sock_init(sk, parent);
 	bacpy(&bt_sk(sk)->src, &src);
 	bacpy(&bt_sk(sk)->dst, &dst);
-- 
cgit v1.2.3-55-g7522


From a51cd2be864a3cc0272359b1995e213341dfc7e7 Mon Sep 17 00:00:00 2001
From: Andre Guedes
Date: Fri, 27 Jan 2012 19:42:02 -0300
Subject: Bluetooth: Fix potential deadlock

We don't need to use the _sync variant in hci_conn_hold and
hci_conn_put to cancel conn->disc_work delayed work. This way
we avoid potential deadlocks like this one reported by lockdep.

======================================================
[ INFO: possible circular locking dependency detected ]
3.2.0+ #1 Not tainted
-------------------------------------------------------
kworker/u:1/17 is trying to acquire lock:
 (&hdev->lock){+.+.+.}, at: [<ffffffffa0041155>] hci_conn_timeout+0x62/0x158 [bluetooth]

but task is already holding lock:
 ((&(&conn->disc_work)->work)){+.+...}, at: [<ffffffff81035751>] process_one_work+0x11a/0x2bf

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 ((&(&conn->disc_work)->work)){+.+...}:
       [<ffffffff81057444>] lock_acquire+0x8a/0xa7
       [<ffffffff81034ed1>] wait_on_work+0x3d/0xaa
       [<ffffffff81035b54>] __cancel_work_timer+0xac/0xef
       [<ffffffff81035ba4>] cancel_delayed_work_sync+0xd/0xf
       [<ffffffffa00554b0>] smp_chan_create+0xde/0xe6 [bluetooth]
       [<ffffffffa0056160>] smp_conn_security+0xa3/0x12d [bluetooth]
       [<ffffffffa0053640>] l2cap_connect_cfm+0x237/0x2e8 [bluetooth]
       [<ffffffffa004239c>] hci_proto_connect_cfm+0x2d/0x6f [bluetooth]
       [<ffffffffa0046ea5>] hci_event_packet+0x29d1/0x2d60 [bluetooth]
       [<ffffffffa003dde3>] hci_rx_work+0xd0/0x2e1 [bluetooth]
       [<ffffffff810357af>] process_one_work+0x178/0x2bf
       [<ffffffff81036178>] worker_thread+0xce/0x152
       [<ffffffff81039a03>] kthread+0x95/0x9d
       [<ffffffff812e7754>] kernel_thread_helper+0x4/0x10

-> #1 (slock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+...}:
       [<ffffffff81057444>] lock_acquire+0x8a/0xa7
       [<ffffffff812e553a>] _raw_spin_lock_bh+0x36/0x6a
       [<ffffffff81244d56>] lock_sock_nested+0x24/0x7f
       [<ffffffffa004d96f>] lock_sock+0xb/0xd [bluetooth]
       [<ffffffffa0052906>] l2cap_chan_connect+0xa9/0x26f [bluetooth]
       [<ffffffffa00545f8>] l2cap_sock_connect+0xb3/0xff [bluetooth]
       [<ffffffff81243b48>] sys_connect+0x69/0x8a
       [<ffffffff812e6579>] system_call_fastpath+0x16/0x1b

-> #0 (&hdev->lock){+.+.+.}:
       [<ffffffff81056d06>] __lock_acquire+0xa80/0xd74
       [<ffffffff81057444>] lock_acquire+0x8a/0xa7
       [<ffffffff812e3870>] __mutex_lock_common+0x48/0x38e
       [<ffffffff812e3c75>] mutex_lock_nested+0x2a/0x31
       [<ffffffffa0041155>] hci_conn_timeout+0x62/0x158 [bluetooth]
       [<ffffffff810357af>] process_one_work+0x178/0x2bf
       [<ffffffff81036178>] worker_thread+0xce/0x152
       [<ffffffff81039a03>] kthread+0x95/0x9d
       [<ffffffff812e7754>] kernel_thread_helper+0x4/0x10

other info that might help us debug this:

Chain exists of:
  &hdev->lock --> slock-AF_BLUETOOTH-BTPROTO_L2CAP --> (&(&conn->disc_work)->work)

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock((&(&conn->disc_work)->work));
                               lock(slock-AF_BLUETOOTH-BTPROTO_L2CAP);
                               lock((&(&conn->disc_work)->work));
  lock(&hdev->lock);

 *** DEADLOCK ***

2 locks held by kworker/u:1/17:
 #0:  (hdev->name){.+.+.+}, at: [<ffffffff81035751>] process_one_work+0x11a/0x2bf
 #1:  ((&(&conn->disc_work)->work)){+.+...}, at: [<ffffffff81035751>] process_one_work+0x11a/0x2bf

stack backtrace:
Pid: 17, comm: kworker/u:1 Not tainted 3.2.0+ #1
Call Trace:
 [<ffffffff812e06c6>] print_circular_bug+0x1f8/0x209
 [<ffffffff81056d06>] __lock_acquire+0xa80/0xd74
 [<ffffffff81021ef2>] ? arch_local_irq_restore+0x6/0xd
 [<ffffffff81022bc7>] ? vprintk+0x3f9/0x41e
 [<ffffffff81057444>] lock_acquire+0x8a/0xa7
 [<ffffffffa0041155>] ? hci_conn_timeout+0x62/0x158 [bluetooth]
 [<ffffffff812e3870>] __mutex_lock_common+0x48/0x38e
 [<ffffffffa0041155>] ? hci_conn_timeout+0x62/0x158 [bluetooth]
 [<ffffffff81190fd6>] ? __dynamic_pr_debug+0x6d/0x6f
 [<ffffffffa0041155>] ? hci_conn_timeout+0x62/0x158 [bluetooth]
 [<ffffffff8105320f>] ? trace_hardirqs_off+0xd/0xf
 [<ffffffff812e3c75>] mutex_lock_nested+0x2a/0x31
 [<ffffffffa0041155>] hci_conn_timeout+0x62/0x158 [bluetooth]
 [<ffffffff810357af>] process_one_work+0x178/0x2bf
 [<ffffffff81035751>] ? process_one_work+0x11a/0x2bf
 [<ffffffff81055af3>] ? lock_acquired+0x1d0/0x1df
 [<ffffffffa00410f3>] ? hci_acl_disconn+0x65/0x65 [bluetooth]
 [<ffffffff81036178>] worker_thread+0xce/0x152
 [<ffffffff810407ed>] ? finish_task_switch+0x45/0xc5
 [<ffffffff810360aa>] ? manage_workers.isra.25+0x16a/0x16a
 [<ffffffff81039a03>] kthread+0x95/0x9d
 [<ffffffff812e7754>] kernel_thread_helper+0x4/0x10
 [<ffffffff812e5db4>] ? retint_restore_args+0x13/0x13
 [<ffffffff8103996e>] ? __init_kthread_worker+0x55/0x55
 [<ffffffff812e7750>] ? gs_change+0x13/0x13

Signed-off-by: Andre Guedes <andre.guedes@openbossa.org>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Reviewed-by: Ulisses Furquim <ulisses@profusion.mobi>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 92b8fea553d7..453893b3120e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -540,7 +540,7 @@ void hci_conn_put_device(struct hci_conn *conn);
 static inline void hci_conn_hold(struct hci_conn *conn)
 {
 	atomic_inc(&conn->refcnt);
-	cancel_delayed_work_sync(&conn->disc_work);
+	cancel_delayed_work(&conn->disc_work);
 }
 
 static inline void hci_conn_put(struct hci_conn *conn)
@@ -559,7 +559,7 @@ static inline void hci_conn_put(struct hci_conn *conn)
 		} else {
 			timeo = msecs_to_jiffies(10);
 		}
-		cancel_delayed_work_sync(&conn->disc_work);
+		cancel_delayed_work(&conn->disc_work);
 		queue_delayed_work(conn->hdev->workqueue,
 					&conn->disc_work, timeo);
 	}
-- 
cgit v1.2.3-55-g7522


From 6de32750822d00bfa92c341166132b0714c5b559 Mon Sep 17 00:00:00 2001
From: Ulisses Furquim
Date: Mon, 30 Jan 2012 18:26:28 -0200
Subject: Bluetooth: Remove usage of __cancel_delayed_work()

__cancel_delayed_work() is being used in some paths where we cannot
sleep waiting for the delayed work to finish. However, that function
might return while the timer is running and the work will be queued
again. Replace the calls with safer cancel_delayed_work() version
which spins until the timer handler finishes on other CPUs and
cancels the delayed work.

Signed-off-by: Ulisses Furquim <ulisses@profusion.mobi>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/l2cap.h | 4 ++--
 net/bluetooth/l2cap_core.c    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 26486e182f55..b1664ed884e6 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -611,7 +611,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan,
 {
 	BT_DBG("chan %p state %d timeout %ld", chan, chan->state, timeout);
 
-	if (!__cancel_delayed_work(work))
+	if (!cancel_delayed_work(work))
 		l2cap_chan_hold(chan);
 	schedule_delayed_work(work, timeout);
 }
@@ -619,7 +619,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan,
 static inline void l2cap_clear_timer(struct l2cap_chan *chan,
 					struct delayed_work *work)
 {
-	if (__cancel_delayed_work(work))
+	if (cancel_delayed_work(work))
 		l2cap_chan_put(chan);
 }
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index dcccae04ae08..ec10c698b891 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2574,7 +2574,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd
 
 	if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) &&
 					cmd->ident == conn->info_ident) {
-		__cancel_delayed_work(&conn->info_timer);
+		cancel_delayed_work(&conn->info_timer);
 
 		conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
 		conn->info_ident = 0;
@@ -3121,7 +3121,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
 			conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)
 		return 0;
 
-	__cancel_delayed_work(&conn->info_timer);
+	cancel_delayed_work(&conn->info_timer);
 
 	if (result != L2CAP_IR_SUCCESS) {
 		conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
@@ -4501,7 +4501,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 
 	if (hcon->type == LE_LINK) {
 		smp_distribute_keys(conn, 0);
-		__cancel_delayed_work(&conn->security_timer);
+		cancel_delayed_work(&conn->security_timer);
 	}
 
 	rcu_read_lock();
-- 
cgit v1.2.3-55-g7522


From 2261cc627f5453004042b4f694612edae27e492e Mon Sep 17 00:00:00 2001
From: Shawn Guo
Date: Wed, 15 Feb 2012 10:47:42 -0800
Subject: dt: add empty of_find_compatible_node function

Add empty of_find_compatible_node function for !CONFIG_OF build.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index a75a831e2057..92cf6ad35e0e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -281,6 +281,14 @@ static inline struct property *of_find_property(const struct device_node *np,
 	return NULL;
 }
 
+static inline struct device_node *of_find_compatible_node(
+						struct device_node *from,
+						const char *type,
+						const char *compat)
+{
+	return NULL;
+}
+
 static inline int of_property_read_u32_array(const struct device_node *np,
 					     const char *propname,
 					     u32 *out_values, size_t sz)
-- 
cgit v1.2.3-55-g7522


From f2ea0f5f04c97b48c88edccba52b0682fbe45087 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Sat, 14 Jan 2012 21:44:49 +0300
Subject: crypto: sha512 - use standard ror64()

Use standard ror64() instead of hand-written.
There is no standard ror64, so create it.

The difference is shift value being "unsigned int" instead of uint64_t
(for which there is no reason). gcc starts to emit native ROR instructions
which it doesn't do for some reason currently. This should make the code
faster.

Patch survives in-tree crypto test and ping flood with hmac(sha512) on.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha512_generic.c | 13 ++++---------
 include/linux/bitops.h  | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index f04af931a682..107f6f7be5e1 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -31,11 +31,6 @@ static inline u64 Maj(u64 x, u64 y, u64 z)
         return (x & y) | (z & (x | y));
 }
 
-static inline u64 RORu64(u64 x, u64 y)
-{
-        return (x >> y) | (x << (64 - y));
-}
-
 static const u64 sha512_K[80] = {
         0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
         0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
@@ -66,10 +61,10 @@ static const u64 sha512_K[80] = {
         0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
 };
 
-#define e0(x)       (RORu64(x,28) ^ RORu64(x,34) ^ RORu64(x,39))
-#define e1(x)       (RORu64(x,14) ^ RORu64(x,18) ^ RORu64(x,41))
-#define s0(x)       (RORu64(x, 1) ^ RORu64(x, 8) ^ (x >> 7))
-#define s1(x)       (RORu64(x,19) ^ RORu64(x,61) ^ (x >> 6))
+#define e0(x)       (ror64(x,28) ^ ror64(x,34) ^ ror64(x,39))
+#define e1(x)       (ror64(x,14) ^ ror64(x,18) ^ ror64(x,41))
+#define s0(x)       (ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7))
+#define s1(x)       (ror64(x,19) ^ ror64(x,61) ^ (x >> 6))
 
 static inline void LOAD_OP(int I, u64 *W, const u8 *input)
 {
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a2a083..fc8a3ffce320 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -49,6 +49,26 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
+/**
+ * rol64 - rotate a 64-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 rol64(__u64 word, unsigned int shift)
+{
+	return (word << shift) | (word >> (64 - shift));
+}
+
+/**
+ * ror64 - rotate a 64-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 ror64(__u64 word, unsigned int shift)
+{
+	return (word >> shift) | (word << (64 - shift));
+}
+
 /**
  * rol32 - rotate a 32-bit value left
  * @word: value to rotate
-- 
cgit v1.2.3-55-g7522


From 59cca653a601372e9b4a430d867377a3e4a36d76 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin
Date: Thu, 2 Feb 2012 10:46:49 +0200
Subject: digsig: changed type of the timestamp

time_t was used in the signature and key packet headers,
which is typedef of long and is different on 32 and 64 bit architectures.
Signature and key format should be independent of architecture.
Similar to GPG, I have changed the type to uint32_t.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/digsig.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index b01558b15814..6f85a070bb45 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -30,7 +30,7 @@ enum digest_algo {
 
 struct pubkey_hdr {
 	uint8_t		version;	/* key format version */
-	time_t		timestamp;	/* key made, always 0 for now */
+	uint32_t	timestamp;	/* key made, always 0 for now */
 	uint8_t		algo;
 	uint8_t		nmpi;
 	char		mpi[0];
@@ -38,7 +38,7 @@ struct pubkey_hdr {
 
 struct signature_hdr {
 	uint8_t		version;	/* signature format version */
-	time_t		timestamp;	/* signature made */
+	uint32_t	timestamp;	/* signature made */
 	uint8_t		algo;
 	uint8_t		hash;
 	uint8_t		keyid[8];
-- 
cgit v1.2.3-55-g7522


From 88ba136d6635b262f77cc418d536115fb8e4d4ab Mon Sep 17 00:00:00 2001
From: Joerg Willmann
Date: Tue, 21 Feb 2012 13:26:14 +0100
Subject: netfilter: ebtables: fix alignment problem in ppc

ebt_among extension of ebtables uses __alignof__(_xt_align) while the
corresponding kernel module uses __alignof__(ebt_replace) to determine
the alignment in EBT_ALIGN().

These are the results of these values on different platforms:

x86 x86_64 ppc
__alignof__(_xt_align) 4 8 8
__alignof__(ebt_replace) 4 8 4

ebtables fails to add rules which use the among extension.

I'm using kernel 2.6.33 and ebtables 2.0.10-4

According to Bart De Schuymer, userspace alignment was changed to
_xt_align to fix an alignment issue on a userspace32-kernel64 system
(he thinks it was for an ARM device). So userspace must be right.
The kernel alignment macro needs to change so it also uses _xt_align
instead of ebt_replace. The userspace changes date back from
June 29, 2009.

Signed-off-by: Joerg Willmann <joe@clnt.de>
Signed-off by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 8797ed16feb2..4dd5bd6994a8 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -285,8 +285,8 @@ struct ebt_table {
 	struct module *me;
 };
 
-#define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \
-		     ~(__alignof__(struct ebt_replace)-1))
+#define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \
+		     ~(__alignof__(struct _xt_align)-1))
 extern struct ebt_table *ebt_register_table(struct net *net,
 					    const struct ebt_table *table);
 extern void ebt_unregister_table(struct net *net, struct ebt_table *table);
-- 
cgit v1.2.3-55-g7522


From 7d96b3e55ad45ebe4ff1a1daad27ac1fff8682ec Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Sun, 19 Feb 2012 18:29:11 +0400
Subject: percpu: fix generic definition of __this_cpu_add_and_return()

This patch adds missed "__" into function prefix.
Otherwise on all archectures (except x86) it expands to irq/preemtion-safe
variant: _this_cpu_generic_add_return(), which do extra irq-save/irq-restore.
Optimal generic implementation is __this_cpu_generic_add_return().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 32cd1f67462e..3b609eb9cd7d 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -718,7 +718,8 @@ do {									\
 # ifndef __this_cpu_add_return_8
 #  define __this_cpu_add_return_8(pcp, val)	__this_cpu_generic_add_return(pcp, val)
 # endif
-# define __this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+# define __this_cpu_add_return(pcp, val)	\
+	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif
 
 #define __this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
-- 
cgit v1.2.3-55-g7522


From e920d5971d706290c5a6281f719e16c25021f964 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Wed, 15 Feb 2012 16:54:38 +0800
Subject: percpu: use raw_local_irq_* in _this_cpu op

It doesn't make sense to trace irq off or do irq flags
lock proving inside 'this_cpu' operations, so replace local_irq_*
with raw_local_irq_* in 'this_cpu' op.

Also the patch fixes onelockdep warning[1] by the replacement, see
below:

In commit: 933393f58fef9963eac61db8093689544e29a600(percpu:
Remove irqsafe_cpu_xxx variants), local_irq_save/restore(flags) are
added inside this_cpu_inc operation, so that trace_hardirqs_off_caller
will be called by trace_hardirqs_on_caller directly because
__debug_atomic_inc is implemented as this_cpu_inc, which may trigger
the lockdep warning[1], for example in the below ARM scenary:

	kernel_thread_helper	/*irq disabled*/
		->trace_hardirqs_on_caller	/*hardirqs_enabled was set*/
			->trace_hardirqs_off_caller	/*hardirqs_enabled cleared*/
				__this_cpu_add(redundant_hardirqs_on)
			->trace_hardirqs_off_caller	/*irq disabled, so call here*/

The 'unannotated irqs-on' warning will be triggered somewhere because
irq is just enabled after the irq trace in kernel_thread_helper.

[1],
[    0.162841] ------------[ cut here ]------------
[    0.167694] WARNING: at kernel/lockdep.c:3493 check_flags+0xc0/0x1d0()
[    0.174468] Modules linked in:
[    0.177703] Backtrace:
[    0.180328] [<c00171f0>] (dump_backtrace+0x0/0x110) from [<c0412320>] (dump_stack+0x18/0x1c)
[    0.189086]  r6:c051f778 r5:00000da5 r4:00000000 r3:60000093
[    0.195007] [<c0412308>] (dump_stack+0x0/0x1c) from [<c00410e8>] (warn_slowpath_common+0x54/0x6c)
[    0.204223] [<c0041094>] (warn_slowpath_common+0x0/0x6c) from [<c0041124>] (warn_slowpath_null+0x24/0x2c)
[    0.214111]  r8:00000000 r7:00000000 r6:ee069598 r5:60000013 r4:ee082000
[    0.220825] r3:00000009
[    0.223693] [<c0041100>] (warn_slowpath_null+0x0/0x2c) from [<c0088f38>] (check_flags+0xc0/0x1d0)
[    0.232910] [<c0088e78>] (check_flags+0x0/0x1d0) from [<c008d348>] (lock_acquire+0x4c/0x11c)
[    0.241668] [<c008d2fc>] (lock_acquire+0x0/0x11c) from [<c0415aa4>] (_raw_spin_lock+0x3c/0x74)
[    0.250610] [<c0415a68>] (_raw_spin_lock+0x0/0x74) from [<c010a844>] (set_task_comm+0x20/0xc0)
[    0.259521]  r6:ee069588 r5:ee0691c0 r4:ee082000
[    0.264404] [<c010a824>] (set_task_comm+0x0/0xc0) from [<c0060780>] (kthreadd+0x28/0x108)
[    0.272857]  r8:00000000 r7:00000013 r6:c0044a08 r5:ee0691c0 r4:ee082000
[    0.279571] r3:ee083fe0
[    0.282470] [<c0060758>] (kthreadd+0x0/0x108) from [<c0044a08>] (do_exit+0x0/0x6dc)
[    0.290405]  r5:c0060758 r4:00000000
[    0.294189] ---[ end trace 1b75b31a2719ed1c ]---
[    0.299041] possible reason: unannotated irqs-on.
[    0.303955] irq event stamp: 5
[    0.307159] hardirqs last  enabled at (4): [<c001331c>] no_work_pending+0x8/0x2c
[    0.314880] hardirqs last disabled at (5): [<c0089b08>] trace_hardirqs_on_caller+0x60/0x26c
[    0.323547] softirqs last  enabled at (0): [<c003f754>] copy_process+0x33c/0xef4
[    0.331207] softirqs last disabled at (0): [<  (null)>]   (null)
[    0.337585] CPU0: thread -1, cpu 0, socket 0, mpidr 80000000

Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3b609eb9cd7d..594c0040fdd8 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -348,9 +348,9 @@ do {									\
 #define _this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	*__this_cpu_ptr(&(pcp)) op val;					\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 } while (0)
 
 #ifndef this_cpu_write
@@ -449,10 +449,10 @@ do {									\
 ({									\
 	typeof(pcp) ret__;						\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	__this_cpu_add(pcp, val);					\
 	ret__ = __this_cpu_read(pcp);					\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -479,10 +479,10 @@ do {									\
 #define _this_cpu_generic_xchg(pcp, nval)				\
 ({	typeof(pcp) ret__;						\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	ret__ = __this_cpu_read(pcp);					\
 	__this_cpu_write(pcp, nval);					\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -507,11 +507,11 @@ do {									\
 ({									\
 	typeof(pcp) ret__;						\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	ret__ = __this_cpu_read(pcp);					\
 	if (ret__ == (oval))						\
 		__this_cpu_write(pcp, nval);				\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -544,10 +544,10 @@ do {									\
 ({									\
 	int ret__;							\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2,		\
 			oval1, oval2, nval1, nval2);			\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
-- 
cgit v1.2.3-55-g7522


From 115c9b81928360d769a76c632bae62d15206a94a Mon Sep 17 00:00:00 2001
From: Greg Rose
Date: Tue, 21 Feb 2012 16:54:48 -0500
Subject: rtnetlink: Fix problem with buffer allocation

Implement a new netlink attribute type IFLA_EXT_MASK.  The mask
is a 32 bit value that can be used to indicate to the kernel that
certain extended ifinfo values are requested by the user application.
At this time the only mask value defined is RTEXT_FILTER_VF to
indicate that the user wants the ifinfo dump to send information
about the VFs belonging to the interface.

This patch fixes a bug in which certain applications do not have
large enough buffers to accommodate the extra information returned
by the kernel with large numbers of SR-IOV virtual functions.
Those applications will not send the new netlink attribute with
the interface info dump request netlink messages so they will
not get unexpectedly large request buffers returned by the kernel.

Modifies the rtnl_calcit function to traverse the list of net
devices and compute the minimum buffer size that can hold the
info dumps of all matching devices based upon the filter passed
in via the new netlink attribute filter mask.  If no filter
mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE.

With this change it is possible to add yet to be defined netlink
attributes to the dump request which should make it fairly extensible
in the future.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h   |  1 +
 include/linux/rtnetlink.h |  3 ++
 include/net/rtnetlink.h   |  2 +-
 net/core/rtnetlink.c      | 78 ++++++++++++++++++++++++++++++++++-------------
 4 files changed, 62 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c52d4b5f872a..4b24ff453aee 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -137,6 +137,7 @@ enum {
 	IFLA_AF_SPEC,
 	IFLA_GROUP,		/* Group the device belongs to */
 	IFLA_NET_NS_FD,
+	IFLA_EXT_MASK,		/* Extended info mask, VFs, etc */
 	__IFLA_MAX
 };
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 8e872ead88b5..577592ea0ea0 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -602,6 +602,9 @@ struct tcamsg {
 #define TCA_ACT_TAB 1 /* attr type must be >=1 */	
 #define TCAA_MAX 1
 
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF		(1 << 0)
+
 /* End of information exported to user level */
 
 #ifdef __KERNEL__
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 678f1ffaf843..370293901971 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -6,7 +6,7 @@
 
 typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
 typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
-typedef u16 (*rtnl_calcit_func)(struct sk_buff *);
+typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *);
 
 extern int	__rtnl_register(int protocol, int msgtype,
 				rtnl_doit_func, rtnl_dumpit_func,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 65aebd450027..606a6e8f3671 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link {
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
-static u16 min_ifinfo_dump_size;
 
 void rtnl_lock(void)
 {
@@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
 }
 
 /* All VF info */
-static inline int rtnl_vfinfo_size(const struct net_device *dev)
+static inline int rtnl_vfinfo_size(const struct net_device *dev,
+				   u32 ext_filter_mask)
 {
-	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
-
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
+	    (ext_filter_mask & RTEXT_FILTER_VF)) {
 		int num_vfs = dev_num_vf(dev->dev.parent);
 		size_t size = nla_total_size(sizeof(struct nlattr));
 		size += nla_total_size(num_vfs * sizeof(struct nlattr));
@@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
 		return port_self_size;
 }
 
-static noinline size_t if_nlmsg_size(const struct net_device *dev)
+static noinline size_t if_nlmsg_size(const struct net_device *dev,
+				     u32 ext_filter_mask)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(4) /* IFLA_MASTER */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
-	       + nla_total_size(4) /* IFLA_NUM_VF */
-	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+	       + nla_total_size(ext_filter_mask
+			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
 	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
@@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
-			    unsigned int flags)
+			    unsigned int flags, u32 ext_filter_mask)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
@@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		goto nla_put_failure;
 	copy_rtnl_link_stats64(nla_data(attr), stats);
 
-	if (dev->dev.parent)
+	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
 		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
+	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
 		int i;
 
 		struct nlattr *vfinfo, *vf;
@@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	struct hlist_head *head;
 	struct hlist_node *node;
+	struct nlattr *tb[IFLA_MAX+1];
+	u32 ext_filter_mask = 0;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
@@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
+	nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+		    ifla_policy);
+
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
@@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
 					     NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq, 0,
-					     NLM_F_MULTI) <= 0)
+					     NLM_F_MULTI,
+					     ext_filter_mask) <= 0)
 				goto out;
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
 	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
+	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1509,8 +1522,6 @@ errout:
 
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev),
-				     min_ifinfo_dump_size);
 
 	return err;
 }
@@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
 	int err;
+	u32 ext_filter_mask = 0;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
@@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
 		dev = __dev_get_by_index(net, ifm->ifi_index);
@@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (dev == NULL)
 		return -ENODEV;
 
-	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+	nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
 	if (nskb == NULL)
 		return -ENOBUFS;
 
 	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
-			       nlh->nlmsg_seq, 0, 0);
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
@@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return err;
 }
 
-static u16 rtnl_calcit(struct sk_buff *skb)
+static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	struct nlattr *tb[IFLA_MAX+1];
+	u32 ext_filter_mask = 0;
+	u16 min_ifinfo_dump_size = 0;
+
+	nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy);
+
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
+	if (!ext_filter_mask)
+		return NLMSG_GOODSIZE;
+	/*
+	 * traverse the list of net devices and compute the minimum
+	 * buffer size based upon the filter mask.
+	 */
+	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
+					     if_nlmsg_size(dev,
+						           ext_filter_mask));
+	}
+
 	return min_ifinfo_dump_size;
 }
 
@@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
-	min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
-
-	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
+	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EOPNOTSUPP;
 		calcit = rtnl_get_calcit(family, type);
 		if (calcit)
-			min_dump_alloc = calcit(skb);
+			min_dump_alloc = calcit(skb, nlh);
 
 		__rtnl_unlock();
 		rtnl = net->rtnl;
-- 
cgit v1.2.3-55-g7522


From 797a796a13df6b84a4791e57306737059b5b2384 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake
Date: Tue, 7 Feb 2012 11:45:33 +0900
Subject: asm-generic: architecture independent readq/writeq for 32bit
 environment

This provides unified readq()/writeq() helper functions for 32-bit
drivers.

For some cases, readq/writeq without atomicity is harmful, and order of
io access has to be specified explicitly.  So in this patch, new two
header files which contain non-atomic readq/writeq are added.

 - <asm-generic/io-64-nonatomic-lo-hi.h> provides non-atomic readq/
   writeq with the order of lower address -> higher address

 - <asm-generic/io-64-nonatomic-hi-lo.h> provides non-atomic readq/
   writeq with reversed order

This allows us to remove some readq()s that were added drivers when the
default non-atomic ones were removed in commit dbee8a0affd5 ("x86:
remove 32-bit versions of readq()/writeq()")

The drivers which need readq/writeq but can do with the non-atomic ones
must add the line:

  #include <asm-generic/io-64-nonatomic-lo-hi.h> /* or hi-lo.h */

But this will be nop in 64-bit environments, and no other #ifdefs are
required.  So I believe that this patch can solve the problem of
 1. driver-specific readq/writeq
 2. atomicity and order of io access

This patch is tested with building allyesconfig and allmodconfig as
ARCH=x86 and ARCH=i386 on top of tip/master.

Cc: Kashyap Desai <Kashyap.Desai@lsi.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Ravi Anand <ravi.anand@qlogic.com>
Cc: Vikas Chaudhary <vikas.chaudhary@qlogic.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Jason Uhlenkott <juhlenko@akamai.com>
Cc: James Bottomley <James.Bottomley@parallels.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Roland Dreier <roland@purestorage.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hitoshi Mitake <h.mitake@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/nvme.c                        |  2 ++
 drivers/edac/i3200_edac.c                   | 15 ++-------------
 drivers/platform/x86/ibm_rtl.c              | 15 ++-------------
 drivers/platform/x86/intel_ips.c            | 15 ++-------------
 drivers/scsi/qla4xxx/ql4_nx.c               | 23 ++---------------------
 include/asm-generic/io-64-nonatomic-hi-lo.h | 28 ++++++++++++++++++++++++++++
 include/asm-generic/io-64-nonatomic-lo-hi.h | 28 ++++++++++++++++++++++++++++
 7 files changed, 66 insertions(+), 60 deletions(-)
 create mode 100644 include/asm-generic/io-64-nonatomic-hi-lo.h
 create mode 100644 include/asm-generic/io-64-nonatomic-lo-hi.h

(limited to 'include')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index c1dc4d86c221..1f3c1a7d132a 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -41,6 +41,8 @@
 #include <linux/types.h>
 #include <linux/version.h>
 
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
 #define NVME_Q_DEPTH 1024
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index aa08497a075a..73f55e2008c2 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -15,6 +15,8 @@
 #include <linux/io.h>
 #include "edac_core.h"
 
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
 #define I3200_REVISION        "1.1"
 
 #define EDAC_MOD_STR        "i3200_edac"
@@ -101,19 +103,6 @@ struct i3200_priv {
 
 static int nr_channels;
 
-#ifndef readq
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-#endif
-
 static int how_many_channels(struct pci_dev *pdev)
 {
 	unsigned char capid0_8b; /* 8th byte of CAPID0 */
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index 42a7d603c870..7481146a5b47 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -33,6 +33,8 @@
 #include <linux/mutex.h>
 #include <asm/bios_ebda.h>
 
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
 static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
@@ -83,19 +85,6 @@ static void __iomem *rtl_cmd_addr;
 static u8 rtl_cmd_type;
 static u8 rtl_cmd_width;
 
-#ifndef readq
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-#endif
-
 static void __iomem *rtl_port_map(phys_addr_t addr, unsigned long len)
 {
 	if (rtl_cmd_type == RTL_ADDR_TYPE_MMIO)
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index 809a3ae943c6..88a98cff5a44 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -77,6 +77,8 @@
 #include <asm/processor.h>
 #include "intel_ips.h"
 
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
 #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
 
 /*
@@ -344,19 +346,6 @@ struct ips_driver {
 static bool
 ips_gpu_turbo_enabled(struct ips_driver *ips);
 
-#ifndef readq
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-#endif
-
 /**
  * ips_cpu_busy - is CPU busy?
  * @ips: IPS driver struct
diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index 78f1111158d7..65253dfbe962 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c
@@ -10,6 +10,8 @@
 #include "ql4_def.h"
 #include "ql4_glbl.h"
 
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
 #define MASK(n)		DMA_BIT_MASK(n)
 #define MN_WIN(addr)	(((addr & 0x1fc0000) >> 1) | ((addr >> 25) & 0x3ff))
 #define OCM_WIN(addr)	(((addr & 0x1ff0000) >> 1) | ((addr >> 25) & 0x3ff))
@@ -655,27 +657,6 @@ static int qla4_8xxx_pci_is_same_window(struct scsi_qla_host *ha,
 	return 0;
 }
 
-#ifndef readq
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-#endif
-
-#ifndef writeq
-static inline void writeq(__u64 val, volatile void __iomem *addr)
-{
-	writel(val, addr);
-	writel(val >> 32, addr+4);
-}
-#endif
-
 static int qla4_8xxx_pci_mem_read_direct(struct scsi_qla_host *ha,
 		u64 off, void *data, int size)
 {
diff --git a/include/asm-generic/io-64-nonatomic-hi-lo.h b/include/asm-generic/io-64-nonatomic-hi-lo.h
new file mode 100644
index 000000000000..a6806a94250d
--- /dev/null
+++ b/include/asm-generic/io-64-nonatomic-hi-lo.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_IO_64_NONATOMIC_HI_LO_H_
+#define _ASM_IO_64_NONATOMIC_HI_LO_H_
+
+#include <linux/io.h>
+#include <asm-generic/int-ll64.h>
+
+#ifndef readq
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	high = readl(p + 1);
+	low = readl(p);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef writeq
+static inline void writeq(__u64 val, volatile void __iomem *addr)
+{
+	writel(val >> 32, addr + 4);
+	writel(val, addr);
+}
+#endif
+
+#endif	/* _ASM_IO_64_NONATOMIC_HI_LO_H_ */
diff --git a/include/asm-generic/io-64-nonatomic-lo-hi.h b/include/asm-generic/io-64-nonatomic-lo-hi.h
new file mode 100644
index 000000000000..ca546b1ff8b5
--- /dev/null
+++ b/include/asm-generic/io-64-nonatomic-lo-hi.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_IO_64_NONATOMIC_LO_HI_H_
+#define _ASM_IO_64_NONATOMIC_LO_HI_H_
+
+#include <linux/io.h>
+#include <asm-generic/int-ll64.h>
+
+#ifndef readq
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	low = readl(p);
+	high = readl(p + 1);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef writeq
+static inline void writeq(__u64 val, volatile void __iomem *addr)
+{
+	writel(val, addr);
+	writel(val >> 32, addr + 4);
+}
+#endif
+
+#endif	/* _ASM_IO_64_NONATOMIC_LO_HI_H_ */
-- 
cgit v1.2.3-55-g7522


From faf309009e2e18d30c032b7d9479f29b91677c37 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Tue, 21 Feb 2012 17:24:20 -0800
Subject: sys_poll: fix incorrect type for 'timeout' parameter

The 'poll()' system call timeout parameter is supposed to be 'int', not
'long'.

Now, the reason this matters is that right now 32-bit compat mode is
broken on at least x86-64, because the 32-bit code just calls
'sys_poll()' directly on x86-64, and the 32-bit argument will have been
zero-extended, turning a signed 'int' into a large unsigned 'long'
value.

We could just introduce a 'compat_sys_poll()' function for this, and
that may eventually be what we have to do, but since the actual standard
poll() semantics is *supposed* to be 'int', and since at least on x86-64
glibc sign-extends the argument before invocing the system call (so
nobody can actually use a 64-bit timeout value in user space _anyway_,
even in 64-bit binaries), the simpler solution would seem to be to just
fix the definition of the system call to match what it should have been
from the very start.

If it turns out that somebody somehow circumvents the user-level libc
64-bit sign extension and actually uses a large unsigned 64-bit timeout
despite that not being how poll() is supposed to work, we will need to
do the compat_sys_poll() approach.

Reported-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/kernel/compat_wrapper.S | 2 +-
 fs/select.c                       | 2 +-
 include/linux/syscalls.h          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 18c51df9fe06..ff605a39cf43 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -662,7 +662,7 @@ ENTRY(sys32_getresuid16_wrapper)
 ENTRY(sys32_poll_wrapper)
 	llgtr	%r2,%r2			# struct pollfd *
 	llgfr	%r3,%r3			# unsigned int
-	lgfr	%r4,%r4			# long
+	lgfr	%r4,%r4			# int
 	jg	sys_poll		# branch to system call
 
 ENTRY(sys32_setresgid16_wrapper)
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..e782258d0de3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
 }
 
 SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
-		long, timeout_msecs)
+		int, timeout_msecs)
 {
 	struct timespec end_time, *to = NULL;
 	int ret;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 515669fa3c1d..8ec1153ff57b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -624,7 +624,7 @@ asmlinkage long sys_socketpair(int, int, int, int __user *);
 asmlinkage long sys_socketcall(int call, unsigned long __user *args);
 asmlinkage long sys_listen(int, int);
 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
-				long timeout);
+				int timeout);
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
-- 
cgit v1.2.3-55-g7522


From 8c79a045fd590a26e81e75f5d8d4ec5c7d23e565 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 30 Jan 2012 14:51:37 +0100
Subject: sched/events: Revert trace_sched_stat_sleeptime()

Commit 1ac9bc69 ("sched/tracing: Add a new tracepoint for sleeptime")
added a new sched:sched_stat_sleeptime tracepoint.

It's broken: the first sample we get on a task might be bad because
of a stale sleep_start value that wasn't reset at the last task switch
because the tracepoint was not active.

It also breaks the existing schedstat samples due to the side
effects of:

-               se->statistics.sleep_start = 0;
...
-               se->statistics.block_start = 0;

Nor do I see means to fix it without adding overhead to the scheduler
fast path, which I'm not willing to for the sake of redundant
instrumentation.

Most importantly, sleep time information can already be constructed
by tracing context switches and wakeups, and taking the timestamp
difference between the schedule-out, the wakeup and the schedule-in.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pc4c9qhl8q6vg3bs4j6k0rbd@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/sched.h | 50 --------------------------------------------
 kernel/sched/core.c          |  1 -
 kernel/sched/fair.c          |  2 ++
 3 files changed, 2 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6ba596b07a72..e33ed1bfa113 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,56 +370,6 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-#ifdef CREATE_TRACE_POINTS
-static inline u64 trace_get_sleeptime(struct task_struct *tsk)
-{
-#ifdef CONFIG_SCHEDSTATS
-	u64 block, sleep;
-
-	block = tsk->se.statistics.block_start;
-	sleep = tsk->se.statistics.sleep_start;
-	tsk->se.statistics.block_start = 0;
-	tsk->se.statistics.sleep_start = 0;
-
-	return block ? block : sleep ? sleep : 0;
-#else
-	return 0;
-#endif
-}
-#endif
-
-/*
- * Tracepoint for accounting sleeptime (time the task is sleeping
- * or waiting for I/O).
- */
-TRACE_EVENT(sched_stat_sleeptime,
-
-	TP_PROTO(struct task_struct *tsk, u64 now),
-
-	TP_ARGS(tsk, now),
-
-	TP_STRUCT__entry(
-		__array( char,	comm,	TASK_COMM_LEN	)
-		__field( pid_t,	pid			)
-		__field( u64,	sleeptime		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid		= tsk->pid;
-		__entry->sleeptime = trace_get_sleeptime(tsk);
-		__entry->sleeptime = __entry->sleeptime ?
-				now - __entry->sleeptime : 0;
-	)
-	TP_perf_assign(
-		__perf_count(__entry->sleeptime);
-	),
-
-	TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
-			__entry->comm, __entry->pid,
-			(unsigned long long)__entry->sleeptime)
-);
-
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d2e053..b342f57879e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
-	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7c6414fc669d..aca16b843b7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
+		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
+		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
-- 
cgit v1.2.3-55-g7522


From 03606895cd98c0a628b17324fd7b5ff15db7e3cd Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 23 Feb 2012 10:55:02 +0000
Subject: ipsec: be careful of non existing mac headers

Niccolo Belli reported ipsec crashes in case we handle a frame without
mac header (atm in his case)

Before copying mac header, better make sure it is present.

Bugzilla reference:  https://bugzilla.kernel.org/show_bug.cgi?id=42809

Reported-by: Niccolò Belli <darkbasic@linuxsystems.it>
Tested-by: Niccolò Belli <darkbasic@linuxsystems.it>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h       | 10 ++++++++++
 net/ipv4/xfrm4_mode_beet.c   |  5 +----
 net/ipv4/xfrm4_mode_tunnel.c |  6 ++----
 net/ipv6/xfrm6_mode_beet.c   |  6 +-----
 net/ipv6/xfrm6_mode_tunnel.c |  6 ++----
 5 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 50db9b04a552..ae86adee3746 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1465,6 +1465,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 }
 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
 
+static inline void skb_mac_header_rebuild(struct sk_buff *skb)
+{
+	if (skb_mac_header_was_set(skb)) {
+		const unsigned char *old_mac = skb_mac_header(skb);
+
+		skb_set_mac_header(skb, -skb->mac_len);
+		memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+	}
+}
+
 static inline int skb_checksum_start_offset(const struct sk_buff *skb)
 {
 	return skb->csum_start - skb_headroom(skb);
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 63418185f524..e3db3f915114 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, sizeof(*iph));
 	skb_reset_network_header(skb);
-
-	memmove(skb->data - skb->mac_len, skb_mac_header(skb),
-		skb->mac_len);
-	skb_set_mac_header(skb, -skb->mac_len);
+	skb_mac_header_rebuild(skb);
 
 	xfrm4_beet_make_header(skb);
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 534972e114ac..ed4bf11ef9f4 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	const unsigned char *old_mac;
 	int err = -EINVAL;
 
 	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!(x->props.flags & XFRM_STATE_NOECN))
 		ipip_ecn_decapsulate(skb);
 
-	old_mac = skb_mac_header(skb);
-	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
 	err = 0;
 
 out:
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index a81ce9450750..9949a356d62c 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *ip6h;
-	const unsigned char *old_mac;
 	int size = sizeof(struct ipv6hdr);
 	int err;
 
@@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	__skb_push(skb, size);
 	skb_reset_network_header(skb);
-
-	old_mac = skb_mac_header(skb);
-	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+	skb_mac_header_rebuild(skb);
 
 	xfrm6_beet_make_header(skb);
 
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 261e6e6f487e..9f2095b19ad0 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
-	const unsigned char *old_mac;
 
 	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
 		goto out;
@@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!(x->props.flags & XFRM_STATE_NOECN))
 		ipip6_ecn_decapsulate(skb);
 
-	old_mac = skb_mac_header(skb);
-	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
 	err = 0;
 
 out:
-- 
cgit v1.2.3-55-g7522


From 7e55d0527e4925a49464a5b26fdabae1f7a91a77 Mon Sep 17 00:00:00 2001
From: viresh kumar
Date: Thu, 23 Feb 2012 04:41:05 +0100
Subject: ARM: 7339/1: amba/serial.h: Include types.h for resolving dependency
 of type bool

serial.h uses bool, but its definition is missing, as it doesn't include
types.h. Fix this by including types.h

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/serial.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 514ed45c462e..d117b29d1062 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -23,6 +23,8 @@
 #ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H
 #define ASM_ARM_HARDWARE_SERIAL_AMBA_H
 
+#include <linux/types.h>
+
 /* -------------------------------------------------------------------------------
  *  From AMBA UART (PL010) Block Specification
  * -------------------------------------------------------------------------------
-- 
cgit v1.2.3-55-g7522


From 7d367e06688dc7a2cc98c2ace04e1296e1d987e2 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik
Date: Fri, 24 Feb 2012 11:45:49 +0100
Subject: netfilter: ctnetlink: fix soft lockup when netlink adds new entries
 (v2)

Marcell Zambo and Janos Farago noticed and reported that when
new conntrack entries are added via netlink and the conntrack table
gets full, soft lockup happens. This is because the nf_conntrack_lock
is held while nf_conntrack_alloc is called, which is in turn wants
to lock nf_conntrack_lock while evicting entries from the full table.

The patch fixes the soft lockup with limiting the holding of the
nf_conntrack_lock to the minimum, where it's absolutely required.
It required to extend (and thus change) nf_conntrack_hash_insert
so that it makes sure conntrack and ctnetlink do not add the same entry
twice to the conntrack table.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  2 +-
 net/netfilter/nf_conntrack_core.c    | 38 +++++++++++++++++++++++++----
 net/netfilter/nf_conntrack_netlink.c | 46 +++++++++++++-----------------------
 3 files changed, 51 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8a2b0ae7dbd2..ab86036bbf0c 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -209,7 +209,7 @@ extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, u16 zone,
 		    const struct nf_conntrack_tuple *tuple);
 
-extern void nf_conntrack_hash_insert(struct nf_conn *ct);
+extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 extern void nf_ct_delete_from_lists(struct nf_conn *ct);
 extern void nf_ct_insert_dying_list(struct nf_conn *ct);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 76613f5a55c0..ed86a3be678e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -404,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 			   &net->ct.hash[repl_hash]);
 }
 
-void nf_conntrack_hash_insert(struct nf_conn *ct)
+int
+nf_conntrack_hash_check_insert(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
+	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_node *n;
 	u16 zone;
 
 	zone = nf_ct_zone(ct);
-	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, zone,
+			      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, zone,
+				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	spin_lock_bh(&nf_conntrack_lock);
 
+	/* See if there's one in the list already, including reverse */
+	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+			goto out;
+	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+			goto out;
+
+	add_timer(&ct->timeout);
+	nf_conntrack_get(&ct->ct_general);
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
+	NF_CT_STAT_INC(net, insert);
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	return 0;
+
+out:
+	NF_CT_STAT_INC(net, insert_failed);
+	spin_unlock_bh(&nf_conntrack_lock);
+	return -EEXIST;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
 /* Confirm a connection given skb; places it in hash table */
 int
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9307b033c0c9..30c9d4ca0218 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1367,15 +1367,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 						    nf_ct_protonum(ct));
 		if (helper == NULL) {
 			rcu_read_unlock();
-			spin_unlock_bh(&nf_conntrack_lock);
 #ifdef CONFIG_MODULES
 			if (request_module("nfct-helper-%s", helpname) < 0) {
-				spin_lock_bh(&nf_conntrack_lock);
 				err = -EOPNOTSUPP;
 				goto err1;
 			}
 
-			spin_lock_bh(&nf_conntrack_lock);
 			rcu_read_lock();
 			helper = __nf_conntrack_helper_find(helpname,
 							    nf_ct_l3num(ct),
@@ -1468,8 +1465,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	if (tstamp)
 		tstamp->start = ktime_to_ns(ktime_get_real());
 
-	add_timer(&ct->timeout);
-	nf_conntrack_hash_insert(ct);
+	err = nf_conntrack_hash_check_insert(ct);
+	if (err < 0)
+		goto err2;
+
 	rcu_read_unlock();
 
 	return ct;
@@ -1490,6 +1489,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nf_conn *ct;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	u16 zone;
 	int err;
@@ -1510,27 +1510,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 	}
 
-	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(net, zone, &otuple);
+		h = nf_conntrack_find_get(net, zone, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(net, zone, &rtuple);
+		h = nf_conntrack_find_get(net, zone, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_CREATE) {
-			struct nf_conn *ct;
 			enum ip_conntrack_events events;
 
 			ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
 							&rtuple, u3);
-			if (IS_ERR(ct)) {
-				err = PTR_ERR(ct);
-				goto out_unlock;
-			}
+			if (IS_ERR(ct))
+				return PTR_ERR(ct);
+
 			err = 0;
-			nf_conntrack_get(&ct->ct_general);
-			spin_unlock_bh(&nf_conntrack_lock);
 			if (test_bit(IPS_EXPECTED_BIT, &ct->status))
 				events = IPCT_RELATED;
 			else
@@ -1545,23 +1540,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      ct, NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
 			nf_ct_put(ct);
-		} else
-			spin_unlock_bh(&nf_conntrack_lock);
+		}
 
 		return err;
 	}
 	/* implicit 'else' */
 
-	/* We manipulate the conntrack inside the global conntrack table lock,
-	 * so there's no need to increase the refcount */
 	err = -EEXIST;
+	ct = nf_ct_tuplehash_to_ctrack(h);
 	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
+		spin_lock_bh(&nf_conntrack_lock);
 		err = ctnetlink_change_conntrack(ct, cda);
+		spin_unlock_bh(&nf_conntrack_lock);
 		if (err == 0) {
-			nf_conntrack_get(&ct->ct_general);
-			spin_unlock_bh(&nf_conntrack_lock);
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
 						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
@@ -1570,15 +1561,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      (1 << IPCT_MARK),
 						      ct, NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
-			nf_ct_put(ct);
-		} else
-			spin_unlock_bh(&nf_conntrack_lock);
-
-		return err;
+		}
 	}
 
-out_unlock:
-	spin_unlock_bh(&nf_conntrack_lock);
+	nf_ct_put(ct);
 	return err;
 }
 
-- 
cgit v1.2.3-55-g7522


From d80e731ecab420ddcb79ee9d0ac427acbc187b4b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 24 Feb 2012 20:07:11 +0100
Subject: epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()

This patch is intentionally incomplete to simplify the review.
It ignores ep_unregister_pollwait() which plays with the same wqh.
See the next change.

epoll assumes that the EPOLL_CTL_ADD'ed file controls everything
f_op->poll() needs. In particular it assumes that the wait queue
can't go away until eventpoll_release(). This is not true in case
of signalfd, the task which does EPOLL_CTL_ADD uses its ->sighand
which is not connected to the file.

This patch adds the special event, POLLFREE, currently only for
epoll. It expects that init_poll_funcptr()'ed hook should do the
necessary cleanup. Perhaps it should be defined as EPOLLFREE in
eventpoll.

__cleanup_sighand() is changed to do wake_up_poll(POLLFREE) if
->signalfd_wqh is not empty, we add the new signalfd_cleanup()
helper.

ep_poll_callback(POLLFREE) simply does list_del_init(task_list).
This make this poll entry inconsistent, but we don't care. If you
share epoll fd which contains our sigfd with another process you
should blame yourself. signalfd is "really special". I simply do
not know how we can define the "right" semantics if it used with
epoll.

The main problem is, epoll calls signalfd_poll() once to establish
the connection with the wait queue, after that signalfd_poll(NULL)
returns the different/inconsistent results depending on who does
EPOLL_CTL_MOD/signalfd_read/etc. IOW: apart from sigmask, signalfd
has nothing to do with the file, it works with the current thread.

In short: this patch is the hack which tries to fix the symptoms.
It also assumes that nobody can take tasklist_lock under epoll
locks, this seems to be true.

Note:

	- we do not have wake_up_all_poll() but wake_up_poll()
	  is fine, poll/epoll doesn't use WQ_FLAG_EXCLUSIVE.

	- signalfd_cleanup() uses POLLHUP along with POLLFREE,
	  we need a couple of simple changes in eventpoll.c to
	  make sure it can't be "lost".

Reported-by: Maxime Bizon <mbizon@freebox.fr>
Cc: <stable@kernel.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c             |  4 ++++
 fs/signalfd.c              | 11 +++++++++++
 include/asm-generic/poll.h |  2 ++
 include/linux/signalfd.h   |  5 ++++-
 kernel/fork.c              |  5 ++++-
 5 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..34bbfc6dd8dc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -842,6 +842,10 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
+	/* the caller holds eppoll_entry->whead->lock */
+	if ((unsigned long)key & POLLFREE)
+		list_del_init(&wait->task_list);
+
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451dd..79c1eea98a3a 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,17 @@
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
 
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+	wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+
+	if (likely(!waitqueue_active(wqh)))
+		return;
+
+	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
 struct signalfd_ctx {
 	sigset_t sigmask;
 };
diff --git a/include/asm-generic/poll.h b/include/asm-generic/poll.h
index 44bce836d350..9ce7f44aebd2 100644
--- a/include/asm-generic/poll.h
+++ b/include/asm-generic/poll.h
@@ -28,6 +28,8 @@
 #define POLLRDHUP       0x2000
 #endif
 
+#define POLLFREE	0x4000	/* currently only for epoll */
+
 struct pollfd {
 	int fd;
 	short events;
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index 3ff4961da9b5..247399b2979a 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -61,13 +61,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig)
 		wake_up(&tsk->sighand->signalfd_wqh);
 }
 
+extern void signalfd_cleanup(struct sighand_struct *sighand);
+
 #else /* CONFIG_SIGNALFD */
 
 static inline void signalfd_notify(struct task_struct *tsk, int sig) { }
 
+static inline void signalfd_cleanup(struct sighand_struct *sighand) { }
+
 #endif /* CONFIG_SIGNALFD */
 
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SIGNALFD_H */
-
diff --git a/kernel/fork.c b/kernel/fork.c
index b77fd559c78e..e2cd3e2a5ae8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
+#include <linux/signalfd.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -935,8 +936,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-	if (atomic_dec_and_test(&sighand->count))
+	if (atomic_dec_and_test(&sighand->count)) {
+		signalfd_cleanup(sighand);
 		kmem_cache_free(sighand_cachep, sighand);
+	}
 }
 
 
-- 
cgit v1.2.3-55-g7522


From 3c761ea05a8900a907f32b628611873f6bef24b2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 26 Feb 2012 09:44:55 -0800
Subject: Fix autofs compile without CONFIG_COMPAT

The autofs compat handling fix caused a compile failure when
CONFIG_COMPAT isn't defined.

Instead of adding random #ifdef'fery in autofs, let's just make the
compat helpers earlier to use: without CONFIG_COMPAT, is_compat_task()
just hardcodes to zero.

We could probably do something similar for a number of other cases where
we have #ifdef's in code, but this is the low-hanging fruit.

Reported-and-tested-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 41c9f6515f46..7e05fcee75a1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -561,5 +561,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 		unsigned long liovcnt, const struct compat_iovec __user *rvec,
 		unsigned long riovcnt, unsigned long flags);
 
+#else
+
+#define is_compat_task() (0)
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
-- 
cgit v1.2.3-55-g7522


From 97a29d59fc222b36bac3ee3a8ae994f65bf7ffdf Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Mon, 30 Jan 2012 10:40:47 -0600
Subject: [PARISC] fix compile break caused by iomap: make IOPORT/PCI mapping
 functions conditional

The problem in

commit fea80311a939a746533a6d7e7c3183729d6a3faf
Author: Randy Dunlap <rdunlap@xenotime.net>
Date:   Sun Jul 24 11:39:14 2011 -0700

    iomap: make IOPORT/PCI mapping functions conditional

is that if your architecture supplies pci_iomap/pci_iounmap, it expects
always to supply them.  Adding empty body defitions in the !CONFIG_PCI
case, which is what this patch does, breaks the parisc compile because
the functions become doubly defined.  It took us a while to spot this,
because we don't actually build !CONFIG_PCI very often (only if someone
is brave enough to test the snake/asp machines).

Since the note in the commit log says this is to fix a
CONFIG_GENERIC_IOMAP issue (which it does because CONFIG_GENERIC_IOMAP
supplies pci_iounmap only if CONFIG_PCI is set), there should actually
have been a condition upon this.  This should make sure no other
architecture's !CONFIG_PCI compile breaks in the same way as parisc.

The fix had to be updated to take account of the GENERIC_PCI_IOMAP
separation.

Reported-by: Rolf Eike Beer <eike@sf-mail.de>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 include/asm-generic/iomap.h     | 2 +-
 include/asm-generic/pci_iomap.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 8a3d4fde2604..6afd7d6a9899 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -70,7 +70,7 @@ extern void ioport_unmap(void __iomem *);
 /* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
-#else
+#elif defined(CONFIG_GENERIC_IOMAP)
 struct pci_dev;
 static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 { }
diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h
index e58fcf891370..ce37349860fe 100644
--- a/include/asm-generic/pci_iomap.h
+++ b/include/asm-generic/pci_iomap.h
@@ -25,7 +25,7 @@ extern void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
 #define __pci_ioport_map(dev, port, nr) ioport_map((port), (nr))
 #endif
 
-#else
+#elif defined(CONFIG_GENERIC_PCI_IOMAP)
 static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
 {
 	return NULL;
-- 
cgit v1.2.3-55-g7522


From ecb971923614775a118bc05ad16b2bde450cac7d Mon Sep 17 00:00:00 2001
From: Neal Cardwell
Date: Mon, 27 Feb 2012 17:52:52 -0500
Subject: tcp: fix comment for tp->highest_sack

There was an off-by-one error in the comments describing the
highest_sack field in struct tcp_sock. The comments previously claimed
that it was the "start sequence of the highest skb with SACKed
bit". This commit fixes the comments to note that it is the "start
sequence of the skb just *after* the highest skb with SACKed bit".

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 3 ++-
 include/net/tcp.h   | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 46a85c9e1f25..3c7ffdb40dc6 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -412,7 +412,8 @@ struct tcp_sock {
 
 	struct tcp_sack_block recv_sack_cache[4];
 
-	struct sk_buff *highest_sack;   /* highest skb with SACK received
+	struct sk_buff *highest_sack;   /* skb just after the highest
+					 * skb with SACKed bit set
 					 * (validity guaranteed only if
 					 * sacked_out > 0)
 					 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 42c29bfbcee3..2d80c291fffb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1364,8 +1364,9 @@ static inline void tcp_push_pending_frames(struct sock *sk)
 	}
 }
 
-/* Start sequence of the highest skb with SACKed bit, valid only if
- * sacked > 0 or when the caller has ensured validity by itself.
+/* Start sequence of the skb just after the highest skb with SACKed
+ * bit, valid only if sacked_out > 0 or when the caller has ensured
+ * validity by itself.
  */
 static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 {
-- 
cgit v1.2.3-55-g7522


From c8e252586f8d5de906385d8cf6385fee289a825e Mon Sep 17 00:00:00 2001
From: H. Peter Anvin
Date: Fri, 2 Mar 2012 10:43:48 -0800
Subject: regset: Prevent null pointer reference on readonly regsets

The regset common infrastructure assumed that regsets would always
have .get and .set methods, but not necessarily .active methods.
Unfortunately people have since written regsets without .set methods.

Rather than putting in stub functions everywhere, handle regsets with
null .get or .set methods explicitly.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@hack.frob.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c        | 2 +-
 include/linux/regset.h | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d613..07d096c49920 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1421,7 +1421,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	for (i = 1; i < view->n; ++i) {
 		const struct user_regset *regset = &view->regsets[i];
 		do_thread_regset_writeback(t->task, regset);
-		if (regset->core_note_type &&
+		if (regset->core_note_type && regset->get &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
 			size_t size = regset->n * regset->size;
diff --git a/include/linux/regset.h b/include/linux/regset.h
index 8abee6556223..5150fd16ef93 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -335,6 +335,9 @@ static inline int copy_regset_to_user(struct task_struct *target,
 {
 	const struct user_regset *regset = &view->regsets[setno];
 
+	if (!regset->get)
+		return -EOPNOTSUPP;
+
 	if (!access_ok(VERIFY_WRITE, data, size))
 		return -EIO;
 
@@ -358,6 +361,9 @@ static inline int copy_regset_from_user(struct task_struct *target,
 {
 	const struct user_regset *regset = &view->regsets[setno];
 
+	if (!regset->set)
+		return -EOPNOTSUPP;
+
 	if (!access_ok(VERIFY_READ, data, size))
 		return -EIO;
 
-- 
cgit v1.2.3-55-g7522


From 5189fa19a4b2b4c3bec37c3a019d446148827717 Mon Sep 17 00:00:00 2001
From: H. Peter Anvin
Date: Fri, 2 Mar 2012 10:43:49 -0800
Subject: regset: Return -EFAULT, not -EIO, on host-side memory fault

There is only one error code to return for a bad user-space buffer
pointer passed to a system call in the same address space as the
system call is executed, and that is EFAULT.  Furthermore, the
low-level access routines, which catch most of the faults, return
EFAULT already.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@hack.frob.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/regset.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/regset.h b/include/linux/regset.h
index 5150fd16ef93..686f37327a49 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -339,7 +339,7 @@ static inline int copy_regset_to_user(struct task_struct *target,
 		return -EOPNOTSUPP;
 
 	if (!access_ok(VERIFY_WRITE, data, size))
-		return -EIO;
+		return -EFAULT;
 
 	return regset->get(target, regset, offset, size, NULL, data);
 }
@@ -365,7 +365,7 @@ static inline int copy_regset_from_user(struct task_struct *target,
 		return -EOPNOTSUPP;
 
 	if (!access_ok(VERIFY_READ, data, size))
-		return -EIO;
+		return -EFAULT;
 
 	return regset->set(target, regset, offset, size, NULL, data);
 }
-- 
cgit v1.2.3-55-g7522


From 8966be90304b394fd6a2c5af7b6b3abe2df3889c Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Fri, 2 Mar 2012 14:23:30 -0800
Subject: vfs: trivial __d_lookup_rcu() cleanups

These don't change any semantics, but they clean up the code a bit and
mark some arguments appropriately 'const'.

They came up as I was doing the word-at-a-time dcache name accessor
code, and cleaning this up now allows me to send out a smaller relevant
interesting patch for the experimental stuff.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c            | 13 ++++++++-----
 include/linux/dcache.h |  3 ++-
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index fe19ac13f75f..138be96e25b6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly;
 
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct hlist_bl_head *d_hash(struct dentry *parent,
+static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 					unsigned long hash)
 {
 	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -1717,8 +1717,9 @@ EXPORT_SYMBOL(d_add_ci);
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
  */
-struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
-				unsigned *seq, struct inode **inode)
+struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
+				unsigned *seqp, struct inode **inode)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
@@ -1748,6 +1749,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	 * See Documentation/filesystems/path-lookup.txt for more details.
 	 */
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
+		unsigned seq;
 		struct inode *i;
 		const char *tname;
 		int tlen;
@@ -1756,7 +1758,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 			continue;
 
 seqretry:
-		*seq = read_seqcount_begin(&dentry->d_seq);
+		seq = read_seqcount_begin(&dentry->d_seq);
 		if (dentry->d_parent != parent)
 			continue;
 		if (d_unhashed(dentry))
@@ -1771,7 +1773,7 @@ seqretry:
 		 * edge of memory when walking. If we could load this
 		 * atomically some other way, we could drop this check.
 		 */
-		if (read_seqcount_retry(&dentry->d_seq, *seq))
+		if (read_seqcount_retry(&dentry->d_seq, seq))
 			goto seqretry;
 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 			if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1790,7 @@ seqretry:
 		 * order to do anything useful with the returned dentry
 		 * anyway.
 		 */
+		*seqp = seq;
 		*inode = i;
 		return dentry;
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d64a55b23afd..61b24261e07a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -309,7 +309,8 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 extern struct dentry *d_lookup(struct dentry *, struct qstr *);
 extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
 extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
 				unsigned *seq, struct inode **inode);
 
 /**
-- 
cgit v1.2.3-55-g7522


From 0145acc202ca613b23b5383e55df3c32a92ad1bf Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Fri, 2 Mar 2012 14:32:59 -0800
Subject: vfs: uninline full_name_hash()

.. and also use it in lookup_one_len() rather than open-coding it.

There aren't any performance-critical users, so inlining it is silly.
But it wouldn't matter if it wasn't for the fact that the word-at-a-time
dentry name patches want to conditionally replace the function, and
uninlining it sets the stage for that.

So again, this is a preparatory patch that doesn't change any semantics,
and only prepares for a much cleaner and testable word-at-a-time dentry
name accessor patch.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c             | 13 +++++++++----
 include/linux/dcache.h |  9 +--------
 2 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index a780ea515c47..ec72fa1acb14 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1374,6 +1374,14 @@ static inline int can_lookup(struct inode *inode)
 	return 1;
 }
 
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+	unsigned long hash = init_name_hash();
+	while (len--)
+		hash = partial_name_hash(*name++, hash);
+	return end_name_hash(hash);
+}
+
 /*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
@@ -1775,24 +1783,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
 	struct qstr this;
-	unsigned long hash;
 	unsigned int c;
 
 	WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
 
 	this.name = name;
 	this.len = len;
+	this.hash = full_name_hash(name, len);
 	if (!len)
 		return ERR_PTR(-EACCES);
 
-	hash = init_name_hash();
 	while (len--) {
 		c = *(const unsigned char *)name++;
 		if (c == '/' || c == '\0')
 			return ERR_PTR(-EACCES);
-		hash = partial_name_hash(c, hash);
 	}
-	this.hash = end_name_hash(hash);
 	/*
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 61b24261e07a..f1c7eb8461be 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -89,14 +89,7 @@ static inline unsigned long end_name_hash(unsigned long hash)
 }
 
 /* Compute the hash for a name string. */
-static inline unsigned int
-full_name_hash(const unsigned char *name, unsigned int len)
-{
-	unsigned long hash = init_name_hash();
-	while (len--)
-		hash = partial_name_hash(*name++, hash);
-	return end_name_hash(hash);
-}
+extern unsigned int full_name_hash(const unsigned char *, unsigned int);
 
 /*
  * Try to keep struct dentry aligned on 64 byte cachelines (this will
-- 
cgit v1.2.3-55-g7522


From 5707c87f20bca9e76969bb4096149de6ef74cbb9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Fri, 2 Mar 2012 14:47:15 -0800
Subject: vfs: clarify and clean up dentry_cmp()

It did some odd things for unclear reasons.  As this is one of the
functions that gets changed when doing word-at-a-time compares, this is
yet another of the "don't change any semantics, but clean things up so
that subsequent patches don't get obscured by the cleanups".

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f1c7eb8461be..4270bedd2308 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -54,18 +54,17 @@ extern struct dentry_stat_t dentry_stat;
 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 				const unsigned char *ct, size_t tcount)
 {
-	int ret;
 	if (scount != tcount)
 		return 1;
+
 	do {
-		ret = (*cs != *ct);
-		if (ret)
-			break;
+		if (*cs != *ct)
+			return 1;
 		cs++;
 		ct++;
 		tcount--;
 	} while (tcount);
-	return ret;
+	return 0;
 }
 
 /* Name hashing routines. Initial hash value */
-- 
cgit v1.2.3-55-g7522


From adb795062f89b8d67d295ee25e04034bccce6779 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Wed, 29 Feb 2012 00:41:12 +0400
Subject: percpu: fix __this_cpu_{sub,inc,dec}_return() definition

This patch adds missed "__" prefixes, otherwise these functions
works as irq/preemption safe.

Reported-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 594c0040fdd8..21638ae14e07 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -722,9 +722,9 @@ do {									\
 	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif
 
-#define __this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
-#define __this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
-#define __this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
+#define __this_cpu_sub_return(pcp, val)	__this_cpu_add_return(pcp, -(val))
+#define __this_cpu_inc_return(pcp)	__this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp)	__this_cpu_add_return(pcp, -1)
 
 #define __this_cpu_generic_xchg(pcp, nval)				\
 ({	typeof(pcp) ret__;						\
-- 
cgit v1.2.3-55-g7522


From 5483f18e986ed5267b923bec12b407845181350b Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 4 Mar 2012 15:51:42 -0800
Subject: vfs: move dentry_cmp from <linux/dcache.h> to fs/dcache.c

It's only used inside fs/dcache.c, and we're going to play games with it
for the word-at-a-time patches.  This time we really don't even want to
export it, because it really is an internal function to fs/dcache.c, and
has been since it was introduced.

Having it in that extremely hot header file (it's included in pretty
much everything, thanks to <linux/fs.h>) is a disaster for testing
different versions, and is utterly pointless.

We really should have some kind of header file diet thing, where we
figure out which parts of header files are really better off private and
only result in more expensive compiles.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c            | 20 ++++++++++++++++++++
 include/linux/dcache.h | 20 --------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index 138be96e25b6..bcbdb33fcc20 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -137,6 +137,26 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 }
 #endif
 
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ * The strings are both count bytes long, and count is non-zero.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+				const unsigned char *ct, size_t tcount)
+{
+	if (scount != tcount)
+		return 1;
+
+	do {
+		if (*cs != *ct)
+			return 1;
+		cs++;
+		ct++;
+		tcount--;
+	} while (tcount);
+	return 0;
+}
+
 static void __d_free(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4270bedd2308..ff5f5256d175 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -47,26 +47,6 @@ struct dentry_stat_t {
 };
 extern struct dentry_stat_t dentry_stat;
 
-/*
- * Compare 2 name strings, return 0 if they match, otherwise non-zero.
- * The strings are both count bytes long, and count is non-zero.
- */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-				const unsigned char *ct, size_t tcount)
-{
-	if (scount != tcount)
-		return 1;
-
-	do {
-		if (*cs != *ct)
-			return 1;
-		cs++;
-		ct++;
-		tcount--;
-	} while (tcount);
-	return 0;
-}
-
 /* Name hashing routines. Initial hash value */
 /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
 #define init_name_hash()		0
-- 
cgit v1.2.3-55-g7522


From c22ab332902333f83766017478c1ef6607ace681 Mon Sep 17 00:00:00 2001
From: Matthew Garrett
Date: Mon, 5 Mar 2012 14:59:10 -0800
Subject: kmsg_dump: don't run on non-error paths by default

Since commit 04c6862c055f ("kmsg_dump: add kmsg_dump() calls to the
reboot, halt, poweroff and emergency_restart paths"), kmsg_dump() gets
run on normal paths including poweroff and reboot.

This is less than ideal given pstore implementations that can only
represent single backtraces, since a reboot may overwrite a stored oops
before it's been picked up by userspace.  In addition, some pstore
backends may have low performance and provide a significant delay in
reboot as a result.

This patch adds a printk.always_kmsg_dump kernel parameter (which can also
be changed from userspace).  Without it, the code will only be run on
failure paths rather than on normal paths.  The option can be enabled in
environments where there's a desire to attempt to audit whether or not a
reboot was cleanly requested or not.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Marco Stornelli <marco.stornelli@gmail.com>
Cc: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 6 ++++++
 include/linux/kmsg_dump.h           | 9 +++++++--
 kernel/printk.c                     | 6 ++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e69b43b..d99fd9c0ec0e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2211,6 +2211,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 			default: off.
 
+	printk.always_kmsg_dump=
+			Trigger kmsg_dump for cases other than kernel oops or
+			panics
+			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
+			default: disabled
+
 	printk.time=	Show timing data prefixed to each printk message line
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index fee66317e071..35f7237ec972 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -15,13 +15,18 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 
+/*
+ * Keep this list arranged in rough order of priority. Anything listed after
+ * KMSG_DUMP_OOPS will not be logged by default unless printk.always_kmsg_dump
+ * is passed to the kernel.
+ */
 enum kmsg_dump_reason {
-	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
+	KMSG_DUMP_OOPS,
+	KMSG_DUMP_EMERG,
 	KMSG_DUMP_RESTART,
 	KMSG_DUMP_HALT,
 	KMSG_DUMP_POWEROFF,
-	KMSG_DUMP_EMERG,
 };
 
 /**
diff --git a/kernel/printk.c b/kernel/printk.c
index 13c0a1143f49..32690a0b7a18 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -702,6 +702,9 @@ static bool printk_time = 0;
 #endif
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
+static bool always_kmsg_dump;
+module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+
 /* Check if we have any console registered that can be called early in boot. */
 static int have_callable_console(void)
 {
@@ -1732,6 +1735,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 	unsigned long l1, l2;
 	unsigned long flags;
 
+	if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
+		return;
+
 	/* Theoretically, the log could move on after we do this, but
 	   there's not a lot we can do about that. The new messages
 	   will overwrite the start of what we dump. */
-- 
cgit v1.2.3-55-g7522


From c415c3b47ea2754659d915cca387a20999044163 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 5 Mar 2012 14:59:13 -0800
Subject: vfork: introduce complete_vfork_done()

No functional changes.

Move the clear-and-complete-vfork_done code into the new trivial helper,
complete_vfork_done().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c             |  8 ++------
 include/linux/sched.h |  1 +
 kernel/fork.c         | 17 ++++++++++-------
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 92ce83a11e90..dccdcec913e9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1915,7 +1915,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	struct completion *vfork_done;
 	int core_waiters = -EBUSY;
 
 	init_completion(&core_state->startup);
@@ -1934,11 +1933,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 	 * Make sure nobody is waiting for us to release the VM,
 	 * otherwise we can deadlock when we wait on each other
 	 */
-	vfork_done = tsk->vfork_done;
-	if (vfork_done) {
-		tsk->vfork_done = NULL;
-		complete(vfork_done);
-	}
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 
 	if (core_waiters)
 		wait_for_completion(&core_state->startup);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6bfd88..1b25a37f2aee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,6 +2291,7 @@ extern int do_execve(const char *,
 		     const char __user * const __user *,
 		     const char __user * const __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern void complete_vfork_done(struct task_struct *tsk);
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
diff --git a/kernel/fork.c b/kernel/fork.c
index e2cd3e2a5ae8..cf3d96379608 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -668,6 +668,14 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 	return mm;
 }
 
+void complete_vfork_done(struct task_struct *tsk)
+{
+	struct completion *vfork_done = tsk->vfork_done;
+
+	tsk->vfork_done = NULL;
+	complete(vfork_done);
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
@@ -683,8 +691,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  */
 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct completion *vfork_done = tsk->vfork_done;
-
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
 	if (unlikely(tsk->robust_list)) {
@@ -704,11 +710,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	/* notify parent sleeping on vfork() */
-	if (vfork_done) {
-		tsk->vfork_done = NULL;
-		complete(vfork_done);
-	}
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
-- 
cgit v1.2.3-55-g7522


From d68b46fe16ad59b3a5f51ec73daaa5dc06753798 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 5 Mar 2012 14:59:13 -0800
Subject: vfork: make it killable

Make vfork() killable.

Change do_fork(CLONE_VFORK) to do wait_for_completion_killable().  If it
fails we do not return to the user-mode and never touch the memory shared
with our child.

However, in this case we should clear child->vfork_done before return, we
use task_lock() in do_fork()->wait_for_vfork_done() and
complete_vfork_done() to serialize with each other.

Note: now that we use task_lock() we don't really need completion, we
could turn task->vfork_done into "task_struct *wake_up_me" but this needs
some complications.

NOTE: this and the next patches do not affect in-kernel users of
CLONE_VFORK, kernel threads run with all signals ignored including
SIGKILL/SIGSTOP.

However this is obviously the user-visible change.  Not only a fatal
signal can kill the vforking parent, a sub-thread can do execve or
exit_group() and kill the thread sleeping in vfork().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 +-
 kernel/fork.c         | 40 ++++++++++++++++++++++++++++++++--------
 2 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1b25a37f2aee..b6467711f12e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2372,7 +2372,7 @@ static inline int thread_group_empty(struct task_struct *p)
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[].
+ * ->cgroup.subsys[]. And ->vfork_done.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/kernel/fork.c b/kernel/fork.c
index cf3d96379608..892c534ce6e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -670,10 +670,34 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 
 void complete_vfork_done(struct task_struct *tsk)
 {
-	struct completion *vfork_done = tsk->vfork_done;
+	struct completion *vfork;
 
-	tsk->vfork_done = NULL;
-	complete(vfork_done);
+	task_lock(tsk);
+	vfork = tsk->vfork_done;
+	if (likely(vfork)) {
+		tsk->vfork_done = NULL;
+		complete(vfork);
+	}
+	task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+				struct completion *vfork)
+{
+	int killed;
+
+	freezer_do_not_count();
+	killed = wait_for_completion_killable(vfork);
+	freezer_count();
+
+	if (killed) {
+		task_lock(child);
+		child->vfork_done = NULL;
+		task_unlock(child);
+	}
+
+	put_task_struct(child);
+	return killed;
 }
 
 /* Please note the differences between mmput and mm_release.
@@ -717,7 +741,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested.  We leave this alone when dying by signal, to leave
 	 * the value intact in a core dump, and to save the unnecessary
-	 * trouble otherwise.  Userland only wants this done for a sys_exit.
+	 * trouble, say, a killed vfork parent shouldn't touch this mm.
+	 * Userland only wants this done for a sys_exit.
 	 */
 	if (tsk->clear_child_tid) {
 		if (!(tsk->flags & PF_SIGNALED) &&
@@ -1551,6 +1576,7 @@ long do_fork(unsigned long clone_flags,
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
+			get_task_struct(p);
 		}
 
 		/*
@@ -1568,10 +1594,8 @@ long do_fork(unsigned long clone_flags,
 			ptrace_event(trace, nr);
 
 		if (clone_flags & CLONE_VFORK) {
-			freezer_do_not_count();
-			wait_for_completion(&vfork);
-			freezer_count();
-			ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+			if (!wait_for_vfork_done(p, &vfork))
+				ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
 		}
 	} else {
 		nr = PTR_ERR(p);
-- 
cgit v1.2.3-55-g7522


From 57b59c4a1400fa6c34764eab2e35a8762dc05a09 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 5 Mar 2012 14:59:13 -0800
Subject: coredump_wait: don't call complete_vfork_done()

Now that CLONE_VFORK is killable, coredump_wait() no longer needs
complete_vfork_done().  zap_threads() should find and kill all tasks with
the same ->mm, this includes our parent if ->vfork_done is set.

mm_release() becomes the only caller, unexport complete_vfork_done().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c             | 14 ++------------
 include/linux/sched.h |  1 -
 kernel/fork.c         |  2 +-
 3 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index dccdcec913e9..153dee14fe55 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1926,19 +1926,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
 	up_write(&mm->mmap_sem);
 
-	if (unlikely(core_waiters < 0))
-		goto fail;
-
-	/*
-	 * Make sure nobody is waiting for us to release the VM,
-	 * otherwise we can deadlock when we wait on each other
-	 */
-	if (tsk->vfork_done)
-		complete_vfork_done(tsk);
-
-	if (core_waiters)
+	if (core_waiters > 0)
 		wait_for_completion(&core_state->startup);
-fail:
+
 	return core_waiters;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6467711f12e..11fcafaf4ae4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,7 +2291,6 @@ extern int do_execve(const char *,
 		     const char __user * const __user *,
 		     const char __user * const __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
-extern void complete_vfork_done(struct task_struct *tsk);
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
diff --git a/kernel/fork.c b/kernel/fork.c
index 892c534ce6e3..44b0e21af50e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -668,7 +668,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 	return mm;
 }
 
-void complete_vfork_done(struct task_struct *tsk)
+static void complete_vfork_done(struct task_struct *tsk)
 {
 	struct completion *vfork;
 
-- 
cgit v1.2.3-55-g7522


From 6e27f63edbd7ab893258e16500171dd1270a1369 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 5 Mar 2012 14:59:14 -0800
Subject: vfork: kill PF_STARTING

Previously it was (ab)used by utrace.  Then it was wrongly used by the
scheduler code.

Currently it is not used, kill it before it finds the new erroneous user.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 kernel/fork.c         | 9 ---------
 2 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11fcafaf4ae4..0657368bd78f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1777,7 +1777,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 /*
  * Per process flags
  */
-#define PF_STARTING	0x00000002	/* being created */
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
diff --git a/kernel/fork.c b/kernel/fork.c
index 44b0e21af50e..26a7a6707fa7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1046,7 +1046,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
 	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
 	new_flags |= PF_FORKNOEXEC;
-	new_flags |= PF_STARTING;
 	p->flags = new_flags;
 }
 
@@ -1579,14 +1578,6 @@ long do_fork(unsigned long clone_flags,
 			get_task_struct(p);
 		}
 
-		/*
-		 * We set PF_STARTING at creation in case tracing wants to
-		 * use this to distinguish a fully live task from one that
-		 * hasn't finished SIGSTOP raising yet.  Now we clear it
-		 * and set the child going.
-		 */
-		p->flags &= ~PF_STARTING;
-
 		wake_up_new_task(p);
 
 		/* forking complete and child started to run, tell ptracer */
-- 
cgit v1.2.3-55-g7522


From 7512102cf64d36e3c7444480273623c7aab3563f Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Mon, 5 Mar 2012 14:59:18 -0800
Subject: memcg: fix GPF when cgroup removal races with last exit

When moving tasks from old memcg (with move_charge_at_immigrate on new
memcg), followed by removal of old memcg, hit General Protection Fault in
mem_cgroup_lru_del_list() (called from release_pages called from
free_pages_and_swap_cache from tlb_flush_mmu from tlb_finish_mmu from
exit_mmap from mmput from exit_mm from do_exit).

Somewhat reproducible, takes a few hours: the old struct mem_cgroup has
been freed and poisoned by SLAB_DEBUG, but mem_cgroup_lru_del_list() is
still trying to update its stats, and take page off lru before freeing.

A task, or a charge, or a page on lru: each secures a memcg against
removal.  In this case, the last task has been moved out of the old memcg,
and it is exiting: anonymous pages are uncharged one by one from the
memcg, as they are zapped from its pagetables, so the charge gets down to
0; but the pages themselves are queued in an mmu_gather for freeing.

Most of those pages will be on lru (and force_empty is careful to
lru_add_drain_all, to add pages from pagevec to lru first), but not
necessarily all: perhaps some have been isolated for page reclaim, perhaps
some isolated for other reasons.  So, force_empty may find no task, no
charge and no page on lru, and let the removal proceed.

There would still be no problem if these pages were immediately freed; but
typically (and the put_page_testzero protocol demands it) they have to be
added back to lru before they are found freeable, then removed from lru
and freed.  We don't see the issue when adding, because the
mem_cgroup_iter() loops keep their own reference to the memcg being
scanned; but when it comes to mem_cgroup_lru_del_list().

I believe this was not an issue in v3.2: there, PageCgroupAcctLRU and
PageCgroupUsed flags were used (like a trick with mirrors) to deflect view
of pc->mem_cgroup to the stable root_mem_cgroup when neither set.
38c5d72f3ebe ("memcg: simplify LRU handling by new rule") mercifully
removed those convolutions, but left this General Protection Fault.

But it's surprisingly easy to restore the old behaviour: just check
PageCgroupUsed in mem_cgroup_lru_add_list() (which decides on which lruvec
to add), and reset pc to root_mem_cgroup if page is uncharged.  A risky
change?  just going back to how it worked before; testing, and an audit of
uses of pc->mem_cgroup, show no problem.

And there's a nice bonus: with mem_cgroup_lru_add_list() itself making
sure that an uncharged page goes to root lru, mem_cgroup_reset_owner() no
longer has any purpose, and we can safely revert 4e5f01c2b9b9 ("memcg:
clear pc->mem_cgroup if necessary").

Calling update_page_reclaim_stat() after add_page_to_lru_list() in swap.c
is not strictly necessary: the lru_lock there, with RCU before memcg
structures are freed, makes mem_cgroup_get_reclaim_stat_from_page safe
without that; but it seems cleaner to rely on one dependency less.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 -----
 mm/ksm.c                   | 11 -----------
 mm/memcontrol.c            | 30 +++++++++++++-----------------
 mm/migrate.c               |  2 --
 mm/swap.c                  |  8 +++++---
 mm/swap_state.c            | 10 ----------
 6 files changed, 18 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d34356fe644..b80de520670b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -129,7 +129,6 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-extern void mem_cgroup_reset_owner(struct page *page);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
@@ -392,10 +391,6 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
 				struct page *newpage)
 {
 }
-
-static inline void mem_cgroup_reset_owner(struct page *page)
-{
-}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/ksm.c b/mm/ksm.c
index 1925ffbfb27f..310544a379ae 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -28,7 +28,6 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
-#include <linux/memcontrol.h>
 #include <linux/rbtree.h>
 #include <linux/memory.h>
 #include <linux/mmu_notifier.h>
@@ -1572,16 +1571,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
-		/*
-		 * The memcg-specific accounting when moving
-		 * pages around the LRU lists relies on the
-		 * page's owner (memcg) to be valid.  Usually,
-		 * pages are assigned to a new owner before
-		 * being put on the LRU list, but since this
-		 * is not the case here, the stale owner from
-		 * a previous allocation cycle must be reset.
-		 */
-		mem_cgroup_reset_owner(new_page);
 		copy_user_highpage(new_page, page, address, vma);
 
 		SetPageDirty(new_page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1097d8098f8c..d0e57a3cda18 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1042,6 +1042,19 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 
 	pc = lookup_page_cgroup(page);
 	memcg = pc->mem_cgroup;
+
+	/*
+	 * Surreptitiously switch any uncharged page to root:
+	 * an uncharged page off lru does nothing to secure
+	 * its former mem_cgroup from sudden removal.
+	 *
+	 * Our caller holds lru_lock, and PageCgroupUsed is updated
+	 * under page_cgroup lock: between them, they make all uses
+	 * of pc->mem_cgroup safe.
+	 */
+	if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup)
+		pc->mem_cgroup = memcg = root_mem_cgroup;
+
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* compound_order() is stabilized through lru_lock */
 	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -3029,23 +3042,6 @@ void mem_cgroup_uncharge_end(void)
 	batch->memcg = NULL;
 }
 
-/*
- * A function for resetting pc->mem_cgroup for newly allocated pages.
- * This function should be called if the newpage will be added to LRU
- * before start accounting.
- */
-void mem_cgroup_reset_owner(struct page *newpage)
-{
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(newpage);
-	VM_BUG_ON(PageCgroupUsed(pc));
-	pc->mem_cgroup = root_mem_cgroup;
-}
-
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
diff --git a/mm/migrate.c b/mm/migrate.c
index df141f60289e..1503b6b54ecb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -839,8 +839,6 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
-	mem_cgroup_reset_owner(newpage);
-
 	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto out;
diff --git a/mm/swap.c b/mm/swap.c
index fff1ff7fb9ad..14380e9fbe33 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -652,7 +652,7 @@ EXPORT_SYMBOL(__pagevec_release);
 void lru_add_page_tail(struct zone* zone,
 		       struct page *page, struct page *page_tail)
 {
-	int active;
+	int uninitialized_var(active);
 	enum lru_list lru;
 	const int file = 0;
 
@@ -672,7 +672,6 @@ void lru_add_page_tail(struct zone* zone,
 			active = 0;
 			lru = LRU_INACTIVE_ANON;
 		}
-		update_page_reclaim_stat(zone, page_tail, file, active);
 	} else {
 		SetPageUnevictable(page_tail);
 		lru = LRU_UNEVICTABLE;
@@ -693,6 +692,9 @@ void lru_add_page_tail(struct zone* zone,
 		list_head = page_tail->lru.prev;
 		list_move_tail(&page_tail->lru, list_head);
 	}
+
+	if (!PageUnevictable(page))
+		update_page_reclaim_stat(zone, page_tail, file, active);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -710,8 +712,8 @@ static void __pagevec_lru_add_fn(struct page *page, void *arg)
 	SetPageLRU(page);
 	if (active)
 		SetPageActive(page);
-	update_page_reclaim_stat(zone, page, file, active);
 	add_page_to_lru_list(zone, page, lru);
+	update_page_reclaim_stat(zone, page, file, active);
 }
 
 /*
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 470038a91873..ea6b32d61873 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -300,16 +300,6 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			new_page = alloc_page_vma(gfp_mask, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
-			/*
-			 * The memcg-specific accounting when moving
-			 * pages around the LRU lists relies on the
-			 * page's owner (memcg) to be valid.  Usually,
-			 * pages are assigned to a new owner before
-			 * being put on the LRU list, but since this
-			 * is not the case here, the stale owner from
-			 * a previous allocation cycle must be reset.
-			 */
-			mem_cgroup_reset_owner(new_page);
 		}
 
 		/*
-- 
cgit v1.2.3-55-g7522


From 5faa5df1fa2024bd750089ff21dcc4191798263d Mon Sep 17 00:00:00 2001
From: Steffen Klassert
Date: Tue, 6 Mar 2012 21:20:26 +0000
Subject: inetpeer: Invalidate the inetpeer tree along with the routing cache

We initialize the routing metrics with the values cached on the
inetpeer in rt_init_metrics(). So if we have the metrics cached on the
inetpeer, we ignore the user configured fib_metrics.

To fix this issue, we replace the old tree with a fresh initialized
inet_peer_base. The old tree is removed later with a delayed work queue.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  3 ++
 net/ipv4/inetpeer.c    | 80 +++++++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/route.c       |  1 +
 3 files changed, 83 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 06b795dd5906..ff04a33acf00 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,6 +41,7 @@ struct inet_peer {
 	u32			pmtu_orig;
 	u32			pmtu_learned;
 	struct inetpeer_addr_base redirect_learned;
+	struct list_head	gc_list;
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
 	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
@@ -96,6 +97,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
+extern void inetpeer_invalidate_tree(int family);
+
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index bf4a9c4808e1..deea2e96b7f2 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/net.h>
+#include <linux/workqueue.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
 #include <net/secure_seq.h>
@@ -66,6 +67,11 @@
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
+static LIST_HEAD(gc_list);
+static const int gc_delay = 60 * HZ;
+static struct delayed_work gc_work;
+static DEFINE_SPINLOCK(gc_lock);
+
 #define node_height(x) x->avl_height
 
 #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
@@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
 
+static void inetpeer_gc_worker(struct work_struct *work)
+{
+	struct inet_peer *p, *n;
+	LIST_HEAD(list);
+
+	spin_lock_bh(&gc_lock);
+	list_replace_init(&gc_list, &list);
+	spin_unlock_bh(&gc_lock);
+
+	if (list_empty(&list))
+		return;
+
+	list_for_each_entry_safe(p, n, &list, gc_list) {
+
+		if(need_resched())
+			cond_resched();
+
+		if (p->avl_left != peer_avl_empty) {
+			list_add_tail(&p->avl_left->gc_list, &list);
+			p->avl_left = peer_avl_empty;
+		}
+
+		if (p->avl_right != peer_avl_empty) {
+			list_add_tail(&p->avl_right->gc_list, &list);
+			p->avl_right = peer_avl_empty;
+		}
+
+		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
+
+		if (!atomic_read(&p->refcnt)) {
+			list_del(&p->gc_list);
+			kmem_cache_free(peer_cachep, p);
+		}
+	}
+
+	if (list_empty(&list))
+		return;
+
+	spin_lock_bh(&gc_lock);
+	list_splice(&list, &gc_list);
+	spin_unlock_bh(&gc_lock);
+
+	schedule_delayed_work(&gc_work, gc_delay);
+}
 
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
@@ -126,6 +176,7 @@ void __init inet_initpeers(void)
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
+	INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -449,7 +500,7 @@ relookup:
 		p->pmtu_orig = 0;
 		p->redirect_genid = 0;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
-
+		INIT_LIST_HEAD(&p->gc_list);
 
 		/* Link the node. */
 		link_to_pool(p, base);
@@ -509,3 +560,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 	return rc;
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
+
+void inetpeer_invalidate_tree(int family)
+{
+	struct inet_peer *old, *new, *prev;
+	struct inet_peer_base *base = family_to_base(family);
+
+	write_seqlock_bh(&base->lock);
+
+	old = base->root;
+	if (old == peer_avl_empty_rcu)
+		goto out;
+
+	new = peer_avl_empty_rcu;
+
+	prev = cmpxchg(&base->root, old, new);
+	if (prev == old) {
+		base->total = 0;
+		spin_lock(&gc_lock);
+		list_add_tail(&prev->gc_list, &gc_list);
+		spin_unlock(&gc_lock);
+		schedule_delayed_work(&gc_work, gc_delay);
+	}
+
+out:
+	write_sequnlock_bh(&base->lock);
+}
+EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bcacf54e5418..23ce0c1287ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -938,6 +938,7 @@ static void rt_cache_invalidate(struct net *net)
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 	redirect_genid++;
+	inetpeer_invalidate_tree(AF_INET);
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From ac3f48de09d8f4b73397047e413fadff7f65cfa7 Mon Sep 17 00:00:00 2001
From: Steffen Klassert
Date: Tue, 6 Mar 2012 21:21:10 +0000
Subject: route: Remove redirect_genid

As we invalidate the inetpeer tree along with the routing cache now,
we don't need a genid to reset the redirect handling when the routing
cache is flushed.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  1 -
 net/ipv4/inetpeer.c    |  1 -
 net/ipv4/route.c       | 11 ++---------
 3 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index ff04a33acf00..b94765e38e80 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -35,7 +35,6 @@ struct inet_peer {
 
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
-	int			redirect_genid;
 	unsigned long		rate_last;
 	unsigned long		pmtu_expires;
 	u32			pmtu_orig;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index deea2e96b7f2..d4d61b694fab 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -498,7 +498,6 @@ relookup:
 		p->rate_last = 0;
 		p->pmtu_expires = 0;
 		p->pmtu_orig = 0;
-		p->redirect_genid = 0;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
 		INIT_LIST_HEAD(&p->gc_list);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 23ce0c1287ab..019774796174 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -132,7 +132,6 @@ static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
-static int redirect_genid;
 
 static struct delayed_work expires_work;
 static unsigned long expires_ljiffies;
@@ -937,7 +936,6 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	redirect_genid++;
 	inetpeer_invalidate_tree(AF_INET);
 }
 
@@ -1486,10 +1484,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				peer = rt->peer;
 				if (peer) {
-					if (peer->redirect_learned.a4 != new_gw ||
-					    peer->redirect_genid != redirect_genid) {
+					if (peer->redirect_learned.a4 != new_gw) {
 						peer->redirect_learned.a4 = new_gw;
-						peer->redirect_genid = redirect_genid;
 						atomic_inc(&__rt_peer_genid);
 					}
 					check_peer_redir(&rt->dst, peer);
@@ -1794,8 +1790,6 @@ static void ipv4_validate_peer(struct rtable *rt)
 		if (peer) {
 			check_peer_pmtu(&rt->dst, peer);
 
-			if (peer->redirect_genid != redirect_genid)
-				peer->redirect_learned.a4 = 0;
 			if (peer->redirect_learned.a4 &&
 			    peer->redirect_learned.a4 != rt->rt_gateway)
 				check_peer_redir(&rt->dst, peer);
@@ -1959,8 +1953,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 		dst_init_metrics(&rt->dst, peer->metrics, false);
 
 		check_peer_pmtu(&rt->dst, peer);
-		if (peer->redirect_genid != redirect_genid)
-			peer->redirect_learned.a4 = 0;
+
 		if (peer->redirect_learned.a4 &&
 		    peer->redirect_learned.a4 != rt->rt_gateway) {
 			rt->rt_gateway = peer->redirect_learned.a4;
-- 
cgit v1.2.3-55-g7522